VEuPathDB · kathryncrouch · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 24, 2026
diff --git a/Model/lib/wdk/model/questions/geneQuestions.xml b/Model/lib/wdk/model/questions/geneQuestions.xml
@@ -931,6 +931,84 @@ In the analysis carried out by Alsford et al., pseudogenes, genes annotated as "
     </question>
 
 
+  <!--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-->
+  <!-- Genes By Secondary Metabolites (antiSMASH) -->
+  <!--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-->
+
+    <question name="GenesBySecondaryMetabolites"
+              includeProjects="FungiDB,UniDB"
+              displayName="Secondary Metabolites"
+              shortDisplayName="Sec Metabolites"
+              searchCategory="Function prediction"
+              queryRef="GeneId.GenesBySecondaryMetabolites"
+              recordClassRef="TranscriptRecordClasses.TranscriptRecordClass">
+      <!-- Params in dependency order: organism, then category (depends on organism), then annotation (depends on both). -->
+      <paramRef ref="organismParams.antismash_organism"/>
+      <paramRef ref="geneParams.antismash_category"/>
+      <paramRef ref="geneParams.antismash_annotation"/>
+      <!-- Default result columns, sorted by gene ID. -->
+      <attributesList
+        summary="category,antismash_annotation,cluster_location,overlapping_clusters"
+        sorting="gene_source_id asc"/>
+      <!-- Search-specific result attributes; cluster_location links to JBrowse over the cluster context window and highlights the gene. -->
+      <dynamicAttributes>
+        <columnAttribute name="category" displayName="Cluster Category" help="The category indicates the type of molecule this cluster is making based on the biochemical machinery it uses"/>
+        <columnAttribute name="antismash_annotation" displayName="Annotation" help="The annotation indicates the function of this gene within this biosynthetic cluster"/>
+        <columnAttribute name="cluster_start" displayName="Cluster Start"/>
+        <columnAttribute name="cluster_end" displayName="Cluster End"/>
+        <columnAttribute name="org_abbrev" displayName="Organism Abbreviation" internal="true" inReportMaker="false"/>
+        <columnAttribute name="cluster_context_start" displayName="Cluster Context Start" inReportMaker="false"/>
+        <columnAttribute name="cluster_context_end" displayName="Cluster Context End" inReportMaker="false"/>
+        <columnAttribute name="overlapping_clusters" displayName="View Overlapping Clusters" help="Where a gene may function in more than one cluster, the longest is shown by default. Other clusters where this gene may function are shown here."/>
+        <textAttribute name="cluster_location" displayName="View Cluster in JBrowse" inReportMaker="false" truncateTo="100000">
+          <text>
+            <![CDATA[
+              <a href="@JBROWSE_WEBPAGE_URL@?loc=$$sequence_id$$:$$cluster_context_start$$..$$cluster_context_end$$&data=@JBROWSE_SERVICE_URL@/tracks/$$org_abbrev$$&tracks=gene%2Cantibiotics%20and%20Secondary%20Metabolites%20Analysis%20SHell%20(antiSMASH)&highlight=$$sequence_id$$:$$gene_start_min$$..$$gene_end_max$$">$$sequence_id$$:$$cluster_start$$-$$cluster_end$$</a>
+            ]]>
+          </text>
+        </textAttribute>
+      </dynamicAttributes>
+
+      <summary><![CDATA[Find genes in secondary metabolite biosynthetic clusters predicted using antiSMASH.]]></summary>
+
+      <description><![CDATA[
+        Find genes associated with secondary metabolite biosynthetic clusters predicted using antiSMASH.<br><br>
+
+        In addition to primary metabolites essential for growth and survival, fungi and some other microorganisms produce secondary metabolites. These often provide competitive advantages for the microorganism in its environment. They are also of interest as an important source of natural products. These compounds are typically encoded by co-located and co-expressed groups of genes that function together to build, modify, and export the final molecule. This co-located group is called a biosynthetic gene cluster (BGC). Well-known examples of secondary metabolites include antibiotics like penicillin and erythromycin, antifungals, and immunosuppressants like rapamycin.<br><br>
+
+        antiSMASH is a bioinformatics tool to identify and annotate biosynthetic gene clusters. It works by searching for signature biosynthetic genes whose sequences are well conserved across known clusters. When it detects one of these signature genes, it defines a genomic region around it, predicts the cluster boundaries, and annotates every gene within that region with a predicted function. It then compares the identified cluster against a database of known BGCs to predict what compound the cluster might produce.<br><br>
+
+    <strong>Cluster Category</strong><br>
+    The cluster category groups biosynthetic gene clusters (BGCs) into broad classes based on the type of natural product they produce. This is predicted based on the biochemical machinery represented in the cluster.<br><br>
+
+    Common categories you are likely to see include:<br>
+<ul>
+    <li><strong>PKS (Polyketide synthases):</strong> Clusters that synthesize secondary metabolites comprising complex chains of alternating ketone and methylene groups. These molecules often function as antibiotics, antifungals, or anticancer compounds. Examples include erythromycin and rapamycin.</li>
+    <li><strong>NRPS (Nonribosomal peptide synthetases):</strong> Clusters that synthesize secondary metabolites comprising amino acids, including non-proteinogenic amino acids, polymerized without using ribosomes. These molecules often function as antibiotics or siderophores. Examples include penicillin and vancomycin.</li>
+    <li><strong>Terpenes:</strong> Clusters that make terpenoids, a huge and diverse class of compounds derived from isoprene units, including sterols, pigments, and volatile compounds.</li>
+    <li><strong>RiPP (Ribosomally synthesised and post-translationally modified peptides):</strong> These clusters create compounds from small peptides that are initially made by the ribosome and then heavily modified. Examples include lanthipeptides and bacteriocins.</li>
+    <li><strong>Other:</strong> Clusters whose products cannot be categorized or that use a combination of structures from multiple categories.</li>
+</ul><br><br>
+
+    <strong>Annotation</strong><br>
+    The annotation describes what a specific gene or protein within the cluster is predicted to do based on sequence similarity to known proteins.<br><br>
+
+    Common annotations you are likely to see include:<br>
+<ul>
+    <li><strong>Biosynthetic:</strong> These are the signature biosynthetic genes that define a cluster and predict its function. These may be referred to as core or backbone genes.</li>
+    <li><strong>Biosynthetic additional:</strong> These are additional biosynthetic genes that further modify the primary product of a cluster (tailoring or decorating enzymes), or aid production of the product by supplying cofactors or substrates.</li>
+    <li><strong>Regulatory:</strong> These are transcription factors or other regulatory elements that control expression of this cluster.</li>
+    <li><strong>Resistance:</strong> These genes protect the organism from its own toxic products. Examples include efflux pumps or enzymes that modify the product further to protect the host.</li>
+    <li><strong>Transport:</strong> These genes transport the product to its final location.</li>
+    <li><strong>Other:</strong> This label is applied by antiSMASH when a gene has similarity to genes that appear in other BGCs but where the role of the gene within the BGC is not understood.</li>
+    <li><strong>Unknown:</strong> This category is used for genes with no detectable similarity to antiSMASH models, and which have therefore not been annotated by antiSMASH.</li>
+</ul><br><br>
+
+Read more about antiSMASH <a href="https://antismash.secondarymetabolites.org/#!/about">here</a>.<br><br>
+      ]]></description>
+
+    </question>
+
 
   <!--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-->
   <!-- functional profiling growth rate Phenotype -->

diff --git a/Model/lib/wdk/model/questions/params/geneParams.xml b/Model/lib/wdk/model/questions/params/geneParams.xml
@@ -5104,6 +5104,29 @@ products of your selected type (or types).<br><br>
       <suggest default="1"/>
     </stringParam>
 
+    <!-- antiSMASH secondary metabolite params (FungiDB, UniDB only) -->
+    <flatVocabParam name="antismash_category"
+                    queryRef="GeneVQ.AntismashCategories"
+                    prompt="Cluster Category"
+                    multiPick="true" quote="true"
+                    dependedParamRef="organismParams.antismash_organism"
+                    includeProjects="FungiDB,UniDB">
+      <!-- Choices come from GeneVQ.AntismashCategories and depend on the selected organism(s); values feed SQL IN (...) lists. -->
+      <help>The cluster category describes what kind of molecule the cluster is likely to make based on the biochemical machinery represented in the cluster. Select one or more secondary metabolite cluster categories to explore.</help>
+      <suggest default="NRPS"/>
+    </flatVocabParam>
+
+    <flatVocabParam name="antismash_annotation"
+                    prompt="Annotation"
+                    queryRef="GeneVQ.AntismashAnnotations"
+                    dependedParamRef="geneParams.antismash_category, organismParams.antismash_organism"
+                    quote="true" multiPick="true"
+                    includeProjects="FungiDB,UniDB">
+      <!-- Choices come from GeneVQ.AntismashAnnotations and depend on both the category and the organism params. -->
+      <help>The annotation describes the function of a specific gene within a biosynthetic cluster. Select one or more annotations to explore.</help>
+      <suggest default="biosynthetic"/>
+    </flatVocabParam>
+
     <stringParam name="genbank_accession"
                  prompt="GenBank Accession Number"
                  number="false">
@@ -9729,6 +9752,62 @@ end as term
       </sql>
     </sqlQuery>
 
+    <sqlQuery name="AntismashCategories" includeProjects="FungiDB,UniDB">
+      <!-- Vocabulary of the cluster categories present in the selected organism(s); 'other' sorts last. -->
+      <!-- The project filter uses the same PROJECT_ID macro / UniDB test as organismVQ.AntismashOrganisms -->
+      <!-- rather than a hard-coded project name, so the dependent vocabulary matches the organism list. -->
+      <paramRef ref="organismParams.antismash_organism"/>
+      <column name="display"/>
+      <column name="internal"/>
+      <column name="term"/>
+      <sql>
+        <![CDATA[
+        SELECT DISTINCT ac.category AS display
+        , ac.category AS internal
+        , ac.category AS term
+        , CASE WHEN lower(ac.category) = 'other' THEN 1 ELSE 0 END AS sort_order
+        FROM apidb.antismashfeature af
+        JOIN apidb.antismashclusterfeature acf ON acf.antismash_feature_id = af.antismash_feature_id
+        JOIN apidb.antismashcluster ac ON ac.antismash_cluster_id = acf.antismash_cluster_id
+        JOIN apidbtuning.transcriptattributes ta ON ta.gene_na_feature_id = af.na_feature_id
+        WHERE ta.org_abbrev IN ($$antismash_organism$$)
+        AND (ta.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
+        ORDER BY sort_order, display
+        ]]>
+      </sql>
+    </sqlQuery>
+
+    <sqlQuery name="AntismashAnnotations" includeProjects="FungiDB,UniDB">
+      <!-- Vocabulary of gene annotations found in the selected organism(s) and cluster categories. -->
+      <!-- NULL annotations are offered as 'unknown'; sort_order lists the biosynthetic terms first and -->
+      <!-- 'other' / 'unknown' last. Project filter uses the same test as organismVQ.AntismashOrganisms. -->
+      <paramRef ref="organismParams.antismash_organism"/>
+      <paramRef ref="geneParams.antismash_category"/>
+      <column name="display"/>
+      <column name="internal"/>
+      <column name="term"/>
+      <sql>
+        <![CDATA[
+        SELECT DISTINCT COALESCE(af.antismash_annotation, 'unknown') AS display
+        , COALESCE(af.antismash_annotation, 'unknown') AS internal
+        , CASE WHEN COALESCE(af.antismash_annotation, 'unknown') = 'biosynthetic'         THEN 0
+            WHEN COALESCE(af.antismash_annotation, 'unknown') = 'biosynthetic-additional' THEN 1
+            WHEN COALESCE(af.antismash_annotation, 'unknown') = 'other'                   THEN 3
+            WHEN COALESCE(af.antismash_annotation, 'unknown') = 'unknown'                 THEN 4
+            ELSE 2 END AS sort_order
+        , COALESCE(af.antismash_annotation, 'unknown') AS term
+        FROM apidb.antismashfeature af
+        JOIN apidb.antismashclusterfeature acf ON acf.antismash_feature_id = af.antismash_feature_id
+        JOIN apidb.antismashcluster ac ON ac.antismash_cluster_id = acf.antismash_cluster_id
+        JOIN apidbtuning.transcriptattributes ta ON ta.gene_na_feature_id = af.na_feature_id
+        WHERE ta.org_abbrev IN ($$antismash_organism$$)
+        AND (ta.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
+        AND ac.category IN ($$antismash_category$$)
+        ORDER BY sort_order, display
+        ]]>
+      </sql>
+    </sqlQuery>
+
   </querySet>
 
   <groupSet name="geneParamGroupSet">

diff --git a/Model/lib/wdk/model/questions/params/organismParams.xml b/Model/lib/wdk/model/questions/params/organismParams.xml
@@ -438,6 +438,21 @@
       </enumList>
     </enumParam>
 
+    <flatVocabParam name="antismash_organism"
+                    prompt="Organism"
+                    queryRef="organismVQ.AntismashOrganisms"
+                    displayType="treeBox" suppressNode="true"
+                    multiPick="true"
+                    quote="true"
+                    includeProjects="FungiDB,UniDB">
+      <!-- Organism tree picker whose vocabulary is supplied by organismVQ.AntismashOrganisms. -->
+      <help>Select the organism(s) to search.</help>
+      <propertyList name="organismProperties">
+        <value>pruneNodesWithSingleExtendingChild</value>
+        <value>showOnlyPreferredOrganisms</value>
+      </propertyList>
+    </flatVocabParam>
+
   </paramSet>
 
 
@@ -1170,6 +1185,31 @@
       </sql>
     </sqlQuery>
 
+    <sqlQuery name="AntismashOrganisms" includeProjects="FungiDB,UniDB">
+      <!-- Organism-tree vocabulary restricted to organisms that have at least one antiSMASH feature. -->
+      <!-- Each (term, parentTerm) node carries the comma-separated org_abbrev values of its organisms. -->
+      <column name="parentTerm"/>
+      <column name="internal"/>
+      <column name="term"/>
+      <sql>
+        <![CDATA[
+        WITH antismash_orgs AS (
+            SELECT DISTINCT ga.organism, ga.org_abbrev
+            FROM apidb.antismashfeature af
+            JOIN apidbtuning.geneattributes ga ON ga.na_feature_id = af.na_feature_id
+            WHERE (ga.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
+        )
+        SELECT ot.term
+        , ot.parentTerm
+        , string_agg(ao.org_abbrev, ', ') AS internal
+        FROM apidbtuning.organismtree ot
+        JOIN antismash_orgs ao ON ao.organism = ot.organism
+        GROUP BY ot.term, ot.parentTerm
+        ORDER BY ot.parentTerm, ot.term
+        ]]>
+      </sql>
+    </sqlQuery>
+
   </querySet>
 
 </wdkModel>
diff --git a/Model/lib/wdk/model/questions/queries/geneQueries.xml b/Model/lib/wdk/model/questions/queries/geneQueries.xml
@@ -5773,6 +5773,85 @@ select distinct ta.gene_source_id
       </sql>
     </sqlQuery>
 
+    <sqlQuery name="GenesBySecondaryMetabolites" includeProjects="FungiDB,UniDB">
+      <!-- One row per source_id: when it falls in several matching clusters the longest cluster is reported -->
+      <!-- and the remaining ones are linked in overlapping_clusters ('No' when there are none). -->
+      <!-- NULL annotations are searched and reported as 'unknown', matching GeneVQ.AntismashAnnotations. -->
+      <!-- The project filter uses the same PROJECT_ID macro / UniDB test as organismVQ.AntismashOrganisms. -->
+      <paramRef ref="organismParams.antismash_organism"/>
+      <paramRef ref="geneParams.antismash_category"/>
+      <paramRef ref="geneParams.antismash_annotation"/>
+      <column name="source_id"/>
+      <column name="gene_source_id"/>
+      <column name="project_id"/>
+      <column name="wdk_weight"/>
+      <column name="matched_result"/>
+      <column name="category"/>
+      <column name="antismash_annotation"/>
+      <column name="cluster_start"/>
+      <column name="cluster_end"/>
+      <column name="sequence_id"/>
+      <column name="org_abbrev"/>
+      <column name="cluster_context_start"/>
+      <column name="cluster_context_end"/>
+      <column name="overlapping_clusters"/>
+      <sql>
+        <![CDATA[
+        WITH gene_clusters AS (
+          SELECT DISTINCT
+            ta.source_id, ta.gene_source_id, ta.project_id, ta.sequence_id, ta.org_abbrev,
+            ac.antismash_cluster_id, ac.category, af.antismash_annotation,
+            ac.cluster_start, ac.cluster_end,
+            (ac.cluster_end - ac.cluster_start) AS cluster_length
+          FROM apidb.antismashcluster ac
+          JOIN apidb.antismashclusterfeature acf ON acf.antismash_cluster_id = ac.antismash_cluster_id
+          JOIN apidb.antismashfeature af ON af.antismash_feature_id = acf.antismash_feature_id
+          JOIN apidbtuning.transcriptattributes ta ON ta.gene_na_feature_id = af.na_feature_id
+          WHERE (ta.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
+          AND ta.org_abbrev IN ($$antismash_organism$$)
+          AND ac.category IN ($$antismash_category$$)
+          AND (af.antismash_annotation IN ($$antismash_annotation$$)
+              OR ('unknown' IN ($$antismash_annotation$$) AND af.antismash_annotation IS NULL)
+          )
+        ),
+        ranked AS (
+          SELECT gc.*,
+            ROW_NUMBER() OVER (PARTITION BY gc.source_id
+                               ORDER BY gc.cluster_length DESC, gc.antismash_cluster_id) AS rn
+          FROM gene_clusters gc
+        ),
+        gene_overlaps AS (
+          SELECT r.source_id,
+            STRING_AGG(
+              CONCAT(
+                '<a href="@JBROWSE_WEBPAGE_URL@?loc=', gc2.sequence_id, ':',
+                GREATEST(gc2.cluster_start - 500, 1), '..', gc2.cluster_end + 500,
+                '&data=@JBROWSE_SERVICE_URL@/tracks/', gc2.org_abbrev,
+                '&tracks=gene%2Cantibiotics%20and%20Secondary%20Metabolites%20Analysis%20SHell%20(antiSMASH)">',
+                gc2.sequence_id, ':', gc2.cluster_start, '-', gc2.cluster_end, '</a>'
+              ),
+              ', ' ORDER BY gc2.cluster_start
+            ) AS overlapping_clusters
+          FROM ranked r
+          JOIN gene_clusters gc2
+            ON gc2.source_id = r.source_id
+            AND gc2.antismash_cluster_id != r.antismash_cluster_id
+          WHERE r.rn = 1
+          GROUP BY r.source_id
+        )
+        SELECT r.source_id, r.gene_source_id, r.project_id, 10 AS wdk_weight, 'Y' AS matched_result,
+          r.category, COALESCE(r.antismash_annotation, 'unknown') AS antismash_annotation,
+          r.cluster_start, r.cluster_end, r.sequence_id, r.org_abbrev,
+          GREATEST(r.cluster_start - 1000, 1) AS cluster_context_start,
+          r.cluster_end + 1000 AS cluster_context_end,
+          COALESCE(go.overlapping_clusters, 'No') AS overlapping_clusters
+        FROM ranked r
+        LEFT JOIN gene_overlaps go ON go.source_id = r.source_id
+        WHERE r.rn = 1
+        ]]>
+      </sql>
+    </sqlQuery>
+
 
 
   </querySet>

diff --git a/Model/lib/wdk/model/records/geneRecord.xml b/Model/lib/wdk/model/records/geneRecord.xml
@@ -3174,6 +3174,18 @@ name" internal="true"/>
             <columnAttribute displayName="Dataset" name="dataset"/>
           </table>
 
+         <table name="SecondaryMetaboliteClusters"
+                displayName="Secondary Metabolite Clusters (antiSMASH)"
+                queryRef="GeneTables.SecondaryMetaboliteClusters"
+                includeProjects="FungiDB,UniDB" inReportMaker="false">
+            <!-- Columns are filled by GeneTables.SecondaryMetaboliteClusters: cluster location, category and gene annotation. -->
+            <columnAttribute displayName="Sequence" name="sequence_id"/>
+            <columnAttribute displayName="Cluster Type" name="category"/>
+            <columnAttribute displayName="Cluster Start" name="cluster_start"/>
+            <columnAttribute displayName="Cluster End" name="cluster_end"/>
+            <columnAttribute displayName="antiSMASH Annotation" name="antismash_annotation"/>
+         </table>
+
 
 
 

diff --git a/Model/lib/wdk/model/records/geneTableQueries.xml b/Model/lib/wdk/model/records/geneTableQueries.xml
@@ -4849,5 +4849,37 @@ FROM webready.GeneAttributes_p ga, (
       </sql>
     </sqlQuery>
 
+    <sqlQuery name="SecondaryMetaboliteClusters" includeProjects="FungiDB,UniDB" isCacheable="false">
+      <!-- Gene record table: antiSMASH clusters that contain the gene. DISTINCT is needed because the join is on -->
+      <!-- transcriptattributes.gene_na_feature_id, so a gene with several transcripts would repeat every row. -->
+      <!-- The project filter uses the same PROJECT_ID macro / UniDB test as organismVQ.AntismashOrganisms. -->
+      <!-- NOTE(review): another query in this file reads webready.GeneAttributes_p; confirm apidbtuning is valid here. -->
+      <column name="source_id"/>
+      <column name="project_id"/>
+      <column name="sequence_id"/>
+      <column name="category"/>
+      <column name="cluster_start"/>
+      <column name="cluster_end"/>
+      <column name="antismash_annotation"/>
+      <sql>
+        <![CDATA[
+          select distinct
+            ta.gene_source_id as source_id
+            , ta.project_id
+            , ta.sequence_id
+            , ac.category
+            , ac.cluster_start
+            , ac.cluster_end
+            , af.antismash_annotation
+          from apidb.antismashcluster ac
+            join apidb.antismashclusterfeature acf on acf.antismash_cluster_id = ac.antismash_cluster_id
+            join apidb.antismashfeature af on af.antismash_feature_id = acf.antismash_feature_id
+            join apidbtuning.transcriptattributes ta on af.na_feature_id = ta.gene_na_feature_id
+          where (ta.project_id = '@PROJECT_ID@' or 'UniDB' = '@PROJECT_ID@')
+            and ta.org_abbrev IN (%%PARTITION_KEYS%%)
+        ]]>
+      </sql>
+    </sqlQuery>
+
   </querySet>
 </wdkModel>