diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml new file mode 100644 index 0000000..64576e1 --- /dev/null +++ b/.github/workflows/linting.yaml @@ -0,0 +1,105 @@ +name: Run Linting +on: [push, pull_request] + +env: + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NXF_VER: "25.10.2" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + nf-core-changes: + name: Detect changes + runs-on: ubuntu-latest + + outputs: + # https://github.com/dorny/paths-filter?tab=readme-ov-file#custom-processing-of-changed-files + modules: ${{ steps.filter.outputs.modules }} + modules_files: ${{ steps.module_names.outputs.result }} + subworkflows: ${{ steps.filter.outputs.subworkflows }} + subworkflows_files: ${{ steps.subworkflow_names.outputs.result }} + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4 + id: filter + with: + filters: | + modules: + - added|modified: 'modules/ensembl/**' + subworkflows: + - added|modified: 'subworkflows/ensembl/**' + token: "" + list-files: "json" + + - name: Get module name + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + id: module_names + with: + script: | + return [...new Set(${{ steps.filter.outputs.modules_files }} + .filter(x => x.endsWith('main.nf') || x.endsWith('.nf.test.snap')) + .map(path => path + .replace('modules/ensembl/', '') + .replace(/\/(main\.nf|tests\/.*)$/, '') + ) + )]; + - name: Get subworkflow name + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + id: subworkflow_names + with: + script: | + return [...new Set(${{ steps.filter.outputs.subworkflows_files }} + .filter(x => x.endsWith('main.nf') || x.endsWith('.nf.test.snap')) + .map(path => path + 
.replace('subworkflows/ensembl/', '') + .replace(/\/(main\.nf|tests\/.*)$/, '') + ) + )]; + + - name: debug + run: | + echo ${{ steps.filter.outputs.modules_files }} + echo ${{ steps.module_names.outputs.result }} + echo ${{ steps.filter.outputs.subworkflows_files }} + echo ${{ steps.subworkflow_names.outputs.result }} + + + nf-core-lint: + name: nf-core lint + runs-on: ubuntu-latest + needs: nf-core-changes + if: needs.nf-core-changes.outputs.modules_files != '[]' + strategy: + fail-fast: false + matrix: + module: ${{ fromJson(needs.nf-core-changes.outputs.modules_files) }} + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.14" + + - uses: actions/cache@v5 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip + + - uses: nf-core/setup-nextflow@v2 + + - name: Install nf-core tools + run: pip install --upgrade git+https://github.com/nf-core/tools.git@dev + + - name: Lint modules + run: nf-core modules lint ${{ matrix.module }} + + - name: Lint subworkflows + if: needs.nf-core-changes.outputs.subworkflows == 'true' + run: nf-core subworkflows lint --all \ No newline at end of file diff --git a/.github/workflows/nf-test.yaml b/.github/workflows/nf-test.yaml new file mode 100644 index 0000000..d7b570c --- /dev/null +++ b/.github/workflows/nf-test.yaml @@ -0,0 +1,48 @@ +name: NF Tests +on: [push, pull_request] + +env: + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + NFTEST_VER: "0.9.0" +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Setup Nextflow latest + uses: nf-core/setup-nextflow@v2 + with: + version: "latest-stable" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFTEST_VER }} + + - name: Setup 
apptainer + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5 + with: + python-version: "3.11" + + - name: Run Tests + run: nf-test test --ci #this is used to prevent updating the snapshot diff --git a/modules/ensembl/annotation/dumpanno/main.nf b/modules/ensembl/annotation/dumpanno/main.nf index 0c26e02..e26a678 100644 --- a/modules/ensembl/annotation/dumpanno/main.nf +++ b/modules/ensembl/annotation/dumpanno/main.nf @@ -50,7 +50,7 @@ process ANNOTATION_DUMPANNO { stub: version = "0.4" """ - echo "No change, create an empty json file" > functional_annotation.json + touch functional_annotation.json # Get version from genomio please echo -e -n "${task.process}:\n\tensembl-legacy-scripts:e112_APIv0.4 : $version" > versions.yml diff --git a/modules/ensembl/annotation/dumpanno/meta.yml b/modules/ensembl/annotation/dumpanno/meta.yml index e474f32..4656902 100644 --- a/modules/ensembl/annotation/dumpanno/meta.yml +++ b/modules/ensembl/annotation/dumpanno/meta.yml @@ -1,51 +1,47 @@ --- -name: "annotation_dumpanno" - -description: This module is used to dump the functional annotation of a genome from core databases. +name: "database_dbfactory" +description: > + Generate a list of one or more databases plus some associated meta table information. keywords: - container - core_database - docker - - ensembl-legacy-scripts - - fasta - + - database + - ensembl-genomio + tools: - - "annotation": - homepage: "https://github.com/Ensembl/nextflow_modules" - description: "This module handles all annotation-related interactions with the core database, including annotation dumping and processing" - license: ['Apache License version 2.0'] + database: + description: > + Modules which function to read or write information from core databases. 
+ homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - # Only when we have meta - - meta: - type: map - description: | - A meta map including 'core database information'. - e.g. [ id:'db_name', server_details:[ host:'db' ] ] - pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" + - name: server + type: map + description: > + Groovy Map containing MySQL server host, port, user, and password meta information, + e.g. `[ host:"mysql-host-test-prod" ]` -output: - - meta: - type: map - description: | - Database containing meta information. - e.g. [ id:'db_name', server_details:[ host:'db' ] ] - pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" + - name: filter_map + type: file + description: "JSON file containing database filter information." + pattern: "*.json" - - functional_annotation.json: - type: map - description: | - Functional annotation file containing biotypes. - e.g [ db:'database name', file("*.json") ] - pattern: ".json" +output: + - name: dbs_meta + type: file + description: "JSON file containing database metadata." + pattern: "dbs.json" - - versions: - type: file - description: File containing software versions. - pattern: "versions.yml" + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - - "@ensembl-dev" + - "ensembl-dev@ebi.ac.uk" maintainers: - - "@ensembl-dev" + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/database/dbfactory/meta.yml b/modules/ensembl/database/dbfactory/meta.yml index d0cb82f..4656902 100644 --- a/modules/ensembl/database/dbfactory/meta.yml +++ b/modules/ensembl/database/dbfactory/meta.yml @@ -1,6 +1,7 @@ --- name: "database_dbfactory" -description: Generate a list of one or more databases plus some associated meta table information. +description: > + Generate a list of one or more databases plus some associated meta table information. 
keywords: - container @@ -10,30 +11,35 @@ keywords: - ensembl-genomio tools: - - database: - description: "Modules which function to read or write information from core databases." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + database: + description: > + Modules which function to read or write information from core databases. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - server: - type: map - description: "Groovy Map containing mysql server host, port, user and password - meta and file of JSON database filter information." - e.g. [[ host:"mysql-host-test-prod" ], [ filter_map:"dbrename_re" ]] - - filtermap: - type: map - description: "Prefix to filter core databases. [default: none]" + - name: server + type: map + description: > + Groovy Map containing MySQL server host, port, user, and password meta information, + e.g. `[ host:"mysql-host-test-prod" ]` + + - name: filter_map + type: file + description: "JSON file containing database filter information." + pattern: "*.json" output: - - dbs_meta_json: - type: file - description: "JSON file containing database metadata." - pattern: "dbs.json" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: dbs_meta + type: file + description: "JSON file containing database metadata." 
+ pattern: "dbs.json" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/download/assemblydata/meta.yml b/modules/ensembl/download/assemblydata/meta.yml index 0f93a86..16e2c39 100644 --- a/modules/ensembl/download/assemblydata/meta.yml +++ b/modules/ensembl/download/assemblydata/meta.yml @@ -1,6 +1,7 @@ --- name: "download_assemblydata" -description: Generate a list of one or more databases plus some associated meta table information. +description: > + Download genome assembly data files for a single INSDC GCA or NCBI 'amended' GCF genome assembly accession. keywords: - ensembl-genomio @@ -12,35 +13,38 @@ keywords: - INSDC tools: - - download: - description: "Modules which function to download information or various data from externally hosted resources." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + download: + description: > + Modules which function to download information or various data from + externally hosted resources. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing A single INSDC GCA or NCBI 'amended' GCF genome assembly accession meta information. - e.g. `[ accession:'GCA_000000000.1' ]` + - name: meta + type: map + description: > + Groovy Map containing a single INSDC GCA or NCBI 'amended' GCF genome assembly accession meta information, + e.g. `[ accession:'GCA_000000000.1' ]` output: - - min_set: - type: map - description: "Groovy Map including various genome assembly data files (.txt, .fna, .gbff) into compressed Gzip archives." - e.g. 
[ accession:'GCA_000000000.1', file("*_assembly_report.txt"), file("*_genomic.fna.gz"), file("*_genomic.gbff.gz") ] - pattern: (*.txt, *.fna.gz, *.gbff.gz) - - opt_set: - type: map - description: "Groovy Map including various genome assembly data files (.txt, .faa, .gff) into compressed Gzip archives." - e.g. [ accession:'GCA_000000000.1', file("*_assembly_report.txt"), file("*_genomic.gff.gz"), file("*_genomic.gbff.gz") ] - pattern: (*.txt, *.fna.gz, *.gbff.gz) - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: min_set + type: file + description: "Minimal set of downloaded assembly data files" + pattern: "*" + + - name: opt_set + type: file + description: "Optional set of downloaded assembly data files" + pattern: "*" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/download/genbank/meta.yml b/modules/ensembl/download/genbank/meta.yml index 79b8e31..153b23a 100644 --- a/modules/ensembl/download/genbank/meta.yml +++ b/modules/ensembl/download/genbank/meta.yml @@ -1,6 +1,7 @@ --- name: "download_genbank" -description: Download a single sequence from NCBI GenBank. +description: > + Download a single sequence from NCBI GenBank. keywords: - core_database @@ -11,27 +12,33 @@ keywords: - genomics tools: - - download: - description: "Modules which function to download information or various data from externally hosted resources." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + download: + description: > + Modules which function to download information or various data from + externally hosted resources. 
+ homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - accession: - type: map - description: Meta map including sequence accession. Sequence record associated with - NCBI accession (e.g. NC_0-9+, NM_0-9+, NT_0-9+, NP_0-9+) - e.g. [[ accession:'NC_12341']] + - name: meta + type: map + description: "Optional meta information about the sequence to download." output: - - gb_sequence: - type: file - description: A sequence record outfile in genbank format. - pattern: "output.gb" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: gb_sequence_file + type: file + description: "A sequence record outfile in GenBank format" + pattern: "output.gb" + + - name: gb_sequence_meta + type: map + description: "Metadata associated with the downloaded GenBank sequence" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/dump/genomestats/meta.yml b/modules/ensembl/dump/genomestats/meta.yml index 65ebf0a..80d5967 100644 --- a/modules/ensembl/dump/genomestats/meta.yml +++ b/modules/ensembl/dump/genomestats/meta.yml @@ -1,6 +1,8 @@ --- name: "dump_genomestats" -description: A module for dumping a core database +description: > + A module for dumping a core database, including assembly and annotation statistics. + keywords: - container - core_database @@ -9,30 +11,38 @@ keywords: - json tools: - - dump: - description: "Modules which function to dump various forms of data from a core database." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + dump: + description: > + Modules which function to dump various forms of data from a core database. 
+ homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - db: - type: map - description: "A meta map including 'core database name'." - e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] - pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" + - name: db + type: map + description: > + A meta map including 'core database name', + e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] + pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" output: - - core_stats: - type: map - description: "JSON file containing core database statistics (assembly and annotation)." - e.g [ db:"database name", file("core_stats.json") ] - pattern: "core_stats.json" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: core_stats_meta + type: map + description: > + Metadata for the core database statistics JSON, e.g. [ db:"database name", file("core_stats.json") ] + + - name: core_stats_file + type: file + description: "JSON file containing core database statistics (assembly and annotation)" + pattern: "core_stats.json" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/fasta/dumpfastanuc/meta.yml b/modules/ensembl/fasta/dumpfastanuc/meta.yml index 09f29c1..3ceff2b 100644 --- a/modules/ensembl/fasta/dumpfastanuc/meta.yml +++ b/modules/ensembl/fasta/dumpfastanuc/meta.yml @@ -1,6 +1,7 @@ --- name: "fasta_dumpfastanuc" -description: A module for dumping core database gene module nucleotide fasta seqs +description: > + A module for dumping core database gene module nucleotide FASTA sequences. 
keywords: - container @@ -11,30 +12,39 @@ keywords: - fasta tools: - - "fasta": - description: "Modules which function to facilitate data deposition/retrieval of fasta sequences." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + fasta: + description: > + Modules which function to facilitate data deposition and retrieval + of FASTA sequences. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - db: - type: map - description: "A meta map including 'core database name'." - e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] - pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" + - name: db + type: map + description: > + A meta map including 'core database name', + e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] + pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" output: - - nucleotide_fasta: - type: map - description: "Fasta file containing nucleotide sequence(s)." - e.g [ db:'database name', file("*_fasta_dna.fasta") ] - pattern: "*_fasta_dna.fasta" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: nucleotide_fasta_meta + type: map + description: > + Metadata for nucleotide FASTA sequences, e.g. 
[ db:'database name', file("*_fasta_dna.fasta") ] + + - name: nucleotide_fasta_file + type: file + description: "Nucleotide FASTA sequences from the database" + pattern: "*_fasta_dna.fasta" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/fasta/dumpfastapeptide/meta.yml b/modules/ensembl/fasta/dumpfastapeptide/meta.yml index a4f81eb..c6feafd 100644 --- a/modules/ensembl/fasta/dumpfastapeptide/meta.yml +++ b/modules/ensembl/fasta/dumpfastapeptide/meta.yml @@ -1,6 +1,7 @@ --- name: "fasta_dumpfastapeptide" -description: A module for dumping peptide FASTA sequences from core databases +description: > + A module for dumping peptide FASTA sequences from core databases. keywords: - container @@ -11,30 +12,39 @@ keywords: - fasta tools: - - "fasta": - description: "Modules which function to facilitate data deposition/retrieval of fasta sequences." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + fasta: + description: > + Modules which function to facilitate data deposition and retrieval + of FASTA sequences. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - db: - type: map - description: "A meta map including 'core database name'." - e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] - pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" + - name: db + type: map + description: > + A meta map including 'core database name', + e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] + pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" output: - - peptide_fasta: - type: map - description: "Fasta file containing peptide sequence(s)." 
- e.g [ db:'database name', file("*_fasta_pep.fasta") ] - pattern: "*_fasta_pep.fasta" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: peptide_fasta_meta + type: map + description: > + Metadata for peptide FASTA sequences, e.g. [ db:'database name', file("*_fasta_pep.fasta") ] + + - name: peptide_fasta_file + type: file + description: "Peptide FASTA sequences from the database" + pattern: "*_fasta_pep.fasta" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/genbank/extractgb/meta.yml b/modules/ensembl/genbank/extractgb/meta.yml index 9ddcd0f..fd4d090 100644 --- a/modules/ensembl/genbank/extractgb/meta.yml +++ b/modules/ensembl/genbank/extractgb/meta.yml @@ -1,6 +1,8 @@ --- name: "genbank_extractgb" -description: Parse a GenBank file and create cleaned up files and validate generated JSON files. +description: > + Parse a GenBank file and create cleaned up files, validating generated + JSON files. keywords: - container @@ -11,50 +13,54 @@ keywords: - genomics tools: - - "genbank": - description: "Modules with functions related to work with Genbank related data files." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + genbank: + description: > + Modules with functions related to working with GenBank data files. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing genome database meta information - e.g. `[ accession:'NC_XXXX.X' ]` - - gb_file: - type: file - description: | - File path to genbank formatted input file (*.gb) - e.g. 
`[gb_file("genome.gb")]` + - name: meta + type: map + description: "Groovy Map containing genome database meta information, e.g. `[ accession:'NC_XXXX.X' ]`" + + - name: gb_file + type: file + description: "File path to GenBank formatted input file (*.gb), e.g. `[gb_file('genome.gb')]`" output: - - genome: - type: file - description: Genome metadata (JSON) - pattern: "genome.json" - - seq_regions: - type: file - description: Sequence region(s) meta (JSON) - pattern: "seq_region.json" - - dna_fasta: - type: file - description: Genome nucleotide sequence(s) (FASTA) - pattern: "dna.fasta" - - gene_gff: - type: file - description: Gene model information (GFF3) - pattern: "*.gff" - - pep_fasta: - type: file - description: Genome protein-coding gene amino acid sequence(s) (FASTA) - pattern: "pep.fasta" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: genome_meta + type: file + description: "Genome metadata (JSON)" + pattern: "genome.json" + + - name: seq_regions_meta + type: file + description: "Sequence region(s) metadata (JSON)" + pattern: "seq_region.json" + + - name: dna_fasta + type: file + description: "Genome nucleotide sequence(s) (FASTA)" + pattern: "dna.fasta" + + - name: gene_gff + type: file + description: "Gene model information (GFF3)" + pattern: "*.gff" + + - name: pep_fasta + type: file + description: "Genome protein-coding gene amino acid sequence(s) (FASTA)" + pattern: "pep.fasta" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/gff3/dumpgff3/meta.yml b/modules/ensembl/gff3/dumpgff3/meta.yml index 1d27a3c..2aa4365 100644 --- a/modules/ensembl/gff3/dumpgff3/meta.yml +++ b/modules/ensembl/gff3/dumpgff3/meta.yml @@ -1,6 +1,8 @@ --- 
name: "gff3_dumpgff3" -description: write your description here +description: > + Dump GFF3 gene models from a core Ensembl database into GFF3 files. + (Add more details as needed.) keywords: - container @@ -10,30 +12,40 @@ keywords: - gff3 tools: - - "gff3": - description: "Modules for GFF3 validation, data deposition and retrieval from/into GFF3 file format" - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + gff3: + description: > + Modules for GFF3 validation, data deposition, and retrieval from/into + GFF3 file format. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - db: - type: map - description: "A meta map including 'core database name'." - e.g. [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] - pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" + - name: db + type: map + description: > + A meta map including 'core database name', e.g. + [[ db:'genus_species_gca00000000v1_core_110_1', species:'Genus species', ...]] + pattern: "[a-z]+_[a-z]_gc[af][0-9]v[0-9]+_core_*" output: - - gff3: - type: map - description: "GFF3 file containing gene model(s)." - e.g [ db:'database name', file("*.gff3") ] - pattern: "*.gff3" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: gff3_meta + type: map + description: > + GFF3 file metadata including database information, e.g. + [ db:'database name', file("*.gff3") ] + + - name: gff3_file + type: file + description: "GFF3 file containing gene model(s)." + pattern: "*.gff3" + + - name: versions + type: file + description: "File containing information on module software version." 
+ pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/gff3/validation/meta.yml b/modules/ensembl/gff3/validation/meta.yml index 344861b..e7d8784 100644 --- a/modules/ensembl/gff3/validation/meta.yml +++ b/modules/ensembl/gff3/validation/meta.yml @@ -1,6 +1,7 @@ --- name: "gff3_validation" -description: Check for proper validation of the GFF3 using GenomeTools. +description: > + Check for proper validation of GFF3 files using GenomeTools. keywords: - ensembl @@ -11,37 +12,38 @@ keywords: - validation tools: - - "gff3": - description: "Modules for GFF3 validation, data deposition and retrieval from/into GFF3 file format" - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + gff3: + description: > + Modules for GFF3 validation, data deposition, and retrieval from/into + GFF3 file format. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing meta information - e.g. `[ id:'accession1' ]` - - - gene_models: - type: file - description: GFF3 files - pattern: "*.{gff3}" + - name: meta + type: map + description: "Groovy Map containing meta information, e.g. `[ id:'accession1' ]`" + + - name: gene_models + type: file + description: "GFF3 files" + pattern: "*.{gff3}" output: - - meta: - type: map - description: | - Groovy Map containing meta information - e.g. `[ id:'accession1' ]` - - gene_models: - type: file - description: All files used as entry - pattern: "*.{gff3}" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: gene_models_meta + type: map + description: "Groovy Map containing meta information, e.g. 
`[ id:'accession1' ]`" + + - name: gene_models_file + type: file + description: "All GFF3 files used as input" + pattern: "*.{gff3}" + + - name: versions + type: file + description: "File containing information on module software version" + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/manifest/integrity/meta.yml b/modules/ensembl/manifest/integrity/meta.yml index 44b8ec6..d18bfcf 100644 --- a/modules/ensembl/manifest/integrity/meta.yml +++ b/modules/ensembl/manifest/integrity/meta.yml @@ -1,6 +1,7 @@ --- name: "manifest_integrity" -description: Check the data integrity of a set of genomic files, listed by their manifest. +description: > + Check the data integrity of a set of genomic files, listed by their manifest. keywords: - ensembl - genomics @@ -9,47 +10,46 @@ keywords: - validation tools: - - "manifest": - description: "Nextflow modules for genomic data file manifest comparison/checking" - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + manifest: + description: "Nextflow modules for genomic data file manifest comparison/checking" + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing meta information - e.g. `[ id:'accession1' ]` - - manifest_files: - type: file - description: Set of genomic data files - pattern: "*.{gff3,fasta,json}" + - name: meta + type: map + description: "Groovy Map containing meta information, e.g. `[ id:'accession1' ]`" + + - name: manifest_files + type: file + description: "Set of genomic data files" + pattern: "*.{gff3,fasta,json}" output: - - meta: - type: map - description: | - Groovy Map containing meta information - e.g. `[ id:'accession1' ]` - - - all_files: - type: file - description: All files used as entry. 
- pattern: "*.{gff3,fasta,json}" - - - integrity_file: - type: file - description: Captures potential errors detected during the integrity check of a manifest file (manifest.json). - pattern: "integrity.out" - - - error_log: - type: file - description: Integrity error log if it failed - pattern: "integrity.out" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: all_files_meta + type: map + description: "Groovy Map containing meta information, e.g. `[ id:'accession1' ]`" + + - name: all_files + type: file + description: "All files used as entry" + pattern: "*.{gff3,fasta,json}" + includeInputs: true + + - name: error_log_meta + type: map + description: "Groovy Map containing meta information, e.g. `[ id:'accession1' ]`" + + - name: integrity_file + type: file + description: "Captures potential errors detected during integrity check" + pattern: "integrity.out" + + - name: versions + type: file + description: "File containing module software versions" + pattern: "versions.yml" authors: - "@ensembl-dev" diff --git a/modules/ensembl/manifest/maker/meta.yml b/modules/ensembl/manifest/maker/meta.yml index 0f56d39..1c3dece 100644 --- a/modules/ensembl/manifest/maker/meta.yml +++ b/modules/ensembl/manifest/maker/meta.yml @@ -1,50 +1,38 @@ --- name: "manifest_maker" -description: Generate a manifest and validate its JSON schema using provided input files and metadata. +description: > + Generate a manifest and validate its JSON schema using provided input + files and metadata. keywords: - ensembl - genomics - genomio - integrity - validation + tools: - - "manifest_maker": - description: "Nextflow modules for genomic data file manifest comparison/checking." - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] - + manifest_maker: + description: "Nextflow modules for genomic data file manifest comparison/checking." 
+ homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" + input: - - meta: - type: map - description: | - Groovy Map containing meta information - e.g. `[ accession:'accession01' ]` - - manifest_files: - type: file - description: Set of genomic data files - pattern: "*.{gff3,fasta,json}" + - name: meta + type: map + description: > + Groovy Map containing meta information, e.g. `[ accession:'accession01' ]` + - name: file_name + type: file + description: "Input file to validate." output: - - meta: - type: map - description: | - Groovy Map containing meta information - e.g. `[ accession:'accession1' ]` - - - all_files: - type: file - description: All files used as entry - pattern: "*.{gff3,fasta,json}" - - error_log: - type: file - description: Integrity error log if it failed - pattern: "integrity.out" - - versions: - type: file - description: File containing information on module software version - pattern: "versions.yml" + - name: versions + type: file + description: "File containing information on module software version." + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" maintainers: - - "ensembl-dev@ebi.ac.uk" \ No newline at end of file + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/metadata/amendgenome/meta.yml b/modules/ensembl/metadata/amendgenome/meta.yml index 7864827..fe59eb0 100644 --- a/modules/ensembl/metadata/amendgenome/meta.yml +++ b/modules/ensembl/metadata/amendgenome/meta.yml @@ -1,57 +1,60 @@ --- name: "metadata_amendgenome" -description: Extends genome metadata using input files and validates the output against a genome schema. +description: > + Extends genome metadata using input files and validates the output against + a genome schema. 
keywords: - amend - genome - - metada + - metadata - validation tools: - - "metadata_amendgenome": - description: "This module processes genome metadata by incorporating additional data from assembly reports and GenBank files, ensuring compliance with the genome schema" - homepage: "https://github.com/Ensembl/nextflow_modules" - licence: ['Apache License version 2.0'] + metadata_amendgenome: + description: > + This module processes genome metadata by incorporating additional + data from assembly reports and GenBank files, ensuring compliance with the + genome schema + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" input: - - meta: - type: map - description: - Groovy Map containing meta information - e.g. `[ accession:'accession01' ] - - genome_json: - type: file - description: "JSON file containing genome metadata." - pattern: "*.json" - - asm_report: - type: file - description: "Assembly report file with genomic metadata." - pattern: "*.txt" - - genomic_fna: - type: file - description: "FASTA file containing genomic sequences." - pattern: "*.{fna,fasta,fa}" - - - genbank_gbff: - type: file - description: "GenBank file containing genome annotations." - pattern: "*.{gbff,gb}" - + - name: meta + type: map + description: > + Groovy Map containing meta information e.g. `[ accession:'accession01' ]` + - name: genome_json + type: file + description: "JSON file containing genome metadata." + pattern: "*.json" + - name: asm_report + type: file + description: "Assembly report file with genomic metadata." + pattern: "*.txt" + - name: genomic_fna + type: file + description: "FASTA file containing genomic sequences." + pattern: "*.{fna,fasta,fa}" + - name: genbank_gbff + type: file + description: "GenBank file containing genome annotations." + pattern: "*.{gbff,gb}" + output: - - meta: - type: map - description: - Groovy Map containing meta information - e.g. 
`[ accession:'accession01' ] - - amended_json: - description: "JSON file containing genome metadata." - pattern: "*.json" - - versions: - type: file - description: "File containing information on module software version." - pattern: "versions.yml" - - + - name: amended_json + type: map + description: > + Groovy Map containing meta information e.g. `[ accession:'accession01' ]` + - name: amended_json_file + type: file + description: "JSON file containing genome metadata." + pattern: "*.json" + - name: versions + type: file + description: "File containing information on module software version." + pattern: "versions.yml" + authors: - "@ensembl-dev" maintainers: diff --git a/modules/ensembl/metadata/genomesummary/meta.yml b/modules/ensembl/metadata/genomesummary/meta.yml index f390100..c2b5b5f 100644 --- a/modules/ensembl/metadata/genomesummary/meta.yml +++ b/modules/ensembl/metadata/genomesummary/meta.yml @@ -1,7 +1,6 @@ --- name: "metadata_genomesummary" -description: Extract genome meta info from am NCBI datasets-cli 'genome summary' JSON. - A structured query JSON is also input and used to extract the key meta from the genome summary JSON. +description: Extract genome meta info from an NCBI datasets-cli 'genome summary' JSON. keywords: - container - docker @@ -9,48 +8,47 @@ keywords: - ensembl-genomio - genome - metadata + tools: - - "metadata": - description: Modules which function to generate,fetch or otherwise work \ - with metadata curation or preparation. - homepage: "https://github.com/Ensembl/nextflow_modules" - license: ['Apache License version 2.0'] + metadata: + description: Modules which function to generate, fetch or otherwise work + with metadata curation or preparation. + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] input: - - accession: - type: map - description: Meta map including sequence accession. Sequence record associated with - NCBI accession (e.g.
NC_0-9+, NM_0-9+, NT_0-9+, NP_0-9+) - e.g. [[ accession:'NC_12341']] - - input_query: - type: file - description: Input query to parse an NCBI genome summary JSON - pattern: "*-query.json" - - genome_summary_json: - type: file - description: A genome summary JSON file downloaded via datasets-cli (NCBI) \ - containing 1> genome summaries '{reports: {}}' - pattern: "*.json" + - name: accession + type: map + description: Meta map including sequence accession. + + - name: input_query + type: file + description: Input query to parse an NCBI genome summary JSON + pattern: "*-query.json" + + - name: genome_summary_json + type: file + description: | + A genome summary JSON file downloaded via datasets-cli (NCBI) + pattern: "*.json" output: - - meta: - type: map - description: | - Meta map including sequence accession. Sequence record associated with - NCBI accession (e.g. NC_0-9+, NM_0-9+, NT_0-9+, NP_0-9+) - e.g. [[ accession:'NC_12341']] - - meta_json: - - "genome-meta.json": - type: file - description: Genome metadata queried from complete NCBI datasets genome summary JSON. - pattern: "*-meta.json" - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" + - name: meta + type: map + description: Meta map including sequence accession. + + - name: meta_json + type: file + description: Genome metadata queried from JSON. + pattern: "*-meta.json" + + - name: versions + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "ensembl-dev@ebi.ac.uk" + maintainers: - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/schema/json/meta.yml b/modules/ensembl/schema/json/meta.yml new file mode 100644 index 0000000..e8d09ec --- /dev/null +++ b/modules/ensembl/schema/json/meta.yml @@ -0,0 +1,48 @@ +--- +name: "schema_json" +description: > + Validate the schema for JSON files. 
+ +keywords: + - validate + - JSON + - schema + +tools: + Schema: + description: "Nextflow modules for Ensembl pipelines" + homepage: "https://github.com/Ensembl/nextflow_modules" + license: ["Apache License version 2.0"] + identifier: "" + +input: + - name: meta + type: map + description: > + Groovy Map containing sample information, e.g. `[ id:'sample1', single_end:false ]` + + - name: json + type: file + description: JSON file to validate + pattern: "*.json" + +output: + - name: meta + type: map + description: > + Groovy Map containing sample information, e.g. `[ id:'sample1', single_end:false ]` + + - name: verified_json + type: file + description: Verified JSON file + pattern: "*.json" + + - name: versions + type: file + description: File containing ensembl-genomio versions + pattern: "versions.yml" + +authors: + - "ensembl-dev@ebi.ac.uk" +maintainers: + - "ensembl-dev@ebi.ac.uk"