From d830e164c56ce6093f57aaba35ffb962f08da244 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:18:42 +0000
Subject: [PATCH 01/36] Add python script for splitting FASTA, chunking if
 necessary

---
 .../ensembl/fasta/splitfasta/split_fasta.py   | 462 ++++++++++++++++++
 1 file changed, 462 insertions(+)
 create mode 100644 modules/ensembl/fasta/splitfasta/split_fasta.py

diff --git a/modules/ensembl/fasta/splitfasta/split_fasta.py b/modules/ensembl/fasta/splitfasta/split_fasta.py
new file mode 100644
index 0000000..164ec44
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/split_fasta.py
@@ -0,0 +1,462 @@
+#!/usr/bin/env python3
+
+"""Split a FASTA file (possibly gzipped) into multiple smaller FASTA files."""
+
+import inspect
+import logging
+import shutil
+from pathlib import Path
+from typing import Optional, List, Set, Tuple
+
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+
+try:
+    from ensembl.utils.archive import open_gz_file  # type: ignore
+except ImportError:
+    import gzip
+
+    def open_gz_file(path):
+        opener = gzip.open if str(path).endswith(".gz") else open  # chosen by suffix
+        return opener(str(path), "rt")
+
+
+try:
+    from ensembl.utils.argparse import ArgumentParser  # type: ignore
+except ImportError:
+    from argparse import ArgumentParser
+
+try:
+    from ensembl.utils.logging import init_logging_with_args  # type: ignore
+except ImportError:
+    import logging
+
+    def init_logging_with_args(args):
+        chosen = args.log_level if hasattr(args, "log_level") else "INFO"
+        logging.basicConfig(level=chosen)
+
+
+class Params:
+    """Validated settings that control how a FASTA file is split.
+
+    ``out_dir`` falls back to the directory containing ``fasta_file``. Numeric
+    limits are optional; ``ValueError`` is raised at construction time when one
+    is not strictly positive or ``min_chunk_length`` lacks a maximum length.
+    """
+
+    def __init__(
+        self,
+        fasta_file: Path,
+        out_dir: Optional[Path] = None,
+        write_agp: bool = False,
+        max_seqs_per_file: Optional[int] = None,
+        max_seq_length_per_file: Optional[int] = None,
+        min_chunk_length: Optional[int] = None,
+        max_files_per_directory: Optional[int] = None,
+        max_dirs_per_directory: Optional[int] = None,
+        delete_existing_files: bool = False,
+        unique_file_names: bool = False,
+        delete_original_file: bool = False,
+        force_max_seq_length: bool = False,
+    ):
+        self.fasta_file = fasta_file
+        self.out_dir = fasta_file.parent if out_dir is None else out_dir
+        self.write_agp = write_agp
+        self.max_seqs_per_file = max_seqs_per_file
+        self.max_seq_length_per_file = max_seq_length_per_file
+        self.min_chunk_length = min_chunk_length
+        self.max_files_per_directory = max_files_per_directory
+        self.max_dirs_per_directory = max_dirs_per_directory
+        self.delete_existing_files = delete_existing_files
+        self.unique_file_names = unique_file_names
+        self.delete_original_file = delete_original_file
+        self.force_max_seq_length = force_max_seq_length
+
+        self._validate_params()
+
+    def _validate_params(self) -> None:
+        """Raise ``ValueError`` for the first setting that is out of range."""
+        positive_or_none = (
+            ("--max-dirs-per-directory", self.max_dirs_per_directory),
+            ("--max-files-per-directory", self.max_files_per_directory),
+            ("--max-seqs-per-file", self.max_seqs_per_file),
+            ("--max-seq-length-per-file", self.max_seq_length_per_file),
+        )
+        for flag, value in positive_or_none:
+            if value is not None and value <= 0:
+                raise ValueError(f"{flag} must be > 0 or None")
+        if self.min_chunk_length is None:
+            return
+        if self.max_seq_length_per_file is None:
+            raise ValueError("--min-chunk-length requires --max-seq-length-per-file")
+        if self.min_chunk_length <= 0:
+            raise ValueError("--min-chunk-length must be > 0")
+
+
+class OutputWriter:
+    """
+    Manages output file creation and counters, writing in a single pass.
+    Creates/cleans directories lazily as required.
+    """
+
+    def __init__(self, params: Params):
+        """Derive the output basename from the input name and open the first file."""
+        self.params = params
+        # Strip an optional ".gz" first, then a ".fa" or ".fasta" extension.
+        stem = params.fasta_file.name.removesuffix(".gz")
+        self.basename = stem.removesuffix(".fa").removesuffix(".fasta")
+        agp_name = self.basename + ".agp"
+        self.agp_file = params.out_dir.joinpath(agp_name) if params.write_agp else None
+        self.file_count = 0
+        self.record_count = 0
+        self.file_len = 0
+        self._fh = None
+        self._agp_fh = None
+        self._cleaned_dirs: Set[Path] = set()
+
+        self.open_new_file()
+
+    def _create_or_clean_dir(self, dir_path: Path) -> None:
+        """Create ``dir_path``; empty it once per run if ``delete_existing_files`` is set."""
+        try:
+            dir_path.mkdir(parents=True, exist_ok=True)
+            if self.params.delete_existing_files and dir_path not in self._cleaned_dirs:
+                for child in dir_path.iterdir():
+                    if child.is_dir():
+                        shutil.rmtree(child)
+                    else:
+                        child.unlink()
+                self._cleaned_dirs.add(dir_path)
+        except Exception:
+            logging.exception("Failed to prepare output directory '%s'", dir_path)
+            raise
+
+    def _get_subdir_path(self, dir_index: int) -> Path:
+        """Map ``dir_index`` to its output subdirectory.
+
+        Without ``max_dirs_per_directory`` every index gets its own flat, 1-based
+        directory (index 0 -> "1"), so files belonging to different directory
+        indexes never overwrite each other. Otherwise the index is spread over
+        nested levels whose names are in ``0 .. max_dirs - 1``.
+        """
+        max_dirs = self.params.max_dirs_per_directory
+        if max_dirs is None:
+            return self.params.out_dir.joinpath(str(dir_index + 1))
+        parts, current_index = [], dir_index
+        while current_index >= 0:
+            parts.append(f"{current_index % max_dirs}")
+            current_index = current_index // max_dirs - 1
+        return self.params.out_dir.joinpath(*reversed(parts))
+
+    def _get_file_and_dir_index(self) -> Tuple[int, int]:
+        """
+        Determines index of file and directory based on file count and max files per directory.
+        Returns (file_index, dir_index).
+        """
+        max_files = self.params.max_files_per_directory
+        if max_files is None:
+            return self.file_count, 0
+        adjusted_count = self.file_count - 1
+        return (adjusted_count % max_files + 1, adjusted_count // max_files)
+
+    def _get_path_for_next_file(self) -> Path:
+        """Computes path for the next output file."""
+        self.file_count += 1
+        file_index, dir_index = self._get_file_and_dir_index()
+        subdir_path = self._get_subdir_path(dir_index)
+        self._create_or_clean_dir(subdir_path)
+
+        if self.params.unique_file_names:
+            file_name = f"{self.basename}.{dir_index}.{file_index}.fa"
+        else:
+            file_name = f"{self.basename}.{file_index}.fa"
+        return subdir_path.joinpath(file_name)
+
+    def add_agp_entry(
+        self,
+        object_id: str,
+        start: int,
+        end: int,
+        part_nr: int,
+        part_id: str,
+        part_length: int,
+    ) -> None:
+        """Adds an entry to the AGP file."""
+        # AGP columns for WGS contig component type:
+        # object, object_beg, object_end, part_number, component_type,
+        # component_id, component_beg, component_end, orientation
+        if self._agp_fh is None:
+            return
+        try:
+            line = f"{object_id}\t{start}\t{end}\t{part_nr}\tW\t{part_id}\t1\t{part_length}\t+\n"
+            self._agp_fh.write(line)
+        except Exception:
+            logging.exception("Failed to write AGP entry for part '%s'", part_id)
+            raise
+
+    def create_agp_file(self) -> None:
+        """Creates the AGP file for recording sequence chunking."""
+        if self.agp_file is None:
+            return
+        try:
+            self.params.out_dir.mkdir(parents=True, exist_ok=True)
+            self._agp_fh = open(self.agp_file, "w")
+            self._agp_fh.write("# AGP-version 2.0\n")
+            logging.info("Created AGP file '%s'", self.agp_file)
+        except Exception:
+            logging.exception("Failed to open AGP file '%s'", self.agp_file)
+            raise
+
+    def open_new_file(self) -> None:
+        """Closes current file (if any) and opens a new output file."""
+        if self._fh is not None:
+            self._fh.close()
+
+        path = self._get_path_for_next_file()
+        try:
+            self._fh = open(path, "w")
+            logging.debug("Opened output file '%s'", path)
+        except Exception:
+            logging.exception("Failed to open output file '%s'", path)
+            raise
+        self.record_count = 0
+        self.file_len = 0
+
+    def write_record(self, record: SeqRecord) -> None:
+        """Writes a SeqRecord to the current output file."""
+        try:
+            SeqIO.write(record, self._fh, "fasta")
+            self.record_count += 1
+            self.file_len += len(record.seq)
+        except Exception:
+            logging.exception("Failed to write record '%s' to output file", record.id)
+            raise
+
+    def close(self) -> None:
+        """Close the current FASTA handle and the AGP handle, if open."""
+        if self._fh is not None:
+            self._fh.close()
+            self._fh = None
+        if self._agp_fh is not None:
+            self._agp_fh.close()
+            self._agp_fh = None
+
+
+def _get_param_defaults() -> dict:
+    """Map each optional ``Params.__init__`` argument to its default value."""
+    no_default = inspect.Parameter.empty
+    return {
+        arg: spec.default
+        for arg, spec in inspect.signature(Params.__init__).parameters.items()
+        if arg != "self" and spec.default is not no_default
+    }
+
+
+def split_fasta(params: Params) -> None:
+    """Split ``params.fasta_file`` into smaller FASTA files in a single pass.
+
+    Records are appended to the current output file until ``max_seqs_per_file``
+    or ``max_seq_length_per_file`` would be exceeded, at which point a new file
+    is started. A record longer than ``max_seq_length_per_file`` is cut into
+    chunks (one per file) when ``force_max_seq_length`` is set, otherwise it is
+    written whole with a warning. With ``write_agp`` every written record or
+    chunk gets an AGP line. An empty input file is a no-op.
+
+    Raises:
+        FileNotFoundError: if the input FASTA file does not exist.
+    """
+    if not params.fasta_file.exists():
+        logging.error(
+            "DEBUG: fasta_file=%r resolved=%r cwd=%r",
+            str(params.fasta_file),
+            str(Path(params.fasta_file).resolve()),
+            str(Path.cwd()),
+        )
+        raise FileNotFoundError(f"Fasta file '{params.fasta_file}' does not exist")
+
+    # Do nothing if file size is 0
+    if params.fasta_file.stat().st_size == 0:
+        logging.info("Input FASTA '%s' is empty; nothing to do", params.fasta_file)
+        return
+
+    params.out_dir.mkdir(parents=True, exist_ok=True)
+    writer = OutputWriter(params)
+    max_seq_len = params.max_seq_length_per_file
+    max_seqs = params.max_seqs_per_file
+    try:
+        if params.write_agp:
+            writer.create_agp_file()
+
+        with open_gz_file(params.fasta_file) as fh:
+            for record in SeqIO.parse(fh, "fasta"):
+                seq_len = len(record.seq)
+                if max_seqs is not None and writer.record_count >= max_seqs:
+                    writer.open_new_file()
+
+                fits = max_seq_len is None or writer.file_len + seq_len <= max_seq_len
+                if fits or not (params.force_max_seq_length and seq_len > max_seq_len):
+                    if not fits:
+                        # Warn only for a truly over-long record, not for a plain file rollover.
+                        if seq_len > max_seq_len:
+                            logging.warning(
+                                "Record '%s' length %d exceeds max_seq_length_per_file %d but chunking not enabled",
+                                record.id,
+                                seq_len,
+                                max_seq_len,
+                            )
+                        if writer.record_count > 0:
+                            writer.open_new_file()
+                    writer.write_record(record)
+                    if params.write_agp:
+                        writer.add_agp_entry(record.id, 1, seq_len, 1, record.id, seq_len)
+                    continue
+
+                # Chunking enabled: cut into 0-based, end-exclusive pieces of <= max_seq_len.
+                starts = list(range(0, seq_len, max_seq_len))
+                ends = [min(s + max_seq_len, seq_len) for s in starts]
+
+                if params.min_chunk_length is not None and len(starts) > 1:
+                    last_chunk_len = ends[-1] - starts[-1]
+                    if last_chunk_len < params.min_chunk_length:
+                        logging.warning(
+                            "Length of last chunk of record '%s' is %d, lower than min_chunk_length: %d; "
+                            "merging with previous chunk",
+                            record.id,
+                            last_chunk_len,
+                            params.min_chunk_length,
+                        )
+                        ends[-2] = seq_len
+                        starts.pop()
+                        ends.pop()
+
+                for i, (start, end) in enumerate(zip(starts, ends), start=1):
+                    chunk_seq = record.seq[start:end]
+                    chunk_record = SeqRecord(
+                        chunk_seq,
+                        id=f"{record.id}_chunk_start_{start}",
+                        description=f"{record.description} (part {i})",
+                    )
+                    if writer.record_count > 0:
+                        writer.open_new_file()
+                    writer.write_record(chunk_record)
+
+                    if params.write_agp:
+                        # AGP coordinates are 1-based and inclusive.
+                        writer.add_agp_entry(
+                            record.id, start + 1, end, i, chunk_record.id, len(chunk_seq)
+                        )
+    except Exception:
+        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
+        raise
+    finally:
+        writer.close()
+
+    if params.delete_original_file:
+        try:
+            params.fasta_file.unlink(missing_ok=True)
+        except Exception:
+            logging.warning(
+                "Failed to delete original FASTA file '%s'",
+                params.fasta_file,
+                exc_info=True,
+            )
+
+
+def parse_args(argv: Optional[List[str]] = None) -> Params:
+    """Parse the command line into a validated ``Params`` object.
+
+    The "(default: ...)" part of each help text is taken from the defaults of
+    ``Params.__init__`` so the two cannot drift apart. Logging is initialised
+    from the parsed arguments before ``Params`` is built; a ``ValueError`` from
+    its validation propagates to the caller.
+    """
+    defaults = _get_param_defaults()
+    parser = ArgumentParser(
+        description="Split a FASTA file into multiple FASTA files, optionally chunking long sequences."
+    )
+    parser.add_argument(
+        "--fasta-file",
+        type=Path,
+        required=True,
+        help="Input raw or compressed FASTA file containing sequences to split",
+    )
+    parser.add_argument(
+        "--out-dir",
+        type=Path,
+        help="Top-level output directory (default: input FASTA directory)",
+    )
+
+    def add(option: str, text: str, **kind) -> None:
+        # "--max-seqs-per-file" -> "max_seqs_per_file", the matching Params argument.
+        key = option.lstrip("-").replace("-", "_")
+        parser.add_argument(option, help=f"{text} (default: {defaults[key]})", **kind)
+
+    add("--write-agp", "Write AGP file describing the splits", action="store_true")
+    add("--max-seqs-per-file", "Max records per output file", type=int)
+    add(
+        "--max-seq-length-per-file",
+        "Max cumulative sequence length per output file",
+        type=int,
+    )
+    add(
+        "--min-chunk-length",
+        "Minimum length of a chunk allowed as a remainder",
+        type=int,
+    )
+    add(
+        "--max-files-per-directory",
+        "Max files per directory before moving to next computed dir",
+        type=int,
+    )
+    add("--max-dirs-per-directory", "Max subdirectories per directory level", type=int)
+    add(
+        "--delete-existing-files",
+        "Delete existing files within computed output dirs",
+        action="store_true",
+    )
+    add(
+        "--unique-file-names",
+        "Make output file names unique across dirs by including dir_index",
+        action="store_true",
+    )
+    add(
+        "--delete-original-file",
+        "Delete original input FASTA after splitting",
+        action="store_true",
+    )
+    add(
+        "--force-max-seq-length",
+        "Chunk single sequences longer than max-seq-length-per-file",
+        action="store_true",
+    )
+
+    args = parser.parse_args(argv)
+    init_logging_with_args(args)
+
+    return Params(
+        fasta_file=args.fasta_file,
+        out_dir=args.out_dir,
+        write_agp=args.write_agp,
+        max_seqs_per_file=args.max_seqs_per_file,
+        max_seq_length_per_file=args.max_seq_length_per_file,
+        min_chunk_length=args.min_chunk_length,
+        max_files_per_directory=args.max_files_per_directory,
+        max_dirs_per_directory=args.max_dirs_per_directory,
+        delete_existing_files=args.delete_existing_files,
+        unique_file_names=args.unique_file_names,
+        delete_original_file=args.delete_original_file,
+        force_max_seq_length=args.force_max_seq_length,
+    )
+
+
+def main(argv: Optional[List[str]] = None) -> None:  # command-line entry point
+    params = parse_args(argv)  # kept outside the try: if this raises, `params` is unbound
+    try:
+        split_fasta(params)
+    except Exception:
+        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
+        raise
+
+
+if __name__ == "__main__":
+    main()

From f89d0b21e9185451f00b4c95143f889cadfb6126 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:19:44 +0000
Subject: [PATCH 02/36] Add pytest tests for split_fasta.py

---
 requirements-dev.txt      |   2 +
 tests/conftest.py         |  24 +++++++
 tests/test_split_fasta.py | 144 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+)
 create mode 100644 requirements-dev.txt
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_split_fasta.py

diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..c0367d2
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,2 @@
+biopython
+pytest
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..766dbc3
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,24 @@
+import importlib.util
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def split_fasta_module():
+    """Import ``split_fasta.py`` straight from its file path.
+
+    The script is loaded through ``importlib`` so the tests work regardless
+    of whether ``modules/`` is a Python package.
+    """
+    script = Path(__file__).resolve().parents[1].joinpath(
+        "modules", "ensembl", "fasta", "splitfasta", "split_fasta.py"
+    )
+    module_spec = importlib.util.spec_from_file_location("split_fasta", script)
+    loader = None if module_spec is None else module_spec.loader
+    if loader is None:
+        raise RuntimeError(f"Could not load module spec from {script}")
+
+    loaded = importlib.util.module_from_spec(module_spec)
+    loader.exec_module(loaded)
+    return loaded
diff --git a/tests/test_split_fasta.py b/tests/test_split_fasta.py
new file mode 100644
index 0000000..8a48af2
--- /dev/null
+++ b/tests/test_split_fasta.py
@@ -0,0 +1,144 @@
+# tests/test_split_fasta.py
+from pathlib import Path
+
+import pytest
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+def write_fasta(path: Path, records):
+    with path.open("w", encoding="utf-8", newline="\n") as handle:  # LF line endings
+        SeqIO.write(records, handle, "fasta")
+
+
+def list_output_fastas(out_dir: Path):
+    return sorted(out_dir.glob("**/*.fa"))  # every *.fa below out_dir, sorted by path
+
+
+def read_all_ids_from_fastas(out_dir: Path):
+    collected = []  # record ids, in sorted-path then in-file order
+    for fasta_path in list_output_fastas(out_dir):
+        with fasta_path.open("r", encoding="utf-8") as handle:
+            collected += [rec.id for rec in SeqIO.parse(handle, "fasta")]
+    return collected
+
+
+def parse_agp_lines(agp_path: Path):
+    """Return the tab-split columns of every non-blank, non-comment AGP line."""
+    rows = (row.rstrip("\n") for row in agp_path.read_text(encoding="utf-8").splitlines())
+    return [row.split("\t") for row in rows if row and not row.startswith("#")]
+
+
+def test_no_agp_by_default(tmp_path: Path, split_fasta_module):
+    """With ``write_agp=False`` FASTA output is produced but no AGP file."""
+    source = tmp_path / "in.fa"
+    target = tmp_path / "out"
+    single = SeqRecord(Seq("ACGT"), id="seq1", description="")
+    write_fasta(source, [single])
+
+    settings = split_fasta_module.Params(fasta_file=source, out_dir=target, write_agp=False)
+    split_fasta_module.split_fasta(settings)
+
+    produced = list_output_fastas(target)
+    agp_path = target / "in.agp"
+    assert not agp_path.exists()
+    assert len(produced) >= 1
+
+
+def test_split_by_max_seqs_per_file(tmp_path: Path, split_fasta_module):
+    """Three records with at most two per file end up in two output files."""
+    source = tmp_path / "in.fa"
+    target = tmp_path / "out"
+    bases = {"s1": "A", "s2": "C", "s3": "G"}
+    write_fasta(
+        source,
+        [SeqRecord(Seq(base * 10), id=name, description="") for name, base in bases.items()],
+    )
+
+    settings = split_fasta_module.Params(
+        fasta_file=source,
+        out_dir=target,
+        max_seqs_per_file=2,
+        write_agp=False,
+    )
+    split_fasta_module.split_fasta(settings)
+
+    written_ids = read_all_ids_from_fastas(target)
+    assert len(list_output_fastas(target)) == 2
+    assert written_ids == ["s1", "s2", "s3"]
+
+
+def test_chunk_merge_final_small_chunk_and_agp(tmp_path: Path, split_fasta_module):
+    """
+    A 2100 bp record with max_seq_length_per_file=1000 would be cut into
+    chunks of 1000, 1000 and 100 bp; min_chunk_length=200 merges the short
+    tail into the previous chunk, leaving chunks of 1000 and 1100 bp.
+    """
+    source = tmp_path / "in.fa"
+    target = tmp_path / "out"
+    write_fasta(source, [SeqRecord(Seq("A" * 2100), id="chr1", description="chr1")])
+
+    settings = split_fasta_module.Params(
+        fasta_file=source,
+        out_dir=target,
+        write_agp=True,
+        force_max_seq_length=True,
+        max_seq_length_per_file=1000,
+        min_chunk_length=200,
+        max_seqs_per_file=100000,  # avoid seq-count splitting interfering
+    )
+    split_fasta_module.split_fasta(settings)
+
+    # Only two chunks remain once the short tail has been merged.
+    first_id, second_id = "chr1_chunk_start_0", "chr1_chunk_start_1000"
+    assert read_all_ids_from_fastas(target) == [first_id, second_id]
+
+    agp_path = target / "in.agp"
+    assert agp_path.exists()
+
+    rows = parse_agp_lines(agp_path)
+    assert len(rows) == 2
+
+    # object, obj_beg, obj_end, part_no, type, comp_id, comp_beg, comp_end, orient
+    expected_rows = [
+        ["chr1", "1", "1000", "1", "W", first_id, "1", "1000", "+"],
+        ["chr1", "1001", "2100", "2", "W", second_id, "1", "1100", "+"],
+    ]
+
+    # Column groups are compared separately so a failure names the offending field.
+    for row, expected in zip(rows, expected_rows):
+        assert row[0] == expected[0]
+        assert row[1:4] == expected[1:4]
+        assert row[4] == expected[4]
+        assert row[5] == expected[5]
+        assert row[6:9] == expected[6:9]
+
+
+def test_agp_part_numbers_restart_per_object(tmp_path: Path, split_fasta_module):
+    """AGP part numbers count from 1 again for every chunked input record."""
+    source = tmp_path / "in.fa"
+    target = tmp_path / "out"
+    # Two 1200 bp records: each is longer than the 1000 bp limit set below.
+    contents = (("obj1", "A"), ("obj2", "C"))
+    records = [SeqRecord(Seq(b * 1200), id=n, description="") for n, b in contents]
+    write_fasta(source, records)
+
+    settings = split_fasta_module.Params(
+        fasta_file=source,
+        out_dir=target,
+        write_agp=True,
+        force_max_seq_length=True,
+        max_seq_length_per_file=1000,
+        min_chunk_length=100,  # => 2 chunks each, no merge
+    )
+    split_fasta_module.split_fasta(settings)
+
+    rows = parse_agp_lines(target / "in.agp")
+    # Group the part_number column (index 3) by object id (index 0).
+    part_numbers = {}
+    for row in rows:
+        part_numbers.setdefault(row[0], []).append(int(row[3]))
+
+    assert part_numbers["obj1"] == [1, 2]
+    assert part_numbers["obj2"] == [1, 2]

From cfe0b4e479f0a92e552f6e73fc6f69d1204e7168 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:20:36 +0000
Subject: [PATCH 03/36] Add nextflow module and tests for running
 split_fasta.py

---
 .gitignore                                    |   3 +
 .../ensembl/fasta/splitfasta/environment.yml  |   8 +
 modules/ensembl/fasta/splitfasta/main.nf      | 106 ++++++
 .../fasta/splitfasta/tests/data/agp/test.agp  |   4 +
 .../fasta/splitfasta/tests/data/real/in.fa    |   6 +
 .../tests/data/splits/default/0/test.1.fa     |   4 +
 .../tests/data/splits/default/0/test.2.fa     |   2 +
 .../tests/data/splits/multi_dir/0/0/test.1.fa |   2 +
 .../tests/data/splits/multi_dir/0/1/test.2.fa |   2 +
 .../tests/data/splits/unique/0/test.0.1.fa    |   2 +
 .../tests/data/splits/unique/0/test.0.2.fa    |   2 +
 .../fasta/splitfasta/tests/main.nf.test       | 301 ++++++++++++++++++
 .../fasta/splitfasta/tests/main.nf.test.snap  | 168 ++++++++++
 13 files changed, 610 insertions(+)
 create mode 100644 modules/ensembl/fasta/splitfasta/environment.yml
 create mode 100644 modules/ensembl/fasta/splitfasta/main.nf
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/main.nf.test
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap

diff --git a/.gitignore b/.gitignore
index e75900d..961b31c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 .nextflow*
 .nf-test*
+__pycache__/
+*.pyc
+.python-version
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/environment.yml b/modules/ensembl/fasta/splitfasta/environment.yml
new file mode 100644
index 0000000..759f3da
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/environment.yml
@@ -0,0 +1,8 @@
+---
+name: "fasta_splitfasta"
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=3.11.7
+  - biopython=1.86
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/splitfasta/main.nf
new file mode 100644
index 0000000..0a8b761
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/main.nf
@@ -0,0 +1,106 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+process FASTA_SPLITFASTA {
+    // Splits one FASTA file per task by running split_fasta.py from this module's directory.
+    tag "${meta.id}"
+    label 'process_low'
+    // Outputs are published by copy into params.outdir, or '.' when that param is unset.
+    publishDir "${params.outdir ?: '.'}", mode: 'copy'
+
+    input:
+        tuple val(meta), path(fasta)
+    // FASTA outputs are collected with a recursive glob; the AGP output is optional.
+    output:
+        tuple val(meta), path("**/*.fa"), emit: fasta
+        tuple val(meta), path("*.agp"), emit: agp, optional: true
+
+    script:
+        def args = []
+        // Each pipeline parameter is forwarded to the script only when it is set (Groovy truthiness).
+        if (params.max_seqs_per_file) {
+            args << "--max-seqs-per-file ${params.max_seqs_per_file}"
+        }
+
+        if (params.max_seq_length_per_file) {
+            args << "--max-seq-length-per-file ${params.max_seq_length_per_file}"
+        }
+
+        if (params.min_chunk_length) {
+            args << "--min-chunk-length ${params.min_chunk_length}"
+        }
+
+        if (params.max_files_per_directory) {
+            args << "--max-files-per-directory ${params.max_files_per_directory}"
+        }
+
+        if (params.max_dirs_per_directory) {
+            args << "--max-dirs-per-directory ${params.max_dirs_per_directory}"
+        }
+
+        if (params.force_max_seq_length) {
+            args << "--force-max-seq-length"
+        }
+
+        if (params.write_agp) {
+            args << "--write-agp"
+        }
+
+        if (params.unique_file_names) {
+            args << "--unique-file-names"
+        }
+
+        if (params.delete_existing_files) {
+            args << "--delete-existing-files"
+        }
+        // The shell's PWD is used both as the prefix of the input path and as the output directory.
+        """
+        python \\
+            ${moduleDir}/split_fasta.py \\
+            --fasta-file \$PWD/${fasta} \\
+            --out-dir \$PWD \\
+            ${args.join(' ')}
+        """
+    // Stub: copies fixture files from tests/data (layout chosen from params), replacing 'test' in file names with meta.id.
+    stub:
+        """
+        set -euo pipefail
+
+        FIXTURE_DIR="${moduleDir}/tests/data"
+
+        LAYOUT="default"
+        if [[ "${params.unique_file_names ?: false}" == "true" ]]; then
+            LAYOUT="unique"
+        elif [[ -n "${params.max_dirs_per_directory ?: ''}" || -n "${params.max_files_per_directory ?: ''}" ]]; then
+        LAYOUT="multi_dir"
+        fi
+
+        mkdir -p splits
+        cp -R "\$FIXTURE_DIR/splits/\$LAYOUT/." "splits/"
+
+        find splits -type f -name 'test*.fa' | while read -r f; do
+            bn=\$(basename "\$f")
+            dir=\$(dirname "\$f")
+            new_bn="\${bn/test/${meta.id}}"
+            mv "\$f" "\${dir}/\${new_bn}"
+        done
+
+        if [[ "${params.write_agp ?: false}" == "true" ]]; then
+            cp "\$FIXTURE_DIR/agp/test.agp" "${meta.id}.agp"
+        fi
+        """
+
+        
+}
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp b/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
new file mode 100644
index 0000000..46fc419
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
@@ -0,0 +1,4 @@
+# AGP-version 2.0
+seq1	1	10	1	W	seq1	1	10	+
+seq2	1	10	1	W	seq2	1	10	+
+seq3	1	11	1	W	seq3	1	11	+
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa b/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
new file mode 100644
index 0000000..3d3f65c
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
@@ -0,0 +1,6 @@
+>seq1
+AAAAAAAAAA
+>seq2
+CCCCCCCCCC
+>seq3
+GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
new file mode 100644
index 0000000..7abe938
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
@@ -0,0 +1,4 @@
+>seq1
+AAAAAAAAAA
+>seq2
+CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
new file mode 100644
index 0000000..6287efa
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
@@ -0,0 +1,2 @@
+>seq3
+GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
new file mode 100644
index 0000000..9512f36
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
@@ -0,0 +1,2 @@
+>seq1
+AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
new file mode 100644
index 0000000..2f3b40f
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
@@ -0,0 +1,2 @@
+>seq2
+CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
new file mode 100644
index 0000000..9512f36
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
@@ -0,0 +1,2 @@
+>seq1
+AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
new file mode 100644
index 0000000..2f3b40f
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
@@ -0,0 +1,2 @@
+>seq2
+CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
new file mode 100644
index 0000000..3db1283
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
@@ -0,0 +1,301 @@
+// nf-core modules test fasta/splitfasta
+nextflow_process {
+
+    name "Test Process FASTA_SPLITFASTA"
+    script "../main.nf"
+    process "FASTA_SPLITFASTA"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "fasta"
+    tag "fasta/splitfasta"
+
+
+    def real_fa = new File("modules/ensembl/fasta/splitfasta/tests/data/real/in.fa").canonicalFile
+
+    test("Stub outputs: default layout, no AGP") {
+
+        when {
+            options "-stub"
+
+            // Ensure params are set explicitly for this test
+            params.write_agp = false
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            // fasta: tuple(meta, fa_paths)
+            assert process.out.fasta != null
+            assert process.out.fasta.size() == 1
+
+            def fasta_out = process.out.fasta[0]
+            def meta = fasta_out[0]
+            def fas  = fasta_out[1]
+
+            assert meta.id == "test"
+            assert fas != null
+            assert fas.size() == 2
+
+            // agp: tuple(meta, agp_paths) optional -> should be absent
+            assert process.out.agp != null
+            assert process.out.agp.size() == 0
+
+            // Ensure FASTA parsing works (downstream contract)
+            def merged = fas
+                .collect { path(it).fasta }
+                .inject([:]) { acc, m -> acc + m }
+
+            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Stub outputs: AGP optional output appears when enabled") {
+
+        when {
+            options "-stub"
+
+            params.write_agp = true
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            assert process.out.fasta.size() == 1
+            def fasta_out = process.out.fasta[0]
+            def fas = fasta_out[1]
+            assert fas.size() == 2
+
+            assert process.out.agp.size() == 1
+            def agp_out = process.out.agp[0]
+            def agp_meta = agp_out[0]
+            def agp = agp_out[1]
+            def agp_paths = agp instanceof List ? agp : [agp]
+            def agp_file = path(agp_paths[0]).toFile()
+
+            assert agp_meta.id == "test"
+            assert agp_paths.size() == 1
+            assert agp_file.name == "test.agp"
+
+            def agp_text = agp_file.text
+            assert agp_text.startsWith("# AGP-version 2.0")
+            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
+            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
+            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Stub outputs: unique_file_names contract") {
+
+        when {
+            options "-stub"
+
+            params.write_agp = false
+            params.unique_file_names = true
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            def fasta_out = process.out.fasta[0]
+            def fas = fasta_out[1]
+
+            assert fas.size() == 2
+            assert process.out.agp.size() == 0
+
+            // Contract check: names match the unique fixture pattern
+            assert fas.collect { path(it).toFile().name }.sort() == ["test.0.1.fa", "test.0.2.fa"]
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Stub outputs: nested directory layout contract") {
+
+        when {
+            options "-stub"
+
+            params.write_agp = false
+            params.unique_file_names = false
+
+            // Trigger stub's nested fixture selection
+            params.max_files_per_directory = 100
+            params.max_dirs_per_directory  = 100
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            def fastas = process.out.fasta[0][1]
+            assert fastas.size() == 2
+            assert process.out.agp.size() == 0
+
+            def rels = fastas.collect { path(it).toString() }
+            assert rels.any { it.contains("splits/0/0/") }
+            assert rels.any { it.contains("splits/0/1/") }
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Real run: default behaviour produces FASTAs and no AGP") {
+
+        when {
+            params.write_agp = false
+            params.unique_file_names = false
+            params.max_seqs_per_file = null
+            params.max_seq_length_per_file = null
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+            params.force_max_seq_length = false
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            assert process.out.fasta != null
+            assert process.out.fasta.size() == 1
+
+            def out = process.out.fasta[0]
+            def meta = out[0]
+            def fas  = out[1]
+
+            assert meta.id == "test"
+            def fas_list = (fas instanceof List) ? fas : [fas]
+            assert fas_list.size() >= 1
+
+            assert process.out.agp != null
+            assert process.out.agp.size() == 0
+
+            def merged = fas_list
+                .collect { path(it).fasta }
+                .inject([:]) { acc, m -> acc + m }
+
+            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
+        }
+    }
+
+    test("Real run: write_agp=true emits exactly one AGP file") {
+
+        when {
+            params.write_agp = true
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+            params.max_seqs_per_file = null
+            params.max_seq_length_per_file = null
+            params.force_max_seq_length = false
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            assert process.out.agp != null
+            assert process.out.agp.size() == 1
+
+            def agp_out = process.out.agp[0]
+            def agp_meta = agp_out[0]
+            def agp_val  = agp_out[1]
+
+            assert agp_meta.id == "test"
+
+            def agp_list = (agp_val instanceof List) ? agp_val : [agp_val]
+            assert agp_list.size() == 1
+
+            def agp_path = path(agp_list[0])
+            assert agp_path.fileName.toString().endsWith(".agp")
+
+            def agp_text = agp_path.toFile().text
+            assert agp_text.startsWith("# AGP-version 2.0")
+            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
+            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
+            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
+        }
+    }
+
+    test("Real run: max_seqs_per_file=2 splits into 2 FASTA outputs") {
+
+        when {
+            params.write_agp = false
+            params.max_seqs_per_file = 2
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            def fas = process.out.fasta[0][1]
+            assert fas.size() == 2
+
+            def merged = fas
+                .collect { path(it).fasta }
+                .inject([:]) { acc, m -> acc + m }
+
+            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
+        }
+    }
+}
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
new file mode 100644
index 0000000..3390583
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
@@ -0,0 +1,168 @@
+{
+    "Stub outputs: AGP optional output appears when enabled": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                    ]
+                ],
+                "agp": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                    ]
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:07.606463"
+    },
+    "Stub outputs: nested directory layout contract": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "agp": [
+                    
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:11.815126"
+    },
+    "Stub outputs: default layout, no AGP": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "agp": [
+                    
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:05.482323"
+    },
+    "Stub outputs: unique_file_names contract": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "agp": [
+                    
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:09.698407"
+    }
+}
\ No newline at end of file

From acfe54f1ea92e21e918152fc55cea025757cfe79 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:28:51 +0000
Subject: [PATCH 04/36] Add python script for splitting FASTA, chunking if
 necessary

---
 .../ensembl/fasta/splitfasta/split_fasta.py   | 462 ++++++++++++++++++
 1 file changed, 462 insertions(+)
 create mode 100644 modules/ensembl/fasta/splitfasta/split_fasta.py

diff --git a/modules/ensembl/fasta/splitfasta/split_fasta.py b/modules/ensembl/fasta/splitfasta/split_fasta.py
new file mode 100644
index 0000000..164ec44
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/split_fasta.py
@@ -0,0 +1,462 @@
+#!/usr/bin/env python3
+
+"""Split a FASTA file (possibly gzipped) into multiple smaller FASTA files."""
+
+import inspect
+import logging
+import shutil
+from pathlib import Path
+from typing import Optional, List, Set, Tuple
+
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+
+try:
+    from ensembl.utils.archive import open_gz_file  # type: ignore
+except ImportError:
+    import gzip
+
+    def open_gz_file(path):
+        p = str(path)
+        return gzip.open(p, "rt") if p.endswith(".gz") else open(p, "rt")
+
+
+try:
+    from ensembl.utils.argparse import ArgumentParser  # type: ignore
+except ImportError:
+    from argparse import ArgumentParser
+
+try:
+    from ensembl.utils.logging import init_logging_with_args  # type: ignore
+except ImportError:
+    import logging
+
+    def init_logging_with_args(args):
+        level = getattr(args, "log_level", "INFO")
+        logging.basicConfig(level=level)
+
+
+class Params:
+    """Class to hold parameters for splitting FASTA files."""
+
+    def __init__(
+        self,
+        fasta_file: Path,
+        out_dir: Optional[Path] = None,
+        write_agp: bool = False,
+        max_seqs_per_file: Optional[int] = None,
+        max_seq_length_per_file: Optional[int] = None,
+        min_chunk_length: Optional[int] = None,
+        max_files_per_directory: Optional[int] = None,
+        max_dirs_per_directory: Optional[int] = None,
+        delete_existing_files: bool = False,
+        unique_file_names: bool = False,
+        delete_original_file: bool = False,
+        force_max_seq_length: bool = False,
+    ):
+        self.fasta_file = fasta_file
+        self.out_dir = out_dir if out_dir is not None else fasta_file.parent
+        self.write_agp = write_agp
+        self.max_seqs_per_file = max_seqs_per_file
+        self.max_seq_length_per_file = max_seq_length_per_file
+        self.min_chunk_length = min_chunk_length
+        self.max_files_per_directory = max_files_per_directory
+        self.max_dirs_per_directory = max_dirs_per_directory
+        self.delete_existing_files = delete_existing_files
+        self.unique_file_names = unique_file_names
+        self.delete_original_file = delete_original_file
+        self.force_max_seq_length = force_max_seq_length
+
+        self._validate_params()
+
+    def _validate_params(self) -> None:
+        if self.max_dirs_per_directory is not None and self.max_dirs_per_directory <= 0:
+            raise ValueError("--max-dirs-per-directory must be > 0 or None")
+        if (
+            self.max_files_per_directory is not None
+            and self.max_files_per_directory <= 0
+        ):
+            raise ValueError("--max-files-per-directory must be > 0 or None")
+        if self.max_seqs_per_file is not None and self.max_seqs_per_file <= 0:
+            raise ValueError("--max-seqs-per-file must be > 0 or None")
+        if (
+            self.max_seq_length_per_file is not None
+            and self.max_seq_length_per_file <= 0
+        ):
+            raise ValueError("--max-seq-length-per-file must be > 0 or None")
+        if self.min_chunk_length is not None:
+            if self.max_seq_length_per_file is None:
+                raise ValueError(
+                    "--min-chunk-length requires --max-seq-length-per-file"
+                )
+            if self.min_chunk_length <= 0:
+                raise ValueError("--min-chunk-length must be > 0")
+
+
+class OutputWriter:
+    """
+    Manages output file creation and counters, writing in a single pass.
+    Creates/cleans directories lazily as required.
+    """
+
+    def __init__(self, params: Params):
+        self.params = params
+        self.basename = (
+            params.fasta_file.name.removesuffix(".gz")
+            .removesuffix(".fa")
+            .removesuffix(".fasta")
+        )
+        self.agp_file = (
+            self.params.out_dir.joinpath(self.basename + ".agp")
+            if params.write_agp
+            else None
+        )
+        self.file_count = 0
+        self.record_count = 0
+        self.file_len = 0
+        self._fh = None
+        self._agp_fh = None
+        self._cleaned_dirs: Set[Path] = set()
+
+        self.open_new_file()
+
+    def _create_or_clean_dir(self, dir_path: Path) -> None:
+        try:
+            dir_path.mkdir(parents=True, exist_ok=True)
+            if self.params.delete_existing_files and dir_path not in self._cleaned_dirs:
+                for child in dir_path.iterdir():
+                    if child.is_dir():
+                        shutil.rmtree(child)
+                    else:
+                        child.unlink()
+                self._cleaned_dirs.add(dir_path)
+        except Exception:
+            logging.exception("Failed to prepare output directory '%s'", dir_path)
+            raise
+
+    def _get_subdir_path(self, dir_index: int) -> Path:
+        """Computes subdirectory path based on dir_index and max_dirs_per_directory."""
+        parts = []
+        max_dirs = self.params.max_dirs_per_directory
+        if max_dirs is None:
+            parts.append("1")
+        else:
+            current_index = dir_index
+            while current_index >= 0:
+                parts.append(f"{current_index % max_dirs}")
+                current_index = current_index // max_dirs - 1
+
+        parts.reverse()
+        return self.params.out_dir.joinpath(*parts)
+
+    def _get_file_and_dir_index(self) -> Tuple[int, int]:
+        """
+        Determines index of file and directory based on file count and max files per directory.
+        Returns (file_index, dir_index).
+        """
+        max_files = self.params.max_files_per_directory
+        if max_files is None:
+            return self.file_count, 0
+        adjusted_count = self.file_count - 1
+        return (adjusted_count % max_files + 1, adjusted_count // max_files)
+
+    def _get_path_for_next_file(self) -> Path:
+        """Computes path for the next output file."""
+        self.file_count += 1
+        file_index, dir_index = self._get_file_and_dir_index()
+        subdir_path = self._get_subdir_path(dir_index)
+        self._create_or_clean_dir(subdir_path)
+
+        if self.params.unique_file_names:
+            file_name = f"{self.basename}.{dir_index}.{file_index}.fa"
+        else:
+            file_name = f"{self.basename}.{file_index}.fa"
+        return subdir_path.joinpath(file_name)
+
+    def add_agp_entry(
+        self,
+        object_id: str,
+        start: int,
+        end: int,
+        part_nr: int,
+        part_id: str,
+        part_length: int,
+    ) -> None:
+        """Adds an entry to the AGP file."""
+        # AGP columns for WGS contig component type:
+        # object, object_beg, object_end, part_number, component_type,
+        # component_id, component_beg, component_end, orientation
+        if self._agp_fh is None:
+            return
+        try:
+            line = f"{object_id}\t{start}\t{end}\t{part_nr}\tW\t{part_id}\t1\t{part_length}\t+\n"
+            self._agp_fh.write(line)
+        except Exception:
+            logging.exception("Failed to write AGP entry for part '%s'", part_id)
+            raise
+
+    def create_agp_file(self) -> None:
+        """Creates the AGP file for recording sequence chunking."""
+        if self.agp_file is None:
+            return
+        try:
+            self.params.out_dir.mkdir(parents=True, exist_ok=True)
+            self._agp_fh = open(self.agp_file, "w")
+            self._agp_fh.write("# AGP-version 2.0\n")
+            logging.info("Created AGP file '%s'", self.agp_file)
+        except Exception:
+            logging.exception("Failed to open AGP file '%s'", self.agp_file)
+            raise
+
+    def open_new_file(self) -> None:
+        """Closes current file (if any) and opens a new output file."""
+        if self._fh is not None:
+            self._fh.close()
+
+        path = self._get_path_for_next_file()
+        try:
+            self._fh = open(path, "w")
+            logging.debug("Opened output file '%s'", path)
+        except Exception:
+            logging.exception("Failed to open output file '%s'", path)
+            raise
+        self.record_count = 0
+        self.file_len = 0
+
+    def write_record(self, record: SeqRecord) -> None:
+        """Writes a SeqRecord to the current output file."""
+        try:
+            SeqIO.write(record, self._fh, "fasta")
+            self.record_count += 1
+            self.file_len += len(record.seq)
+        except Exception:
+            logging.exception("Failed to write record '%s' to output file", record.id)
+            raise
+
+    def close(self) -> None:
+        if self._fh is not None:
+            self._fh.close()
+            self._fh = None
+        if self._agp_fh is not None:
+            self._agp_fh.close()
+            self._agp_fh = None
+
+
+def _get_param_defaults() -> dict:
+    """Retrieve default values for Params class attributes."""
+    signature = inspect.signature(Params.__init__)
+    defaults = {}
+    for name, param in signature.parameters.items():
+        if name != "self" and param.default is not inspect.Parameter.empty:
+            defaults[name] = param.default
+    return defaults
+
+
+def split_fasta(params: Params) -> None:
+    """Splits the input FASTA file into multiple smaller FASTA files, chunking long sequences if required."""
+    if not params.fasta_file.exists():
+        logging.error(
+            "DEBUG: fasta_file=%r resolved=%r cwd=%r",
+            str(params.fasta_file),
+            str(Path(params.fasta_file).resolve()),
+            str(Path.cwd()),
+        )
+        raise FileNotFoundError(f"Fasta file '{params.fasta_file}' does not exist")
+
+    # Do nothing if file size is 0
+    if params.fasta_file.stat().st_size == 0:
+        logging.info("Input FASTA '%s' is empty; nothing to do", params.fasta_file)
+        return
+
+    params.out_dir.mkdir(parents=True, exist_ok=True)
+
+    writer = OutputWriter(params)
+
+    try:
+        if params.write_agp:
+            writer.create_agp_file()
+
+        with open_gz_file(params.fasta_file) as fh:
+            for record in SeqIO.parse(fh, "fasta"):
+                seq_len = len(record.seq)
+                max_seq_len = params.max_seq_length_per_file
+                max_seqs = params.max_seqs_per_file
+
+                if max_seqs is not None and writer.record_count >= max_seqs:
+                    writer.open_new_file()
+
+                if max_seq_len is None or writer.file_len + seq_len <= max_seq_len:
+                    writer.write_record(record)
+                    if params.write_agp:
+                        writer.add_agp_entry(
+                            record.id, 1, seq_len, 1, record.id, seq_len
+                        )
+                    continue
+
+                if params.force_max_seq_length and seq_len > max_seq_len:
+                    starts = list(range(0, seq_len, max_seq_len))
+                    ends = [min(s + max_seq_len, seq_len) for s in starts]
+
+                    if params.min_chunk_length is not None and len(starts) > 1:
+                        last_chunk_len = ends[-1] - starts[-1]
+                        if last_chunk_len < params.min_chunk_length:
+                            logging.warning(
+                                "Length of last chunk of record '%s' is %d, lower than min_chunk_length: %d;"
+                                + " merging with previous chunk",
+                                record.id,
+                                last_chunk_len,
+                                params.min_chunk_length,
+                            )
+                            ends[-2] = seq_len
+                            starts.pop()
+                            ends.pop()
+
+                    for i, (start, end) in enumerate(zip(starts, ends), start=1):
+                        chunk_seq = record.seq[start:end]
+                        chunk_record = SeqRecord(
+                            chunk_seq,
+                            id=f"{record.id}_chunk_start_{start}",
+                            description=f"{record.description} (part {i})",
+                        )
+                        if writer.record_count > 0:
+                            writer.open_new_file()
+                        writer.write_record(chunk_record)
+
+                        if params.write_agp:
+                            writer.add_agp_entry(
+                                record.id,
+                                start + 1,
+                                end,
+                                i,
+                                chunk_record.id,
+                                len(chunk_seq),
+                            )
+                else:
+                    if seq_len > max_seq_len:  # only warn when the record itself is over the limit
+                        logging.warning(
+                            "Record '%s' length %d exceeds max_seq_length_per_file %d but chunking not enabled",
+                            record.id, seq_len, max_seq_len,
+                        )
+                    # Record did not fit in the current file: roll over unless that file is still empty
+                    if writer.record_count > 0:
+                        writer.open_new_file()
+                    writer.write_record(record)
+                    if params.write_agp:
+                        writer.add_agp_entry(
+                            record.id, 1, seq_len, 1, record.id, seq_len
+                        )
+    except Exception:
+        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
+        raise
+    finally:
+        writer.close()
+
+    if params.delete_original_file:
+        try:
+            params.fasta_file.unlink(missing_ok=True)
+        except Exception:
+            logging.warning(
+                "Failed to delete original FASTA file '%s'",
+                params.fasta_file,
+                exc_info=True,
+            )
+
+
+def parse_args(argv: Optional[List[str]] = None) -> Params:
+    defaults = _get_param_defaults()
+    parser = ArgumentParser(
+        description="Split a FASTA file into multiple FASTA files, optionally chunking long sequences."
+    )
+    parser.add_argument(
+        "--fasta-file",
+        type=Path,
+        required=True,
+        help="Input raw or compressed FASTA file containing sequences to split",
+    )
+    parser.add_argument(
+        "--out-dir",
+        type=Path,
+        help="Top-level output directory (default: input FASTA directory)",
+    )
+    parser.add_argument(
+        "--write-agp",
+        action="store_true",
+        help=f"Write AGP file describing the splits (default: {defaults['write_agp']})",
+    )
+    parser.add_argument(
+        "--max-seqs-per-file",
+        type=int,
+        help=f"Max records per output file (default: {defaults['max_seqs_per_file']})",
+    )
+    parser.add_argument(
+        "--max-seq-length-per-file",
+        type=int,
+        help=f"Max cumulative sequence length per output file (default: {defaults['max_seq_length_per_file']})",
+    )
+    parser.add_argument(
+        "--min-chunk-length",
+        type=int,
+        help=f"Minimum length of a chunk allowed as a remainder (default: {defaults['min_chunk_length']})",
+    )
+    parser.add_argument(
+        "--max-files-per-directory",
+        type=int,
+        help=f"Max files per directory before moving to next computed dir (default: {defaults['max_files_per_directory']})",
+    )
+    parser.add_argument(
+        "--max-dirs-per-directory",
+        type=int,
+        help=f"Max subdirectories per directory level (default: {defaults['max_dirs_per_directory']})",
+    )
+    parser.add_argument(
+        "--delete-existing-files",
+        action="store_true",
+        help=f"Delete existing files within computed output dirs (default: {defaults['delete_existing_files']})",
+    )
+    parser.add_argument(
+        "--unique-file-names",
+        action="store_true",
+        help=f"Make output file names unique across dirs by including dir_index (default: {defaults['unique_file_names']})",
+    )
+    parser.add_argument(
+        "--delete-original-file",
+        action="store_true",
+        help=f"Delete original input FASTA after splitting (default: {defaults['delete_original_file']})",
+    )
+    parser.add_argument(
+        "--force-max-seq-length",
+        action="store_true",
+        help=f"Chunk single sequences longer than max-seq-length-per-file (default: {defaults['force_max_seq_length']})",
+    )
+
+    args = parser.parse_args(argv)
+    init_logging_with_args(args)
+
+    params = Params(
+        fasta_file=args.fasta_file,
+        out_dir=args.out_dir,
+        write_agp=args.write_agp,
+        max_seqs_per_file=args.max_seqs_per_file,
+        max_seq_length_per_file=args.max_seq_length_per_file,
+        min_chunk_length=args.min_chunk_length,
+        max_files_per_directory=args.max_files_per_directory,
+        max_dirs_per_directory=args.max_dirs_per_directory,
+        delete_existing_files=args.delete_existing_files,
+        unique_file_names=args.unique_file_names,
+        delete_original_file=args.delete_original_file,
+        force_max_seq_length=args.force_max_seq_length,
+    )
+    return params
+
+
+def main(argv: Optional[List[str]] = None) -> None:
+    params = parse_args(argv)  # parsed outside the try: the handler below needs `params` bound
+    try:
+        split_fasta(params)
+    except Exception:
+        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
+        raise
+
+
+if __name__ == "__main__":
+    main()

From 8a6adaa760dcd8a067d831d27e9b5e39bf56bf8c Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:29:53 +0000
Subject: [PATCH 05/36] Add pytest tests for split_fasta.py

---
 requirements-dev.txt                          |   2 +
 .../conftest.cpython-311-pytest-9.0.2.pyc     | Bin 0 -> 1583 bytes
 ...t_split_fasta.cpython-311-pytest-9.0.2.pyc | Bin 0 -> 23708 bytes
 tests/conftest.py                             |  24 +++
 tests/test_split_fasta.py                     | 144 ++++++++++++++++++
 5 files changed, 170 insertions(+)
 create mode 100644 requirements-dev.txt
 create mode 100644 tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc
 create mode 100644 tests/__pycache__/test_split_fasta.cpython-311-pytest-9.0.2.pyc
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_split_fasta.py

diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..c0367d2
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,2 @@
+biopython
+pytest
\ No newline at end of file
diff --git a/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..187575a0d07bcbd9727705c0be55b2143b47d56e
GIT binary patch
literal 1583
zcmZ`&&1)M+6rcT&cC{-ja*CU#YRVK!QWdJL$+d(M*AzES2rY)9%d*yvqIFi<?aZkD
zhzx4+tvR@ml0qr?<fga<_ts<o1g#NR45S3w9&%GKx#ZNDU9CPsJ3IUKH*ened;8Pe
zoB{ay_3wM$pE3Y{i9@=S5jYDucnDnJN<OGc@mFptHCX~pwVIKVn`~9%H?prcbJg64
zXH*RtAY;Q!O}ff2>Y)U{5r32AX7<1#hes&`RP(O%87OC;3eK&v#MIjkZ7_`>LcE~G
z85vRwLR1@TaG>&6#CwLh<%fWd?E^r0Ug?_keK2*CN`RN5*+{Fvv%feN+65h+>kiM$
zRbr~fG!xUTpt};)$Kqfklj4JHil)D=){zc*G)LozcIA%zmDZ6rv{bHpp3bM3`^|Nv
zAHa|DY(`)IpV!<(bAmcAY40l2{tn{Fm`Eq6qs)4gKu6)-GIqW(AYZG2<HBa(wtYk_
z)FP<4?pt+-P{$&n@6jn}$G8%1Lx(^I-rlB-pf%EsZDDlB!7evMU{Hsf4MZCV!z-!v
zE6|(L3!U0s=MJjGnomXhnKqJS+9(i~iCr>d63R{?#^YF+Id#6Z1DArf@4{9<p+CtC
z5<Y#fj)NvFn^+78(`~!%`N+1Jh7k$;FA>v12cs4xEax@D08`&vXKI^zKD!|FZK0ni
z`9aO0e6(3sI6;`@KW(?D*F?)02bdL;JQ4+=%1pGy4J7Qc%5oTmfsKQJGIJywieE;J
zg%!>s;{Oo+d9i4%5rm1={DNA`*H^3-+M;zo*c@3S7Da>-s}{8C0`c~+^HU`miQzZ6
zJu#W&H%_|Xc^=F!eE0dcg$ISx;^mX#<*4{hU+njf=ljL0-IXW${QgR$UpmM|dil}E
zk$&~qkM!%k<+Vs(8!8}Ee6EB1!u@Nf=HiLDc>LZUrO3S5H*a=78fa#>vriw~>s@&F
zapCn|>BjGOIpI4hto945J$>~b<CXiBQ=@!hlpnnv8CKu0x=RC9-@9?DmQK{t_e+rq
z`zq|Ia9|d?pPccaL3xhp>ut}cUW>5&hvBw8K3%lK?Kpdf@M4M9R504a9z`rSW{FNG
zJcBC>aofSHFm855^cokrCou}N!TX=gN;Z*ZTHV{CZH(e#{tWOXE{aQ>ywE>L+IS6@
z`B4lr>2NYsBuN^GdjI#+0}JCl0I&6C_Y*MJjh|r_$i?ofdvET0Jqbn<>`QPc7o~GU
Qz{xWO$ZySXVs;$ezw@M@X#fBK

literal 0
HcmV?d00001

diff --git a/tests/__pycache__/test_split_fasta.cpython-311-pytest-9.0.2.pyc b/tests/__pycache__/test_split_fasta.cpython-311-pytest-9.0.2.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9a063375becde6ff48f52c527835c05f70f38849
GIT binary patch
literal 23708
zcmeHPYj6}-cJ7|(dG<W?00{v$0$HGuC6K@vL6B?=hQ%9WV=%VawI_pmB!ou~x|;>#
znJOjmrUa{9%PDWHO{mmt%1a=SS}K(v{+IQ?tyF%@s77T~S5cuVaT2QXLw;4c_?7S6
z+tbtCngMyeTd~V(wa%P-?m72y?xW9r{(UqW7V!Mrf4@0$woVZKgB<0@V<Mk@0OUPE
z5fm{aoD|uwXVg3B5rt9dq)#LtPsTqQI2mBLm<gT?g67SHM#Cq=j3#B|(a6aN!+n|P
zXzXMRxIY~j@+g6~q?2(a$nX&GIuj2AuUBM7j{t8_q707#Z#3~Z@Kq*W2Yj`O*8^`-
z#A8CD;Vxz4NJ2EE=TrJ={Q8ci&mMjbh!+TcLil+hJvgo^hBN_|!E(k)(2svF{y)16
z@SdQH_%WX>6ry+(?@j5dBnUU~&!3$_&ZCoz`JA!B%Zgc#;=2mJ8<sy)2~>C$TsJIE
zq8=04gkC|D&U-F+UlGoWqVS3!;*We1{v(EX{+{ogKGfZJPYfqKMld}#IIfJ04I6>6
z^!d!lSlaLnr8GU2@EX$iM0!l66dJzs>WH3Bc+~*80xHY6W_X59Ym}ze*4BHwx4o#P
zRc+hoS$*5nPaWPimcF15Wya4ZN5_?OnY6Y|Pis1dNo^uCq9<8ATPNNyWEMJitE&(m
zRWI#l0F%P)P;^R}*`MEX>#;(puNdkph58l+(bxIGQ@4ZBX?f<*><jtkVlYt(CJMpC
zq8DDwpMg(^^K$|fT21WBFTbqm$2_zMlJstKpZ5c4Z{T0Vy#XUIeojv+BkG{7gaseC
zNGqQm1aOyr-p&bs{wU&A&~I3NP8UkfbL67bxpgRoBJ*qGs-9L1pE{fwKa~*G)d)i)
z(N?t)&}hhvXnGPUn?T~2k~Osj9&zGZ4*;#o^)p-E>;B8`yuaA6rPQ#cD0h_Pj)K(j
z8T<uDc0wSq?EDvy_k>{~Cps&%GqDGRS6_2hWmI2O`I}y66-I^kX2rZ!c`4rVqeyQF
zI#o6EIb$`GtQSqeQRxo|SHX3o%qlojQ>&qj$<>?5bTUirpBjp)QZ>9Ts#NL@jgU4-
z)1Q$tNh4X|)lg2%if7>F&pV#!@G;@KfS$rLqM+0~TYFSu*C@lSyrk}h_fk;}j*m_Z
zT<lWQDJ7Z8WRfF_mK;*YN6oU^x;HaEn967aK+4?B6w+FNNukjE1b_4Ov*H!Wa;oI5
z#sCsNR{ScJzu`x-OphsMvz|z)S~?-IDr9&OF`9HrZ2+T2Do49E>(rWBsZ+OrkCdVP
zE5M|%5ROdlza48un~Wt&vBaX_@$C~iobud`*3WF38O$FlM7xU7u2QrM4J))yyyaPl
z#NU4A@-sa7ze>OV`mIg3hKrjAN}C5jD>gq-0*pLaiac3}Jc(8gAuh@x8agOw=`OmR
z$By&1UX}UFJ92FD8Lb}Kxw>OtyYItxX<s7rVV4MK1XIHkN!H(6Z54c^E7nB*3J5-W
z;#;T^dEiSQ)({+Ry?T$}tkmvGo)f8wWW_3UUo|?b(vCVzofNCXeL&1Q+)Q;izckID
z;8WvBH0t*n1S+aBJW<L0s-~+W6W2ZJMi2}!W3T7xCeSXnvC5Vt1=4p$t5&aB3n6aQ
z>&Q#|&5Kh<-+uY>%QMQQ*QQ^y$h>}26ICAqsGhke;bUfQ9cAv<(sDC+t0gNUlj|vy
z_rx}2F^sRGs%z&*^wVk=!u{QHNNLr!0#FY7`8(5_uLpLm_t$&>Vebdt;-h;@kM0Fw
z>ZrxSa$7x*U#|e~i6P|8$G*s4BU=wsw#<T)69x9prl$72edzL`nFE)eoqm?n09jVh
zBZgR)qN6{8YRMpAwIbnBI}n!PV=lXnjkv1<sk^|qjv{^q09_BOG5ME2YTq^2zUw#5
zh4x*=_QR$2!ymODpKCu}Y(G(IKY{vFjJ#Beyfpdr0@MGj4@xdZ`b&|1kXXX=JCV$y
zoG8I1@$?!&ebj9D)Pa{D>)Ut&?dd%<dPTg57LoG+^DxZo_*ObhJe}I0`6ymz`%)yu
zchm3uVh)SWFQ14GAWz-Sk|*oIcQSA@=#<Y<<(HN)gOHV+vE!>5%K5V1tdHfvpY@-y
z(wYl+glxdsJ>`PgU_{7;vLWqnm9XN`qm@|BSl@A{%#Q?Zl}??k`J5qLvM1}(h3e#^
zCSR3r3|59C#0sa+nZFubfn{+8om`51MY`xmnTK`yDw~fZ1~MSnlMOp%ovuH_DCvTj
z^Woo*{{a4jxsXmZ+<e@c$@z8aPs+Z2XLvbTiJ(m6Z0L1Xdbvn8G9zlwI?_f)0eh5a
zHZmkCG2$QcI0q+6e3>QM;t1O+QJwmA^Kqt8bxK$zR$Bt499QbTW_rjr`fBN^DS`CV
zeFJN}t1dXo#nN(Cc9upBu8@;UslVc9H9o2|D2>_Zh{#;AWz_$eGbE?Wtp8N^W&ES=
zvO(*rBlmnGV;K7wUW{oBY2W@s#|=qKpY1gS*c+`VM?+T9+92!?ePn#>Acn4CHtb0b
zjbzf?whi$iCXA1JeguZ!i$_x$2CRDP*tp)>-ZAmUu7nnT(?nYP8EI#k1O%iG^6de-
zjrnd*Xo;=q3$V$xaIPZ}&Lt$n3kCw)3G5`W>n>S}Fd{qy{R#2n5enn+a=1Xus1Pg*
z{;{$b5YwyI^rH9>NjTR!z>~K-p&8x`#^^M<OZe3TqzXjYt3GXFkCF3n0=qB9&HPdV
zdJ!h(E$Fw7jks0xV1i2BgyDZarKUzTL$;05Y1lydG$Kz<ypcS0ZX~0RjA@3Sk4%g(
zs5HD!rc`51Qae3<E~6yX^n{wEA!;g<97~O+jZjic4W)@}#P%`WqqLq@)p6BmOb(5!
zqbWU^zA%wVjism@`Jm+)N>Y6i1E35Gkp@dCHJym7WcL^iwnUmU4H(CaO{jmwvI16?
z6-qVFey<@{96Rie$Lx;B?T$X~;7QzV_t|N8?Bd$tY`r}W*LICMS{hH;9zI-eVwxKp
zXO{7)H<C(vD0MEQtEVAA#<ceDusCR7)iCo?F}!|qKVdHx!)r0llHxPopO5^?n?LwV
z`F;78|2Iz+nhq744wae?6;^+*C><_IhYQl-JJAgvMK{kyH!n0cEz~tGG&C;;gxHo(
z1S#hKoWSJ4#jxOO{z%$1CvD0<UX(gZQfEQxywmhB<^*QEGpFB)5fUrKH{-Ve7~fos
zca-8ZXHWoSlLsaQMe3NBI!aPUkwM}qQ)avtjkDMY6{gdiiE~g~ES$2_5iGp8oatMR
z)QX13UJKriZ_Kw9<C|H|x_-KYB9ph2<ShlM>sl{BLEdt$cTVmiRO%{9UGq{`iQ$}5
zX5pUPhqHj!V&s&c?yzV}2t2e}lJoJ6E>Sz>{<6qlIVC)J8v&A>#*|FyHJ$PnF2tSJ
zdaWRMK6#85gf9=tlxB)TV0I~sc`3q&H+IXKtVTAZ{%3Sb54NoBQ>V+ZrR`R^L)K*j
zD#4|8cuiUNcQU+GcG#Pf>~=pab?GZv^|0w~MxE9fEcIG;z02+u1)aw0=2OL*wAt-h
zmj#gvC^GE)KsNX~D<@{{f2v#dqad6KmP3XW8G<btC+;P-WSvg)4(4;lvPG7R4q6;x
zTj|y<YZ~)2jyVa~lJ?YCe6=N*?N-0c^sI7(Zl%YqCyZ9ArozpKxa(_hy6RvJF84U3
zp*#*z8kJSquxXXa_VI%wB$^szkJ}1#YSpyLXfzVa1@#SODU2Fj<Fs60SWs5~9Y!s`
zhg#CK%u;cdxm7AUO-PuJGe0$JNlgiqR1+-x<<(ke>09Y>>#Algap@FsdmN=^)oP5v
z9kss(*Z)f`VRn1-5p@qrfLmhWeb!@mv|e?9e4YR>!uzcUKHH7(A?tyA$6e}d4q}RW
zA4$+shfzP8x{!qZuO%naD!1{U0{uEn?Ee1i;>8}aaCemLXdo^V8@4YQ(f!<rh6%1R
zBh!m9!*z)n(dzdp9$3vg)MtU+i`h))J~Fx|-sr<(*z539M+uA*pqX%()0QR84eRI0
z>nH)T(pgmMi-esdaE!ol0w)N(MBrrtG^NEuj4(@A{Q(hvNPwBP64u}}^;L481F+kn
z+f{|ge`1ljMY>BJ1cj|(*=$gAet!cqLb)*w8+56LbCB(2^7txOw?;n>*8jW>mSXq2
z-T4EzP8EW?i^1Kc;O^UBk-iv^BL2w(U%(=*gXwv^D6K6?YYWm^w;8*39a*v=X36@<
zk_CVz8>wc=ejHvud#o7VSPE~1-7LqZv`c-{eFbUF%vpe`zL~R^`+@LxURqNs<D5ba
zz=~xL&S5#>#VKUqnX*d=_OLB3OL9uHXk5}ps4(?$CeAT)))D~+a2|sNIR(6&o%0}L
zb}5UQBe&(Y{QmM{;6S0P{7M64+jkAj$pZ!8?46eeN@d(6G9KW?vKQyEobck5c^NjT
zDZZ3q58q<5WRXCpsEuGTA-SB(Ol4OkP2hnSkHdnT0%nuHoChl^i_?WHHC?SqVVX{P
zu<4rpDwD#}P1jC<uQ(~}SNuu<Ms4V3_$o~78`k9PSD75XuMz1}V4@Cta{jC@>o-ls
zKsKO#T7Ak6(~bc)?FjBArefSNA!SV`RxuTAhHJ&7)Ggz(im7Oqus+kZCGa7RyQY_G
zr)QNTuU0Cm7y(sN;pSsTKy^-+f$xad%9Y#nRO(DqF{sol4cVY+Du#6WLYa>vBrEQ$
zYl!1CL0~F2!c3$I>;TH&tE^(n2jOh^jAd-*WGo-Z&Jv`FY3(P<YNctJ<y}j^uIY6c
zSysw1JgMlz#YYBS4Nmt2Y4z|TPCY2iSG;_JbQv0rI%S<MQ=@V&;xN2j>CeS<nkX`#
ztZR}r=Tp}FURtAbs=jvZZ*l0-s{MLvIfct^jWs1uVlB$ruUT7Mda7AlTzrUots{q(
zdi70ijWK0irFq-&TwIaR0^`|;*<1UvJ}u+Oq4kwOZ|y^Gy`H$;y|qK4qLa9@M%UnS
zx1fivc+4e+hm{T15<_qq?a4LZamH@8r#SUe$g@)?t;)t9dn!80#W9@SgyHOpZUkkc
zT`x-8icYX4<`IWZ&_}PdA*Ef}yrd0n`3|?C)=C{&>2u9K*RGRleQw7}eIZL}rO&NW
zUz~kz<U8Bv+S^dIJ~#1QZ9_^Uc5HTDd^1dQoR}m|Vt+*c_THYJu2xKH_5<l@?H*_y
zJbiBLb*=RW<nC&<;Hyk*bYv{a7)W?nKi$8b3k{8or7~Q!bu_IGrxgmS90mg6;+1Ld
z4<Eylm^Jaef~;s@MFT4uSkb_W239n%qJjHpz>o${tG#T2{P^Af!HVnM-(o~ClK%@)
zNCYZN;VP|r8>_IsJD4u7*_t+s(QK~anr>^Y+^BP#H|kjmrGa8J%^r5rCD~)FUP2>F
zNmaqG>I7)XTm2D%3k2Q(IHK+cytvP{WL{-YEB3PT&9*=)%kz%iw1yYEbg{}#yIR?X
zR`rhwn;^gz?4`l+jHc5504(}9ux+gr7+=;$oY)-vePSjt#}Ym*`9DiO-vhWS0bt9p
z*lS2EcRz#M5MR1Wn_=(LV*Xv4>PAAyLwy_Ip7<yoj|Dem$hI`Z?WRAdaI5c<M7?Wg
zty<X%!1T7<T>vjoymZ!uWg9yTXy>bIKLFne@Fsa!i~nktoV4h#P7!#GKoY>dopq;r
z$)fNg>(S~!v2PI|obOfbWZML|ZOklSY`=g_oYY2Xe=et|#&C>rctZUfghC3|LYNGm
z6c)pR+<2cItk3*1{(k({!Qc7|P0tmZo+~vyS6KagQ94?Zjuxb&?xp{RriF(#F0?+f
z(6WAEZR=v3?EwhV9smIB0cfbU2jC085P77$yuB!Reg1{u`i0<zMW46Z&)irw#&5U(
zQ^6bI6h>y?3ljKW|6%|?6)MBZV$AUg@qJFfF7^qDRj78aF9Jwbb*XAzOw=xapP;eL
zx4ol!(AB)`p~63ssEAf_-|eqZD`{c0iqnNvd|5b^z<jpAss-x`$L2(Jip>NsZN+|Q
zIX1ngAT>|znc8z{@AO_m3GjDbYA#95MF#DZDeTlBAJ909jZmTO;Y=pSUP}ZVz&RiH
z<P=v_AoBNPdF||uqP(FbZ<stl+fVdMyQg;-q?TC`U~2cQco{nqTKGFJwUo*@rw{|M
zV%dXpSWb9x3T;0zWtR}_VOw05<dkmFxTK9xVcN}^ILEALiGTw*kHLbR0$$F}d5|%?
zl*P=EMYPbm8F6~oOfMDbhS?^7sXenzbMgjY{GFFJl*%}#5C^a#^Wff7C^=%|uGuE;
z;_SdVVnI#;uN5}uw@X>f99fX#JfSm3XLeAf>n^M-zfw0U;JWTPxtqxBotL^xWt>wO
z3(z9-;r>&oVZ>*0uQNTl0K7P2K~4d)6*?E^&|<SAR<cRI>t`DQpuNU9c|9=x&P(e{
zWt>xp16YxHaBtEtv2oXIBX@Cj;2g0ar-0WAoAcYHEM|^4653s8p}bG+!S4gKN-SXI
zDloN*wA!@V@(0xFx2rPTDc9j|U1RGhtSi4#54EJ8Ik~4GneV*RQ!3+}$XI|DnGg4;
z#zuT5_d3&)3&6_?&Q1Zd6*?E^(8bK1jyn9UX>2`(7Roy{wg+jISiox6BXtZmt+xCD
zwfgO9Y`hm<x?L~`8yO=!;j{k)eDHQb$7vPUZt-fT#%rEdaqS<kcG3mg_`cq0m5>s`
zd2sn=<SGVIH)t@$p5Oh%iV~$QWzJ0<Isb@|3uOJcdFB9}J;+<}l;fk{Y*2|+7GsB9
zS10AIEiQH$`vxP($1C#A6gXX?F#U8kphy*NoMf$2>Y>eWHk1vU+ic{ljBPemPZeNO
zo{Viajl{iVn~hzfvdzY|G0zchh0C>ZOj%`avx(5=yKKbV2pDy2v$4l#ZGX!?Xxkc{
zWDntAMxL?ZbVV0j7fVh?e0U4<5BHe~TJc)Zz={S|H1Lhm0Imi+b;fpxi*;hwF|HM4
zMFT4uSkb^YTm!5V+kRaAQ&{|WY0(5Xs91+g*~*&w=b)-@5%@8I{RC*e=`M}e*y<5q
zj$#X9mx=Zh0@DO&fhiHIeHxS&ba3%mS~u_eF#M?r+_$RG^(DIPgsAgKC_=2Z`g4(D
zWQRf7YK|XAIMG8J;-LE1<U@--Dq9@-1!1%<q|u2S=OPbZ;_2WQxv*;vRXsU&ZuAtc
zb;aE)d{K~&+#=}opsrp+2(<R69fMBpcO>-8JKNsfmLK|HQz6)24EC3T{rpJiQaV;*
z*wN46gN}aM)_)eGZ1E>Ri$4HZ{Hd$9_`_!4^J{)FP?S4Ma_8iMJJI;$!G&l&u7&hH
z&cKwo5R82kY?%wT%x;|Bcjb}1rx<K21=~IfcFqMm|8d8)v$yJR>eqMQ#L3p3rQps2
zd+*>f)y>nGzHggN;S$ahK&A#}Pt3_}!1y~awUx>^rw|9QBJ<$hQ#h7LY}_?_g1b07
zaE@4zQ^0G5&H3$87Bfc{zAXvm$>kRXHXFh6eM9WDw9%5}mkn*@%ayn(XseoL*E<;@
zi$r%6Ga742Mu4vD%#55;so@%a9)RI}YGj;Wgc<<0^;r5WzX2&~y7>Lhyb05_#QX#|
zTf1e;v8Rbf2cy|qCu<KX-H*T$&RWyG;9h?H@qqeI@DTB@9S6h(rlNR<T21x8g0RN*
z-Vvgc>~BF>U2wh&LUY0SE(mP}=ey|lh@CSE?qL;i6|xhAMKO_=&;mr<*6IX7pBT?W
z3O{a8=M_6=;e{VJh(yKi{P9HrKW@;vN_^rPr10YgTOSp-ez0m$z>gdJP+TiMa!X$n
K@KX~ot^Xh0r`|dM

literal 0
HcmV?d00001

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..766dbc3
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,24 @@
+import importlib.util
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def split_fasta_module():
+    """
+    Load modules/ensembl/fasta/splitfasta/split_fasta.py as a Python module
+    regardless of whether 'modules/' is a Python package.
+    """
+    repo_root = Path(__file__).resolve().parents[1]
+    module_path = (
+        repo_root / "modules" / "ensembl" / "fasta" / "splitfasta" / "split_fasta.py"
+    )
+
+    spec = importlib.util.spec_from_file_location("split_fasta", module_path)
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"Could not load module spec from {module_path}")
+
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return mod
diff --git a/tests/test_split_fasta.py b/tests/test_split_fasta.py
new file mode 100644
index 0000000..8a48af2
--- /dev/null
+++ b/tests/test_split_fasta.py
@@ -0,0 +1,144 @@
+# tests/test_split_fasta.py
+from pathlib import Path
+
+import pytest
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+def write_fasta(path: Path, records):
+    with open(path, "w", encoding="utf-8", newline="\n") as fh:
+        SeqIO.write(records, fh, "fasta")
+
+
+def list_output_fastas(out_dir: Path):
+    return sorted(out_dir.rglob("*.fa"))
+
+
+def read_all_ids_from_fastas(out_dir: Path):
+    collected = []
+    for fasta_path in list_output_fastas(out_dir):
+        with open(fasta_path, "r", encoding="utf-8") as handle:
+            collected += [record.id for record in SeqIO.parse(handle, "fasta")]
+    return collected
+
+
+def parse_agp_lines(agp_path: Path):
+    rows = agp_path.read_text(encoding="utf-8").splitlines()
+    rows = [row for row in rows if row and not row.startswith("#")]
+    return [row.split("\t") for row in rows]
+
+
+def test_no_agp_by_default(tmp_path: Path, split_fasta_module):
+    inp = tmp_path / "in.fa"
+    out = tmp_path / "out"
+    write_fasta(inp, [SeqRecord(Seq("ACGT"), id="seq1", description="")])
+
+    params = split_fasta_module.Params(
+        fasta_file=inp,
+        out_dir=out,
+        write_agp=False,
+    )
+    split_fasta_module.split_fasta(params)
+
+    assert not (out / "in.agp").exists()
+    assert len(list_output_fastas(out)) >= 1
+
+
+def test_split_by_max_seqs_per_file(tmp_path: Path, split_fasta_module):
+    inp = tmp_path / "in.fa"
+    out = tmp_path / "out"
+    recs = [
+        SeqRecord(Seq("A" * 10), id="s1", description=""),
+        SeqRecord(Seq("C" * 10), id="s2", description=""),
+        SeqRecord(Seq("G" * 10), id="s3", description=""),
+    ]
+    write_fasta(inp, recs)
+
+    params = split_fasta_module.Params(
+        fasta_file=inp,
+        out_dir=out,
+        max_seqs_per_file=2,
+        write_agp=False,
+    )
+    split_fasta_module.split_fasta(params)
+
+    fas = list_output_fastas(out)
+    assert len(fas) == 2
+    assert read_all_ids_from_fastas(out) == ["s1", "s2", "s3"]
+
+
+def test_chunk_merge_final_small_chunk_and_agp(tmp_path: Path, split_fasta_module):
+    """
+    seq_len=2100, max=1000 -> chunks [1000, 1000, 100]
+    min_chunk_length=200 -> final chunk merged -> [1000, 1100]
+    """
+    inp = tmp_path / "in.fa"
+    out = tmp_path / "out"
+    write_fasta(inp, [SeqRecord(Seq("A" * 2100), id="chr1", description="chr1")])
+
+    params = split_fasta_module.Params(
+        fasta_file=inp,
+        out_dir=out,
+        write_agp=True,
+        force_max_seq_length=True,
+        max_seq_length_per_file=1000,
+        min_chunk_length=200,
+        max_seqs_per_file=100000,  # avoid seq-count splitting interfering
+    )
+    split_fasta_module.split_fasta(params)
+
+    # 2 chunks expected after merge
+    assert read_all_ids_from_fastas(out) == [
+        "chr1_chunk_start_0",
+        "chr1_chunk_start_1000",
+    ]
+
+    agp = out / "in.agp"
+    assert agp.exists()
+
+    cols = parse_agp_lines(agp)
+    assert len(cols) == 2
+
+    # object, obj_beg, obj_end, part_no, type, comp_id, comp_beg, comp_end, orient
+    assert cols[0][0] == "chr1"
+    assert cols[0][1:4] == ["1", "1000", "1"]
+    assert cols[0][4] == "W"
+    assert cols[0][5] == "chr1_chunk_start_0"
+    assert cols[0][6:9] == ["1", "1000", "+"]
+
+    assert cols[1][0] == "chr1"
+    assert cols[1][1:4] == ["1001", "2100", "2"]
+    assert cols[1][4] == "W"
+    assert cols[1][5] == "chr1_chunk_start_1000"
+    assert cols[1][6:9] == ["1", "1100", "+"]
+
+
+def test_agp_part_numbers_restart_per_object(tmp_path: Path, split_fasta_module):
+    inp = tmp_path / "in.fa"
+    out = tmp_path / "out"
+    recs = [
+        SeqRecord(Seq("A" * 1200), id="obj1", description=""),
+        SeqRecord(Seq("C" * 1200), id="obj2", description=""),
+    ]
+    write_fasta(inp, recs)
+
+    params = split_fasta_module.Params(
+        fasta_file=inp,
+        out_dir=out,
+        write_agp=True,
+        force_max_seq_length=True,
+        max_seq_length_per_file=1000,
+        min_chunk_length=100,  # => 2 chunks each, no merge
+    )
+    split_fasta_module.split_fasta(params)
+
+    cols = parse_agp_lines(out / "in.agp")
+
+    by_obj = {}
+    for c in cols:
+        by_obj.setdefault(c[0], []).append(int(c[3]))
+
+    assert by_obj["obj1"] == [1, 2]
+    assert by_obj["obj2"] == [1, 2]

From 1dbf7ebbe7a14451b6747443851c22c8003771fe Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:34:48 +0000
Subject: [PATCH 06/36] Add Nextflow module and tests for running
 split_fasta.py

---
 .gitignore                                    |   2 +
 .../ensembl/fasta/splitfasta/environment.yml  |   8 +
 modules/ensembl/fasta/splitfasta/main.nf      | 106 ++++++
 .../fasta/splitfasta/tests/data/agp/test.agp  |   4 +
 .../fasta/splitfasta/tests/data/real/in.fa    |   6 +
 .../tests/data/splits/default/0/test.1.fa     |   4 +
 .../tests/data/splits/default/0/test.2.fa     |   2 +
 .../tests/data/splits/multi_dir/0/0/test.1.fa |   2 +
 .../tests/data/splits/multi_dir/0/1/test.2.fa |   2 +
 .../tests/data/splits/unique/0/test.0.1.fa    |   2 +
 .../tests/data/splits/unique/0/test.0.2.fa    |   2 +
 .../fasta/splitfasta/tests/main.nf.test       | 301 ++++++++++++++++++
 .../fasta/splitfasta/tests/main.nf.test.snap  | 168 ++++++++++
 13 files changed, 609 insertions(+)
 create mode 100644 modules/ensembl/fasta/splitfasta/environment.yml
 create mode 100644 modules/ensembl/fasta/splitfasta/main.nf
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/main.nf.test
 create mode 100644 modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap

diff --git a/.gitignore b/.gitignore
index e75900d..e03c5c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 .nextflow*
 .nf-test*
+__pycache__/
+.python-version
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/environment.yml b/modules/ensembl/fasta/splitfasta/environment.yml
new file mode 100644
index 0000000..759f3da
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/environment.yml
@@ -0,0 +1,8 @@
+---
+name: "fasta_splitfasta"
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=3.11.7
+  - biopython=1.86
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/splitfasta/main.nf
new file mode 100644
index 0000000..0a8b761
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/main.nf
@@ -0,0 +1,106 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+process FASTA_SPLITFASTA {
+    // Runs split_fasta.py on one FASTA; emits the split *.fa files and, optionally, an AGP.
+    tag "${meta.id}"
+    label 'process_low'
+
+    publishDir "${params.outdir ?: '.'}", mode: 'copy'
+
+    input:
+        tuple val(meta), path(fasta)
+
+    output:
+        tuple val(meta), path("**/*.fa"), emit: fasta
+        tuple val(meta), path("*.agp"), emit: agp, optional: true
+
+    script:
+        def args = []
+
+        if (params.max_seqs_per_file) {
+            args << "--max-seqs-per-file ${params.max_seqs_per_file}"
+        }
+
+        if (params.max_seq_length_per_file) {
+            args << "--max-seq-length-per-file ${params.max_seq_length_per_file}"
+        }
+
+        if (params.min_chunk_length) {
+            args << "--min-chunk-length ${params.min_chunk_length}"
+        }
+
+        if (params.max_files_per_directory) {
+            args << "--max-files-per-directory ${params.max_files_per_directory}"
+        }
+
+        if (params.max_dirs_per_directory) {
+            args << "--max-dirs-per-directory ${params.max_dirs_per_directory}"
+        }
+
+        if (params.force_max_seq_length) {
+            args << "--force-max-seq-length"
+        }
+
+        if (params.write_agp) {
+            args << "--write-agp"
+        }
+
+        if (params.unique_file_names) {
+            args << "--unique-file-names"
+        }
+
+        if (params.delete_existing_files) {
+            args << "--delete-existing-files"
+        }
+
+        """
+        python \\
+            ${moduleDir}/split_fasta.py \\
+            --fasta-file "\$PWD/${fasta}" \\
+            --out-dir "\$PWD" \\
+            ${args.join(' ')}
+        """
+
+    stub:
+        """
+        set -euo pipefail
+
+        FIXTURE_DIR="${moduleDir}/tests/data"
+
+        LAYOUT="default"
+        if [[ "${params.unique_file_names ?: false}" == "true" ]]; then
+            LAYOUT="unique"
+        elif [[ -n "${params.max_dirs_per_directory ?: ''}" || -n "${params.max_files_per_directory ?: ''}" ]]; then
+            LAYOUT="multi_dir"
+        fi
+
+        mkdir -p splits
+        cp -R "\$FIXTURE_DIR/splits/\$LAYOUT/." "splits/"
+
+        find splits -type f -name 'test*.fa' | while read -r f; do
+            bn=\$(basename "\$f")
+            dir=\$(dirname "\$f")
+            new_bn="\${bn/test/${meta.id}}"
+            mv "\$f" "\${dir}/\${new_bn}"
+        done
+
+        if [[ "${params.write_agp ?: false}" == "true" ]]; then
+            cp "\$FIXTURE_DIR/agp/test.agp" "${meta.id}.agp"
+        fi
+        """
+
+
+}
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp b/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
new file mode 100644
index 0000000..46fc419
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
@@ -0,0 +1,4 @@
+# AGP-version 2.0
+seq1	1	10	1	W	seq1	1	10	+
+seq2	1	10	1	W	seq2	1	10	+
+seq3	1	11	1	W	seq3	1	11	+
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa b/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
new file mode 100644
index 0000000..3d3f65c
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
@@ -0,0 +1,6 @@
+>seq1
+AAAAAAAAAA
+>seq2
+CCCCCCCCCC
+>seq3
+GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
new file mode 100644
index 0000000..7abe938
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
@@ -0,0 +1,4 @@
+>seq1
+AAAAAAAAAA
+>seq2
+CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
new file mode 100644
index 0000000..6287efa
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
@@ -0,0 +1,2 @@
+>seq3
+GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
new file mode 100644
index 0000000..9512f36
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
@@ -0,0 +1,2 @@
+>seq1
+AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
new file mode 100644
index 0000000..2f3b40f
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
@@ -0,0 +1,2 @@
+>seq2
+CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
new file mode 100644
index 0000000..9512f36
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
@@ -0,0 +1,2 @@
+>seq1
+AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
new file mode 100644
index 0000000..2f3b40f
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
@@ -0,0 +1,2 @@
+>seq2
+CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
new file mode 100644
index 0000000..3db1283
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
@@ -0,0 +1,301 @@
+// nf-core modules test fasta/splitfasta
+nextflow_process {
+
+    name "Test Process FASTA_SPLITFASTA"
+    script "../main.nf"
+    process "FASTA_SPLITFASTA"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "fasta"
+    tag "fasta/splitfasta"
+
+
+    def real_fa = new File("modules/ensembl/fasta/splitfasta/tests/data/real/in.fa").canonicalFile
+
+    test("Stub outputs: default layout, no AGP") {
+
+        when {
+            options "-stub"
+
+            // Ensure params are set explicitly for this test
+            params.write_agp = false
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            // fasta: tuple(meta, fa_paths)
+            assert process.out.fasta != null
+            assert process.out.fasta.size() == 1
+
+            def fasta_out = process.out.fasta[0]
+            def meta = fasta_out[0]
+            def fas  = fasta_out[1]
+
+            assert meta.id == "test"
+            assert fas != null
+            assert fas.size() == 2
+
+            // agp: tuple(meta, agp_paths) optional -> should be absent
+            assert process.out.agp != null
+            assert process.out.agp.size() == 0
+
+            // Ensure FASTA parsing works (downstream contract)
+            def merged = fas
+                .collect { path(it).fasta }
+                .inject([:]) { acc, m -> acc + m }
+
+            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Stub outputs: AGP optional output appears when enabled") {
+
+        when {
+            options "-stub"
+
+            params.write_agp = true
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            assert process.out.fasta.size() == 1
+            def fasta_out = process.out.fasta[0]
+            def fas = fasta_out[1]
+            assert fas.size() == 2
+
+            assert process.out.agp.size() == 1
+            def agp_out = process.out.agp[0]
+            def agp_meta = agp_out[0]
+            def agp = agp_out[1]
+            def agp_paths = agp instanceof List ? agp : [agp]
+            def agp_file = path(agp_paths[0]).toFile()
+
+            assert agp_meta.id == "test"
+            assert agp_paths.size() == 1
+            assert agp_file.name == "test.agp"
+
+            def agp_text = agp_file.text
+            assert agp_text.startsWith("# AGP-version 2.0")
+            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
+            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
+            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Stub outputs: unique_file_names contract") {
+
+        when {
+            options "-stub"
+
+            params.write_agp = false
+            params.unique_file_names = true
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            def fasta_out = process.out.fasta[0]
+            def fas = fasta_out[1]
+
+            assert fas.size() == 2
+            assert process.out.agp.size() == 0
+
+            // Contract check: names match the unique fixture pattern
+            assert fas.collect { path(it).toFile().name }.sort() == ["test.0.1.fa", "test.0.2.fa"]
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Stub outputs: nested directory layout contract") {
+
+        when {
+            options "-stub"
+
+            params.write_agp = false
+            params.unique_file_names = false
+
+            // Trigger stub's nested fixture selection
+            params.max_files_per_directory = 100
+            params.max_dirs_per_directory  = 100
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('dummy.fa')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+
+            def fastas = process.out.fasta[0][1]
+            assert fastas.size() == 2
+            assert process.out.agp.size() == 0
+
+            def rels = fastas.collect { path(it).toString() }
+            assert rels.any { it.contains("splits/0/0/") }
+            assert rels.any { it.contains("splits/0/1/") }
+
+            assertAll(
+                { assert process.success }
+            )
+        }
+    }
+
+    test("Real run: default behaviour produces FASTAs and no AGP") {
+
+        when {
+            params.write_agp = false
+            params.unique_file_names = false
+            params.max_seqs_per_file = null
+            params.max_seq_length_per_file = null
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+            params.force_max_seq_length = false
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            assert process.out.fasta != null
+            assert process.out.fasta.size() == 1
+
+            def out = process.out.fasta[0]
+            def meta = out[0]
+            def fas  = out[1]
+
+            assert meta.id == "test"
+            def fas_list = (fas instanceof List) ? fas : [fas]
+            assert fas_list.size() >= 1
+
+            assert process.out.agp != null
+            assert process.out.agp.size() == 0
+
+            def merged = fas_list
+                .collect { path(it).fasta }
+                .inject([:]) { acc, m -> acc + m }
+
+            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
+        }
+    }
+
+    test("Real run: write_agp=true emits exactly one AGP file") {
+
+        when {
+            params.write_agp = true
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+            params.max_seqs_per_file = null
+            params.max_seq_length_per_file = null
+            params.force_max_seq_length = false
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            assert process.out.agp != null
+            assert process.out.agp.size() == 1
+
+            def agp_out = process.out.agp[0]
+            def agp_meta = agp_out[0]
+            def agp_val  = agp_out[1]
+
+            assert agp_meta.id == "test"
+
+            def agp_list = (agp_val instanceof List) ? agp_val : [agp_val]
+            assert agp_list.size() == 1
+
+            def agp_path = path(agp_list[0])
+            assert agp_path.fileName.toString().endsWith(".agp")
+
+            def agp_text = agp_path.toFile().text
+            assert agp_text.startsWith("# AGP-version 2.0")
+            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
+            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
+            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
+        }
+    }
+
+    test("Real run: max_seqs_per_file=2 splits into 2 FASTA outputs") {
+
+        when {
+            params.write_agp = false
+            params.max_seqs_per_file = 2
+            params.unique_file_names = false
+            params.max_files_per_directory = null
+            params.max_dirs_per_directory  = null
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            def fas = process.out.fasta[0][1]
+            assert fas.size() == 2
+
+            def merged = fas
+                .collect { path(it).fasta }
+                .inject([:]) { acc, m -> acc + m }
+
+            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
+        }
+    }
+}
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
new file mode 100644
index 0000000..3390583
--- /dev/null
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
@@ -0,0 +1,168 @@
+{
+    "Stub outputs: AGP optional output appears when enabled": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                    ]
+                ],
+                "agp": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                    ]
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:07.606463"
+    },
+    "Stub outputs: nested directory layout contract": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "agp": [
+                    
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:11.815126"
+    },
+    "Stub outputs: default layout, no AGP": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "agp": [
+                    
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:05.482323"
+    },
+    "Stub outputs: unique_file_names contract": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "agp": [
+                    
+                ],
+                "fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-01-30T10:38:09.698407"
+    }
+}
\ No newline at end of file

From 2e62385135913b35874892a3ddedb17b0162ee13 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 14:43:29 +0000
Subject: [PATCH 07/36] Remove accidentally committed Python bytecode files

---
 .gitignore                                      |   1 +
 .../conftest.cpython-311-pytest-9.0.2.pyc       | Bin 1583 -> 0 bytes
 ...est_split_fasta.cpython-311-pytest-9.0.2.pyc | Bin 23708 -> 0 bytes
 3 files changed, 1 insertion(+)
 delete mode 100644 tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc
 delete mode 100644 tests/__pycache__/test_split_fasta.cpython-311-pytest-9.0.2.pyc

diff --git a/.gitignore b/.gitignore
index e03c5c1..961b31c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .nextflow*
 .nf-test*
 __pycache__/
+*.pyc
 .python-version
\ No newline at end of file
diff --git a/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc
deleted file mode 100644
index 187575a0d07bcbd9727705c0be55b2143b47d56e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1583
zcmZ`&&1)M+6rcT&cC{-ja*CU#YRVK!QWdJL$+d(M*AzES2rY)9%d*yvqIFi<?aZkD
zhzx4+tvR@ml0qr?<fga<_ts<o1g#NR45S3w9&%GKx#ZNDU9CPsJ3IUKH*ened;8Pe
zoB{ay_3wM$pE3Y{i9@=S5jYDucnDnJN<OGc@mFptHCX~pwVIKVn`~9%H?prcbJg64
zXH*RtAY;Q!O}ff2>Y)U{5r32AX7<1#hes&`RP(O%87OC;3eK&v#MIjkZ7_`>LcE~G
z85vRwLR1@TaG>&6#CwLh<%fWd?E^r0Ug?_keK2*CN`RN5*+{Fvv%feN+65h+>kiM$
zRbr~fG!xUTpt};)$Kqfklj4JHil)D=){zc*G)LozcIA%zmDZ6rv{bHpp3bM3`^|Nv
zAHa|DY(`)IpV!<(bAmcAY40l2{tn{Fm`Eq6qs)4gKu6)-GIqW(AYZG2<HBa(wtYk_
z)FP<4?pt+-P{$&n@6jn}$G8%1Lx(^I-rlB-pf%EsZDDlB!7evMU{Hsf4MZCV!z-!v
zE6|(L3!U0s=MJjGnomXhnKqJS+9(i~iCr>d63R{?#^YF+Id#6Z1DArf@4{9<p+CtC
z5<Y#fj)NvFn^+78(`~!%`N+1Jh7k$;FA>v12cs4xEax@D08`&vXKI^zKD!|FZK0ni
z`9aO0e6(3sI6;`@KW(?D*F?)02bdL;JQ4+=%1pGy4J7Qc%5oTmfsKQJGIJywieE;J
zg%!>s;{Oo+d9i4%5rm1={DNA`*H^3-+M;zo*c@3S7Da>-s}{8C0`c~+^HU`miQzZ6
zJu#W&H%_|Xc^=F!eE0dcg$ISx;^mX#<*4{hU+njf=ljL0-IXW${QgR$UpmM|dil}E
zk$&~qkM!%k<+Vs(8!8}Ee6EB1!u@Nf=HiLDc>LZUrO3S5H*a=78fa#>vriw~>s@&F
zapCn|>BjGOIpI4hto945J$>~b<CXiBQ=@!hlpnnv8CKu0x=RC9-@9?DmQK{t_e+rq
z`zq|Ia9|d?pPccaL3xhp>ut}cUW>5&hvBw8K3%lK?Kpdf@M4M9R504a9z`rSW{FNG
zJcBC>aofSHFm855^cokrCou}N!TX=gN;Z*ZTHV{CZH(e#{tWOXE{aQ>ywE>L+IS6@
z`B4lr>2NYsBuN^GdjI#+0}JCl0I&6C_Y*MJjh|r_$i?ofdvET0Jqbn<>`QPc7o~GU
Qz{xWO$ZySXVs;$ezw@M@X#fBK

diff --git a/tests/__pycache__/test_split_fasta.cpython-311-pytest-9.0.2.pyc b/tests/__pycache__/test_split_fasta.cpython-311-pytest-9.0.2.pyc
deleted file mode 100644
index 9a063375becde6ff48f52c527835c05f70f38849..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 23708
zcmeHPYj6}-cJ7|(dG<W?00{v$0$HGuC6K@vL6B?=hQ%9WV=%VawI_pmB!ou~x|;>#
znJOjmrUa{9%PDWHO{mmt%1a=SS}K(v{+IQ?tyF%@s77T~S5cuVaT2QXLw;4c_?7S6
z+tbtCngMyeTd~V(wa%P-?m72y?xW9r{(UqW7V!Mrf4@0$woVZKgB<0@V<Mk@0OUPE
z5fm{aoD|uwXVg3B5rt9dq)#LtPsTqQI2mBLm<gT?g67SHM#Cq=j3#B|(a6aN!+n|P
zXzXMRxIY~j@+g6~q?2(a$nX&GIuj2AuUBM7j{t8_q707#Z#3~Z@Kq*W2Yj`O*8^`-
z#A8CD;Vxz4NJ2EE=TrJ={Q8ci&mMjbh!+TcLil+hJvgo^hBN_|!E(k)(2svF{y)16
z@SdQH_%WX>6ry+(?@j5dBnUU~&!3$_&ZCoz`JA!B%Zgc#;=2mJ8<sy)2~>C$TsJIE
zq8=04gkC|D&U-F+UlGoWqVS3!;*We1{v(EX{+{ogKGfZJPYfqKMld}#IIfJ04I6>6
z^!d!lSlaLnr8GU2@EX$iM0!l66dJzs>WH3Bc+~*80xHY6W_X59Ym}ze*4BHwx4o#P
zRc+hoS$*5nPaWPimcF15Wya4ZN5_?OnY6Y|Pis1dNo^uCq9<8ATPNNyWEMJitE&(m
zRWI#l0F%P)P;^R}*`MEX>#;(puNdkph58l+(bxIGQ@4ZBX?f<*><jtkVlYt(CJMpC
zq8DDwpMg(^^K$|fT21WBFTbqm$2_zMlJstKpZ5c4Z{T0Vy#XUIeojv+BkG{7gaseC
zNGqQm1aOyr-p&bs{wU&A&~I3NP8UkfbL67bxpgRoBJ*qGs-9L1pE{fwKa~*G)d)i)
z(N?t)&}hhvXnGPUn?T~2k~Osj9&zGZ4*;#o^)p-E>;B8`yuaA6rPQ#cD0h_Pj)K(j
z8T<uDc0wSq?EDvy_k>{~Cps&%GqDGRS6_2hWmI2O`I}y66-I^kX2rZ!c`4rVqeyQF
zI#o6EIb$`GtQSqeQRxo|SHX3o%qlojQ>&qj$<>?5bTUirpBjp)QZ>9Ts#NL@jgU4-
z)1Q$tNh4X|)lg2%if7>F&pV#!@G;@KfS$rLqM+0~TYFSu*C@lSyrk}h_fk;}j*m_Z
zT<lWQDJ7Z8WRfF_mK;*YN6oU^x;HaEn967aK+4?B6w+FNNukjE1b_4Ov*H!Wa;oI5
z#sCsNR{ScJzu`x-OphsMvz|z)S~?-IDr9&OF`9HrZ2+T2Do49E>(rWBsZ+OrkCdVP
zE5M|%5ROdlza48un~Wt&vBaX_@$C~iobud`*3WF38O$FlM7xU7u2QrM4J))yyyaPl
z#NU4A@-sa7ze>OV`mIg3hKrjAN}C5jD>gq-0*pLaiac3}Jc(8gAuh@x8agOw=`OmR
z$By&1UX}UFJ92FD8Lb}Kxw>OtyYItxX<s7rVV4MK1XIHkN!H(6Z54c^E7nB*3J5-W
z;#;T^dEiSQ)({+Ry?T$}tkmvGo)f8wWW_3UUo|?b(vCVzofNCXeL&1Q+)Q;izckID
z;8WvBH0t*n1S+aBJW<L0s-~+W6W2ZJMi2}!W3T7xCeSXnvC5Vt1=4p$t5&aB3n6aQ
z>&Q#|&5Kh<-+uY>%QMQQ*QQ^y$h>}26ICAqsGhke;bUfQ9cAv<(sDC+t0gNUlj|vy
z_rx}2F^sRGs%z&*^wVk=!u{QHNNLr!0#FY7`8(5_uLpLm_t$&>Vebdt;-h;@kM0Fw
z>ZrxSa$7x*U#|e~i6P|8$G*s4BU=wsw#<T)69x9prl$72edzL`nFE)eoqm?n09jVh
zBZgR)qN6{8YRMpAwIbnBI}n!PV=lXnjkv1<sk^|qjv{^q09_BOG5ME2YTq^2zUw#5
zh4x*=_QR$2!ymODpKCu}Y(G(IKY{vFjJ#Beyfpdr0@MGj4@xdZ`b&|1kXXX=JCV$y
zoG8I1@$?!&ebj9D)Pa{D>)Ut&?dd%<dPTg57LoG+^DxZo_*ObhJe}I0`6ymz`%)yu
zchm3uVh)SWFQ14GAWz-Sk|*oIcQSA@=#<Y<<(HN)gOHV+vE!>5%K5V1tdHfvpY@-y
z(wYl+glxdsJ>`PgU_{7;vLWqnm9XN`qm@|BSl@A{%#Q?Zl}??k`J5qLvM1}(h3e#^
zCSR3r3|59C#0sa+nZFubfn{+8om`51MY`xmnTK`yDw~fZ1~MSnlMOp%ovuH_DCvTj
z^Woo*{{a4jxsXmZ+<e@c$@z8aPs+Z2XLvbTiJ(m6Z0L1Xdbvn8G9zlwI?_f)0eh5a
zHZmkCG2$QcI0q+6e3>QM;t1O+QJwmA^Kqt8bxK$zR$Bt499QbTW_rjr`fBN^DS`CV
zeFJN}t1dXo#nN(Cc9upBu8@;UslVc9H9o2|D2>_Zh{#;AWz_$eGbE?Wtp8N^W&ES=
zvO(*rBlmnGV;K7wUW{oBY2W@s#|=qKpY1gS*c+`VM?+T9+92!?ePn#>Acn4CHtb0b
zjbzf?whi$iCXA1JeguZ!i$_x$2CRDP*tp)>-ZAmUu7nnT(?nYP8EI#k1O%iG^6de-
zjrnd*Xo;=q3$V$xaIPZ}&Lt$n3kCw)3G5`W>n>S}Fd{qy{R#2n5enn+a=1Xus1Pg*
z{;{$b5YwyI^rH9>NjTR!z>~K-p&8x`#^^M<OZe3TqzXjYt3GXFkCF3n0=qB9&HPdV
zdJ!h(E$Fw7jks0xV1i2BgyDZarKUzTL$;05Y1lydG$Kz<ypcS0ZX~0RjA@3Sk4%g(
zs5HD!rc`51Qae3<E~6yX^n{wEA!;g<97~O+jZjic4W)@}#P%`WqqLq@)p6BmOb(5!
zqbWU^zA%wVjism@`Jm+)N>Y6i1E35Gkp@dCHJym7WcL^iwnUmU4H(CaO{jmwvI16?
z6-qVFey<@{96Rie$Lx;B?T$X~;7QzV_t|N8?Bd$tY`r}W*LICMS{hH;9zI-eVwxKp
zXO{7)H<C(vD0MEQtEVAA#<ceDusCR7)iCo?F}!|qKVdHx!)r0llHxPopO5^?n?LwV
z`F;78|2Iz+nhq744wae?6;^+*C><_IhYQl-JJAgvMK{kyH!n0cEz~tGG&C;;gxHo(
z1S#hKoWSJ4#jxOO{z%$1CvD0<UX(gZQfEQxywmhB<^*QEGpFB)5fUrKH{-Ve7~fos
zca-8ZXHWoSlLsaQMe3NBI!aPUkwM}qQ)avtjkDMY6{gdiiE~g~ES$2_5iGp8oatMR
z)QX13UJKriZ_Kw9<C|H|x_-KYB9ph2<ShlM>sl{BLEdt$cTVmiRO%{9UGq{`iQ$}5
zX5pUPhqHj!V&s&c?yzV}2t2e}lJoJ6E>Sz>{<6qlIVC)J8v&A>#*|FyHJ$PnF2tSJ
zdaWRMK6#85gf9=tlxB)TV0I~sc`3q&H+IXKtVTAZ{%3Sb54NoBQ>V+ZrR`R^L)K*j
zD#4|8cuiUNcQU+GcG#Pf>~=pab?GZv^|0w~MxE9fEcIG;z02+u1)aw0=2OL*wAt-h
zmj#gvC^GE)KsNX~D<@{{f2v#dqad6KmP3XW8G<btC+;P-WSvg)4(4;lvPG7R4q6;x
zTj|y<YZ~)2jyVa~lJ?YCe6=N*?N-0c^sI7(Zl%YqCyZ9ArozpKxa(_hy6RvJF84U3
zp*#*z8kJSquxXXa_VI%wB$^szkJ}1#YSpyLXfzVa1@#SODU2Fj<Fs60SWs5~9Y!s`
zhg#CK%u;cdxm7AUO-PuJGe0$JNlgiqR1+-x<<(ke>09Y>>#Algap@FsdmN=^)oP5v
z9kss(*Z)f`VRn1-5p@qrfLmhWeb!@mv|e?9e4YR>!uzcUKHH7(A?tyA$6e}d4q}RW
zA4$+shfzP8x{!qZuO%naD!1{U0{uEn?Ee1i;>8}aaCemLXdo^V8@4YQ(f!<rh6%1R
zBh!m9!*z)n(dzdp9$3vg)MtU+i`h))J~Fx|-sr<(*z539M+uA*pqX%()0QR84eRI0
z>nH)T(pgmMi-esdaE!ol0w)N(MBrrtG^NEuj4(@A{Q(hvNPwBP64u}}^;L481F+kn
z+f{|ge`1ljMY>BJ1cj|(*=$gAet!cqLb)*w8+56LbCB(2^7txOw?;n>*8jW>mSXq2
z-T4EzP8EW?i^1Kc;O^UBk-iv^BL2w(U%(=*gXwv^D6K6?YYWm^w;8*39a*v=X36@<
zk_CVz8>wc=ejHvud#o7VSPE~1-7LqZv`c-{eFbUF%vpe`zL~R^`+@LxURqNs<D5ba
zz=~xL&S5#>#VKUqnX*d=_OLB3OL9uHXk5}ps4(?$CeAT)))D~+a2|sNIR(6&o%0}L
zb}5UQBe&(Y{QmM{;6S0P{7M64+jkAj$pZ!8?46eeN@d(6G9KW?vKQyEobck5c^NjT
zDZZ3q58q<5WRXCpsEuGTA-SB(Ol4OkP2hnSkHdnT0%nuHoChl^i_?WHHC?SqVVX{P
zu<4rpDwD#}P1jC<uQ(~}SNuu<Ms4V3_$o~78`k9PSD75XuMz1}V4@Cta{jC@>o-ls
zKsKO#T7Ak6(~bc)?FjBArefSNA!SV`RxuTAhHJ&7)Ggz(im7Oqus+kZCGa7RyQY_G
zr)QNTuU0Cm7y(sN;pSsTKy^-+f$xad%9Y#nRO(DqF{sol4cVY+Du#6WLYa>vBrEQ$
zYl!1CL0~F2!c3$I>;TH&tE^(n2jOh^jAd-*WGo-Z&Jv`FY3(P<YNctJ<y}j^uIY6c
zSysw1JgMlz#YYBS4Nmt2Y4z|TPCY2iSG;_JbQv0rI%S<MQ=@V&;xN2j>CeS<nkX`#
ztZR}r=Tp}FURtAbs=jvZZ*l0-s{MLvIfct^jWs1uVlB$ruUT7Mda7AlTzrUots{q(
zdi70ijWK0irFq-&TwIaR0^`|;*<1UvJ}u+Oq4kwOZ|y^Gy`H$;y|qK4qLa9@M%UnS
zx1fivc+4e+hm{T15<_qq?a4LZamH@8r#SUe$g@)?t;)t9dn!80#W9@SgyHOpZUkkc
zT`x-8icYX4<`IWZ&_}PdA*Ef}yrd0n`3|?C)=C{&>2u9K*RGRleQw7}eIZL}rO&NW
zUz~kz<U8Bv+S^dIJ~#1QZ9_^Uc5HTDd^1dQoR}m|Vt+*c_THYJu2xKH_5<l@?H*_y
zJbiBLb*=RW<nC&<;Hyk*bYv{a7)W?nKi$8b3k{8or7~Q!bu_IGrxgmS90mg6;+1Ld
z4<Eylm^Jaef~;s@MFT4uSkb_W239n%qJjHpz>o${tG#T2{P^Af!HVnM-(o~ClK%@)
zNCYZN;VP|r8>_IsJD4u7*_t+s(QK~anr>^Y+^BP#H|kjmrGa8J%^r5rCD~)FUP2>F
zNmaqG>I7)XTm2D%3k2Q(IHK+cytvP{WL{-YEB3PT&9*=)%kz%iw1yYEbg{}#yIR?X
zR`rhwn;^gz?4`l+jHc5504(}9ux+gr7+=;$oY)-vePSjt#}Ym*`9DiO-vhWS0bt9p
z*lS2EcRz#M5MR1Wn_=(LV*Xv4>PAAyLwy_Ip7<yoj|Dem$hI`Z?WRAdaI5c<M7?Wg
zty<X%!1T7<T>vjoymZ!uWg9yTXy>bIKLFne@Fsa!i~nktoV4h#P7!#GKoY>dopq;r
z$)fNg>(S~!v2PI|obOfbWZML|ZOklSY`=g_oYY2Xe=et|#&C>rctZUfghC3|LYNGm
z6c)pR+<2cItk3*1{(k({!Qc7|P0tmZo+~vyS6KagQ94?Zjuxb&?xp{RriF(#F0?+f
z(6WAEZR=v3?EwhV9smIB0cfbU2jC085P77$yuB!Reg1{u`i0<zMW46Z&)irw#&5U(
zQ^6bI6h>y?3ljKW|6%|?6)MBZV$AUg@qJFfF7^qDRj78aF9Jwbb*XAzOw=xapP;eL
zx4ol!(AB)`p~63ssEAf_-|eqZD`{c0iqnNvd|5b^z<jpAss-x`$L2(Jip>NsZN+|Q
zIX1ngAT>|znc8z{@AO_m3GjDbYA#95MF#DZDeTlBAJ909jZmTO;Y=pSUP}ZVz&RiH
z<P=v_AoBNPdF||uqP(FbZ<stl+fVdMyQg;-q?TC`U~2cQco{nqTKGFJwUo*@rw{|M
zV%dXpSWb9x3T;0zWtR}_VOw05<dkmFxTK9xVcN}^ILEALiGTw*kHLbR0$$F}d5|%?
zl*P=EMYPbm8F6~oOfMDbhS?^7sXenzbMgjY{GFFJl*%}#5C^a#^Wff7C^=%|uGuE;
z;_SdVVnI#;uN5}uw@X>f99fX#JfSm3XLeAf>n^M-zfw0U;JWTPxtqxBotL^xWt>wO
z3(z9-;r>&oVZ>*0uQNTl0K7P2K~4d)6*?E^&|<SAR<cRI>t`DQpuNU9c|9=x&P(e{
zWt>xp16YxHaBtEtv2oXIBX@Cj;2g0ar-0WAoAcYHEM|^4653s8p}bG+!S4gKN-SXI
zDloN*wA!@V@(0xFx2rPTDc9j|U1RGhtSi4#54EJ8Ik~4GneV*RQ!3+}$XI|DnGg4;
z#zuT5_d3&)3&6_?&Q1Zd6*?E^(8bK1jyn9UX>2`(7Roy{wg+jISiox6BXtZmt+xCD
zwfgO9Y`hm<x?L~`8yO=!;j{k)eDHQb$7vPUZt-fT#%rEdaqS<kcG3mg_`cq0m5>s`
zd2sn=<SGVIH)t@$p5Oh%iV~$QWzJ0<Isb@|3uOJcdFB9}J;+<}l;fk{Y*2|+7GsB9
zS10AIEiQH$`vxP($1C#A6gXX?F#U8kphy*NoMf$2>Y>eWHk1vU+ic{ljBPemPZeNO
zo{Viajl{iVn~hzfvdzY|G0zchh0C>ZOj%`avx(5=yKKbV2pDy2v$4l#ZGX!?Xxkc{
zWDntAMxL?ZbVV0j7fVh?e0U4<5BHe~TJc)Zz={S|H1Lhm0Imi+b;fpxi*;hwF|HM4
zMFT4uSkb^YTm!5V+kRaAQ&{|WY0(5Xs91+g*~*&w=b)-@5%@8I{RC*e=`M}e*y<5q
zj$#X9mx=Zh0@DO&fhiHIeHxS&ba3%mS~u_eF#M?r+_$RG^(DIPgsAgKC_=2Z`g4(D
zWQRf7YK|XAIMG8J;-LE1<U@--Dq9@-1!1%<q|u2S=OPbZ;_2WQxv*;vRXsU&ZuAtc
zb;aE)d{K~&+#=}opsrp+2(<R69fMBpcO>-8JKNsfmLK|HQz6)24EC3T{rpJiQaV;*
z*wN46gN}aM)_)eGZ1E>Ri$4HZ{Hd$9_`_!4^J{)FP?S4Ma_8iMJJI;$!G&l&u7&hH
z&cKwo5R82kY?%wT%x;|Bcjb}1rx<K21=~IfcFqMm|8d8)v$yJR>eqMQ#L3p3rQps2
zd+*>f)y>nGzHggN;S$ahK&A#}Pt3_}!1y~awUx>^rw|9QBJ<$hQ#h7LY}_?_g1b07
zaE@4zQ^0G5&H3$87Bfc{zAXvm$>kRXHXFh6eM9WDw9%5}mkn*@%ayn(XseoL*E<;@
zi$r%6Ga742Mu4vD%#55;so@%a9)RI}YGj;Wgc<<0^;r5WzX2&~y7>Lhyb05_#QX#|
zTf1e;v8Rbf2cy|qCu<KX-H*T$&RWyG;9h?H@qqeI@DTB@9S6h(rlNR<T21x8g0RN*
z-Vvgc>~BF>U2wh&LUY0SE(mP}=ey|lh@CSE?qL;i6|xhAMKO_=&;mr<*6IX7pBT?W
z3O{a8=M_6=;e{VJh(yKi{P9HrKW@;vN_^rPr10YgTOSp-ez0m$z>gdJP+TiMa!X$n
K@KX~ot^Xh0r`|dM


From af21a08044b4262f178be15f3a868949f5f8b82d Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 30 Jan 2026 15:47:18 +0000
Subject: [PATCH 08/36] Docstring updates and minor pytest refactor

---
 .../ensembl/fasta/splitfasta/split_fasta.py   | 140 ++++++++++++++++--
 tests/test_split_fasta.py                     |  87 +++++++----
 2 files changed, 186 insertions(+), 41 deletions(-)

diff --git a/modules/ensembl/fasta/splitfasta/split_fasta.py b/modules/ensembl/fasta/splitfasta/split_fasta.py
index 164ec44..5f4b0e1 100644
--- a/modules/ensembl/fasta/splitfasta/split_fasta.py
+++ b/modules/ensembl/fasta/splitfasta/split_fasta.py
@@ -1,6 +1,25 @@
 #!/usr/bin/env python3
 
-"""Split a FASTA file (possibly gzipped) into multiple smaller FASTA files."""
+"""
+Split a FASTA file into multiple FASTA files, optionally chunking long sequences.
+
+This script reads an input FASTA (optionally gzipped) and writes one or more FASTA
+files to an output directory. Records can be split across output files either by:
+
+- maximum number of records per file (``max_seqs_per_file``), and/or
+- maximum cumulative sequence length per file (``max_seq_length_per_file``).
+
+If ``force_max_seq_length`` is enabled, individual sequences longer than
+``max_seq_length_per_file`` are split into chunks. When chunking, a final remainder
+chunk shorter than ``min_chunk_length`` can be merged into the previous chunk.
+
+Optionally, an AGP v2.0 file can be written describing how each input sequence
+maps to output contigs/chunks.
+
+The implementation is designed to stream the input once and write outputs in a
+single pass.
+"""
+
 
 import inspect
 import logging
@@ -37,7 +56,19 @@ def init_logging_with_args(args):
 
 
 class Params:
-    """Class to hold parameters for splitting FASTA files."""
+    """
+    Validated configuration for splitting a FASTA file.
+
+    Attributes correspond to CLI arguments and control:
+    - output location and cleanup behaviour,
+    - how records are grouped into output FASTA files,
+    - whether long sequences are chunked, and
+    - whether to write an AGP file describing the splits.
+
+    Validation is performed in ``_validate_params()`` and will raise ``ValueError``
+    for invalid combinations (e.g. ``min_chunk_length`` without
+    ``max_seq_length_per_file``).
+    """
 
     def __init__(
         self,
@@ -70,6 +101,13 @@ def __init__(
         self._validate_params()
 
     def _validate_params(self) -> None:
+        """
+        Validate parameter values and combinations.
+
+        Raises:
+            ValueError: If any numeric limit is <= 0, or if ``min_chunk_length`` is
+                set without ``max_seq_length_per_file``.
+        """
         if self.max_dirs_per_directory is not None and self.max_dirs_per_directory <= 0:
             raise ValueError("--max-dirs-per-directory must be > 0 or None")
         if (
@@ -95,8 +133,22 @@ def _validate_params(self) -> None:
 
 class OutputWriter:
     """
-    Manages output file creation and counters, writing in a single pass.
-    Creates/cleans directories lazily as required.
+    Write split FASTA outputs and (optionally) an AGP file.
+
+    The writer manages:
+    - output directory creation/cleanup (lazy, per-directory),
+    - output file naming (optionally unique across directories),
+    - record and length counters used to decide when to roll over to a new file,
+    - an optional AGP v2.0 file describing the mapping from original sequences
+      to output contigs/chunks.
+
+    Notes:
+        Output layout is controlled by:
+        - ``max_files_per_directory``: how many FASTA files to write per directory
+          before incrementing the directory index.
+        - ``max_dirs_per_directory``: how directory indices are expanded into a
+          multi-level path (base-N style).
+        - ``unique_file_names``: whether to include directory index in filenames.
     """
 
     def __init__(self, params: Params):
@@ -135,7 +187,14 @@ def _create_or_clean_dir(self, dir_path: Path) -> None:
             raise
 
     def _get_subdir_path(self, dir_index: int) -> Path:
-        """Computes subdirectory path based on dir_index and max_dirs_per_directory."""
+        """Return the output subdirectory path for a given directory index.
+
+        Args:
+            dir_index: Zero-based directory index computed from file count.
+
+        Returns:
+            A Path under ``params.out_dir`` into which output files are written.
+        """
         parts = []
         max_dirs = self.params.max_dirs_per_directory
         if max_dirs is None:
@@ -150,9 +209,16 @@ def _get_subdir_path(self, dir_index: int) -> Path:
         return self.params.out_dir.joinpath(*parts)
 
     def _get_file_and_dir_index(self) -> Tuple[int, int]:
-        """
-        Determines index of file and directory based on file count and max files per directory.
-        Returns (file_index, dir_index).
+        """Compute the file index within a directory and the directory index.
+
+        ``file_count`` increments monotonically for each output file. If
+        ``max_files_per_directory`` is set, files are grouped into directories such
+        that each directory contains at most that many files.
+
+        Returns:
+            (file_index, dir_index) where:
+            - file_index is 1-based within the directory, and
+            - dir_index is 0-based across directories.
         """
         max_files = self.params.max_files_per_directory
         if max_files is None:
@@ -182,10 +248,18 @@ def add_agp_entry(
         part_id: str,
         part_length: int,
     ) -> None:
-        """Adds an entry to the AGP file."""
-        # AGP columns for WGS contig component type:
-        # object, object_beg, object_end, part_number, component_type,
-        # component_id, component_beg, component_end, orientation
+        """
+        Write a single AGP v2.0 component line for a chunk/contig.
+        Coordinates written to AGP are 1-based and inclusive.
+
+        Args:
+            object_id: The original input sequence ID (AGP 'object').
+            start: Start coordinate on the object (1-based, inclusive).
+            end: End coordinate on the object (1-based, inclusive).
+            part_nr: Component part number for this object (starts at 1 per object).
+            part_id: Output contig/chunk identifier (AGP 'component_id').
+            part_length: Length of the component in bases.
+        """
         if self._agp_fh is None:
             return
         try:
@@ -224,7 +298,7 @@ def open_new_file(self) -> None:
         self.file_len = 0
 
     def write_record(self, record: SeqRecord) -> None:
-        """Writes a SeqRecord to the current output file."""
+        """Writes a SeqRecord to the current output file and update counters."""
         try:
             SeqIO.write(record, self._fh, "fasta")
             self.record_count += 1
@@ -243,7 +317,11 @@ def close(self) -> None:
 
 
 def _get_param_defaults() -> dict:
-    """Retrieve default values for Params class attributes."""
+    """
+    Return default values from the ``Params`` constructor signature.
+
+    Keeps CLI help text in sync with the defaults defined in ``Params.__init__``.
+    """
     signature = inspect.signature(Params.__init__)
     defaults = {}
     for name, param in signature.parameters.items():
@@ -253,7 +331,30 @@ def _get_param_defaults() -> dict:
 
 
 def split_fasta(params: Params) -> None:
-    """Splits the input FASTA file into multiple smaller FASTA files, chunking long sequences if required."""
+    """
+    Split an input FASTA into multiple output FASTA files.
+
+    Records are streamed from the input file and written in a single pass.
+    Output file rollover can be triggered by:
+    - exceeding ``max_seqs_per_file`` (record-count based), and/or
+    - exceeding ``max_seq_length_per_file`` (cumulative sequence length per file).
+
+    If ``force_max_seq_length`` is enabled and an individual record is longer than
+    ``max_seq_length_per_file``, the sequence is split into fixed-size chunks.
+    If the final remainder chunk is shorter than ``min_chunk_length``, it is merged
+    with the previous chunk (which may exceed ``max_seq_length_per_file``).
+
+    When ``write_agp`` is enabled, an AGP v2.0 file is written describing the
+    mapping from each original sequence to its output contigs/chunks.
+
+    Args:
+        params: Validated configuration controlling splitting/chunking behaviour.
+
+    Raises:
+        FileNotFoundError: If the input FASTA does not exist.
+        ValueError: If parameter validation fails (raised when Params is created).
+        Exception: Propagates unexpected I/O or parsing errors.
+    """
     if not params.fasta_file.exists():
         logging.error(
             "DEBUG: fasta_file=%r resolved=%r cwd=%r",
@@ -363,6 +464,15 @@ def split_fasta(params: Params) -> None:
 
 
 def parse_args(argv: Optional[List[str]] = None) -> Params:
+    """
+    Parse CLI arguments and return a validated Params object.
+
+    Args:
+        argv: Optional argument list for testing. If None, uses sys.argv.
+
+    Returns:
+        A validated Params instance.
+    """
     defaults = _get_param_defaults()
     parser = ArgumentParser(
         description="Split a FASTA file into multiple FASTA files, optionally chunking long sequences."
diff --git a/tests/test_split_fasta.py b/tests/test_split_fasta.py
index 8a48af2..0b5fb20 100644
--- a/tests/test_split_fasta.py
+++ b/tests/test_split_fasta.py
@@ -8,15 +8,18 @@
 
 
 def write_fasta(path: Path, records):
+    """Write a list of SeqRecord objects to a FASTA file."""
     with open(path, "w", encoding="utf-8", newline="\n") as fh:
         SeqIO.write(records, fh, "fasta")
 
 
 def list_output_fastas(out_dir: Path):
+    """Return all FASTA files produced under the output directory."""
     return sorted(out_dir.rglob("*.fa"))
 
 
 def read_all_ids_from_fastas(out_dir: Path):
+    """Read and return all sequence IDs from all FASTA files under out_dir."""
     ids = []
     for fa in list_output_fastas(out_dir):
         with open(fa, "r", encoding="utf-8") as fh:
@@ -25,18 +28,26 @@ def read_all_ids_from_fastas(out_dir: Path):
 
 
 def parse_agp_lines(agp_path: Path):
+    """
+    Parse an AGP file into a list of column lists, excluding comments
+    and blank lines.
+    """
     lines = [l.rstrip("\n") for l in agp_path.read_text(encoding="utf-8").splitlines()]
     lines = [l for l in lines if l and not l.startswith("#")]
     return [l.split("\t") for l in lines]
 
 
 def test_no_agp_by_default(tmp_path: Path, split_fasta_module):
-    inp = tmp_path / "in.fa"
+    """
+    By default, splitting a FASTA should produce one or more FASTA outputs
+    but must NOT create an AGP file unless write_agp is explicitly enabled.
+    """
+    input_fasta = tmp_path / "in.fa"
     out = tmp_path / "out"
-    write_fasta(inp, [SeqRecord(Seq("ACGT"), id="seq1", description="")])
+    write_fasta(input_fasta, [SeqRecord(Seq("ACGT"), id="seq1", description="")])
 
     params = split_fasta_module.Params(
-        fasta_file=inp,
+        fasta_file=input_fasta,
         out_dir=out,
         write_agp=False,
     )
@@ -47,17 +58,22 @@ def test_no_agp_by_default(tmp_path: Path, split_fasta_module):
 
 
 def test_split_by_max_seqs_per_file(tmp_path: Path, split_fasta_module):
-    inp = tmp_path / "in.fa"
+    """
+    When max_seqs_per_file is set, sequences should be split across
+    multiple FASTA files while preserving original sequence order
+    and IDs.
+    """
+    input_fasta = tmp_path / "in.fa"
     out = tmp_path / "out"
     recs = [
         SeqRecord(Seq("A" * 10), id="s1", description=""),
         SeqRecord(Seq("C" * 10), id="s2", description=""),
         SeqRecord(Seq("G" * 10), id="s3", description=""),
     ]
-    write_fasta(inp, recs)
+    write_fasta(input_fasta, recs)
 
     params = split_fasta_module.Params(
-        fasta_file=inp,
+        fasta_file=input_fasta,
         out_dir=out,
         max_seqs_per_file=2,
         write_agp=False,
@@ -71,15 +87,19 @@ def test_split_by_max_seqs_per_file(tmp_path: Path, split_fasta_module):
 
 def test_chunk_merge_final_small_chunk_and_agp(tmp_path: Path, split_fasta_module):
     """
-    seq_len=2100, max=1000 -> chunks [1000, 1000, 100]
-    min_chunk_length=200 -> final chunk merged -> [1000, 1100]
+    When force_max_seq_length is enabled, long sequences are chunked.
+    If the final chunk is shorter than min_chunk_length, it should be
+    merged with the previous chunk, and the AGP file must reflect the
+    merged coordinates correctly.
     """
-    inp = tmp_path / "in.fa"
+    input_fasta = tmp_path / "in.fa"
     out = tmp_path / "out"
-    write_fasta(inp, [SeqRecord(Seq("A" * 2100), id="chr1", description="chr1")])
+    write_fasta(
+        input_fasta, [SeqRecord(Seq("A" * 2100), id="chr1", description="chr1")]
+    )
 
     params = split_fasta_module.Params(
-        fasta_file=inp,
+        fasta_file=input_fasta,
         out_dir=out,
         write_agp=True,
         force_max_seq_length=True,
@@ -101,31 +121,46 @@ def test_chunk_merge_final_small_chunk_and_agp(tmp_path: Path, split_fasta_modul
     cols = parse_agp_lines(agp)
     assert len(cols) == 2
 
-    # object, obj_beg, obj_end, part_no, type, comp_id, comp_beg, comp_end, orient
-    assert cols[0][0] == "chr1"
-    assert cols[0][1:4] == ["1", "1000", "1"]
-    assert cols[0][4] == "W"
-    assert cols[0][5] == "chr1_chunk_start_0"
-    assert cols[0][6:9] == ["1", "1000", "+"]
-
-    assert cols[1][0] == "chr1"
-    assert cols[1][1:4] == ["1001", "2100", "2"]
-    assert cols[1][4] == "W"
-    assert cols[1][5] == "chr1_chunk_start_1000"
-    assert cols[1][6:9] == ["1", "1100", "+"]
+    # object, obj_start, obj_end, part_no, type, comp_id, comp_start, comp_end, orientation
+    assert cols[0] == [
+        "chr1",
+        "1",
+        "1000",
+        "1",
+        "W",
+        "chr1_chunk_start_0",
+        "1",
+        "1000",
+        "+",
+    ]
+    assert cols[1] == [
+        "chr1",
+        "1001",
+        "2100",
+        "2",
+        "W",
+        "chr1_chunk_start_1000",
+        "1",
+        "1100",
+        "+",
+    ]
 
 
 def test_agp_part_numbers_restart_per_object(tmp_path: Path, split_fasta_module):
-    inp = tmp_path / "in.fa"
+    """
+    AGP part numbers must restart at 1 for each new input sequence
+    (object), even when multiple sequences are chunked in the same run.
+    """
+    input_fasta = tmp_path / "in.fa"
     out = tmp_path / "out"
     recs = [
         SeqRecord(Seq("A" * 1200), id="obj1", description=""),
         SeqRecord(Seq("C" * 1200), id="obj2", description=""),
     ]
-    write_fasta(inp, recs)
+    write_fasta(input_fasta, recs)
 
     params = split_fasta_module.Params(
-        fasta_file=inp,
+        fasta_file=input_fasta,
         out_dir=out,
         write_agp=True,
         force_max_seq_length=True,

From ac505b8714bd0cbf235995b1f20b1e8f7fa74cb2 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Mon, 2 Feb 2026 12:09:06 +0000
Subject: [PATCH 09/36] Header updates

---
 .../ensembl/fasta/splitfasta/split_fasta.py   | 24 ++++++++++++++++++-
 .../fasta/splitfasta/tests/main.nf.test       | 15 ++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/modules/ensembl/fasta/splitfasta/split_fasta.py b/modules/ensembl/fasta/splitfasta/split_fasta.py
index 5f4b0e1..b25846c 100644
--- a/modules/ensembl/fasta/splitfasta/split_fasta.py
+++ b/modules/ensembl/fasta/splitfasta/split_fasta.py
@@ -1,4 +1,19 @@
-#!/usr/bin/env python3
+#!env python3
+
+# See the NOTICE file distributed with this work for additional information
+# regarding copyright ownership.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 """
 Split a FASTA file into multiple FASTA files, optionally chunking long sequences.
@@ -480,11 +495,13 @@ def parse_args(argv: Optional[List[str]] = None) -> Params:
     parser.add_argument(
         "--fasta-file",
         type=Path,
+        metavar="FASTA",
         required=True,
         help="Input raw or compressed FASTA file containing sequences to split",
     )
     parser.add_argument(
         "--out-dir",
+        metavar="DIR",
         type=Path,
         help="Top-level output directory (default: input FASTA directory)",
     )
@@ -495,27 +512,32 @@ def parse_args(argv: Optional[List[str]] = None) -> Params:
     )
     parser.add_argument(
         "--max-seqs-per-file",
+        metavar="N",
         type=int,
         help=f"Max records per output file (default: {defaults['max_seqs_per_file']})",
     )
     parser.add_argument(
         "--max-seq-length-per-file",
         type=int,
+        metavar="BP",
         help=f"Max cumulative sequence length per output file (default: {defaults['max_seq_length_per_file']})",
     )
     parser.add_argument(
         "--min-chunk-length",
         type=int,
+        metavar="BP",
         help=f"Minimum length of a chunk allowed as a remainder (default: {defaults['min_chunk_length']})",
     )
     parser.add_argument(
         "--max-files-per-directory",
         type=int,
+        metavar="N",
         help=f"Max files per directory before moving to next computed dir (default: {defaults['max_files_per_directory']})",
     )
     parser.add_argument(
         "--max-dirs-per-directory",
         type=int,
+        metavar="N",
         help=f"Max subdirectories per directory level (default: {defaults['max_dirs_per_directory']})",
     )
     parser.add_argument(
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
index 3db1283..c23c0cd 100644
--- a/modules/ensembl/fasta/splitfasta/tests/main.nf.test
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
@@ -1,3 +1,18 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 // nf-core modules test fasta/splitfasta
 nextflow_process {
 

From 1ee480d5f3f806b7a3bccb9c202c8db2dd69e9b7 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 3 Feb 2026 11:18:02 +0000
Subject: [PATCH 10/36] Moved python stuff to ensembl-genomio

---
 .../ensembl/fasta/splitfasta/environment.yml  |   3 +-
 modules/ensembl/fasta/splitfasta/main.nf      |   3 +
 .../fasta/splitfasta/tests/main.nf.test.snap  | 102 ++--------
 tests/conftest.py                             |  24 ---
 tests/test_split_fasta.py                     | 179 ------------------
 5 files changed, 18 insertions(+), 293 deletions(-)
 delete mode 100644 tests/conftest.py
 delete mode 100644 tests/test_split_fasta.py

diff --git a/modules/ensembl/fasta/splitfasta/environment.yml b/modules/ensembl/fasta/splitfasta/environment.yml
index 759f3da..2d01414 100644
--- a/modules/ensembl/fasta/splitfasta/environment.yml
+++ b/modules/ensembl/fasta/splitfasta/environment.yml
@@ -4,5 +4,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.11.7
-  - biopython=1.86
\ No newline at end of file
+  - ensembl-genomio=1.6.1
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/splitfasta/main.nf
index 0a8b761..590a7ff 100644
--- a/modules/ensembl/fasta/splitfasta/main.nf
+++ b/modules/ensembl/fasta/splitfasta/main.nf
@@ -18,6 +18,9 @@ process FASTA_SPLITFASTA {
     tag "${meta.id}"
     label 'process_low'
 
+    conda "${moduleDir}/environment.yml"
+    container "ensemblorg/ensembl-genomio:v1.6.1"
+
     publishDir "${params.outdir ?: '.'}", mode: 'copy'
 
     input:
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
index 3390583..a27a644 100644
--- a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
@@ -3,42 +3,16 @@
         "content": [
             {
                 "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
+                    
                 ],
                 "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
-                    ]
+                    
                 ],
                 "agp": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
-                    ]
+                    
                 ],
                 "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
+                    
                 ]
             }
         ],
@@ -46,21 +20,13 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.3"
         },
-        "timestamp": "2026-01-30T10:38:07.606463"
+        "timestamp": "2026-02-03T11:07:14.941473"
     },
     "Stub outputs: nested directory layout contract": {
         "content": [
             {
                 "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
+                    
                 ],
                 "1": [
                     
@@ -69,15 +35,7 @@
                     
                 ],
                 "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
+                    
                 ]
             }
         ],
@@ -85,21 +43,13 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.3"
         },
-        "timestamp": "2026-01-30T10:38:11.815126"
+        "timestamp": "2026-02-03T11:07:18.579901"
     },
     "Stub outputs: default layout, no AGP": {
         "content": [
             {
                 "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
+                    
                 ],
                 "1": [
                     
@@ -108,15 +58,7 @@
                     
                 ],
                 "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
+                    
                 ]
             }
         ],
@@ -124,21 +66,13 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.3"
         },
-        "timestamp": "2026-01-30T10:38:05.482323"
+        "timestamp": "2026-02-03T11:07:13.112305"
     },
     "Stub outputs: unique_file_names contract": {
         "content": [
             {
                 "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
+                    
                 ],
                 "1": [
                     
@@ -147,15 +81,7 @@
                     
                 ],
                 "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
+                    
                 ]
             }
         ],
@@ -163,6 +89,6 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.3"
         },
-        "timestamp": "2026-01-30T10:38:09.698407"
+        "timestamp": "2026-02-03T11:07:16.747928"
     }
 }
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index 766dbc3..0000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import importlib.util
-from pathlib import Path
-
-import pytest
-
-
-@pytest.fixture(scope="session")
-def split_fasta_module():
-    """
-    Load modules/ensembl/fasta/splitfasta/split_fasta.py as a Python module
-    regardless of whether 'modules/' is a Python package.
-    """
-    repo_root = Path(__file__).resolve().parents[1]
-    module_path = (
-        repo_root / "modules" / "ensembl" / "fasta" / "splitfasta" / "split_fasta.py"
-    )
-
-    spec = importlib.util.spec_from_file_location("split_fasta", module_path)
-    if spec is None or spec.loader is None:
-        raise RuntimeError(f"Could not load module spec from {module_path}")
-
-    mod = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(mod)
-    return mod
diff --git a/tests/test_split_fasta.py b/tests/test_split_fasta.py
deleted file mode 100644
index 0b5fb20..0000000
--- a/tests/test_split_fasta.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# tests/test_split_fasta.py
-from pathlib import Path
-
-import pytest
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
-
-
-def write_fasta(path: Path, records):
-    """Write a list of SeqRecord objects to a FASTA file."""
-    with open(path, "w", encoding="utf-8", newline="\n") as fh:
-        SeqIO.write(records, fh, "fasta")
-
-
-def list_output_fastas(out_dir: Path):
-    """Return all FASTA files produced under the output directory."""
-    return sorted(out_dir.rglob("*.fa"))
-
-
-def read_all_ids_from_fastas(out_dir: Path):
-    """Read and return all sequence IDs from all FASTA files under out_dir."""
-    ids = []
-    for fa in list_output_fastas(out_dir):
-        with open(fa, "r", encoding="utf-8") as fh:
-            ids.extend([r.id for r in SeqIO.parse(fh, "fasta")])
-    return ids
-
-
-def parse_agp_lines(agp_path: Path):
-    """
-    Parse an AGP file into a list of column lists, excluding comments
-    and blank lines.
-    """
-    lines = [l.rstrip("\n") for l in agp_path.read_text(encoding="utf-8").splitlines()]
-    lines = [l for l in lines if l and not l.startswith("#")]
-    return [l.split("\t") for l in lines]
-
-
-def test_no_agp_by_default(tmp_path: Path, split_fasta_module):
-    """
-    By default, splitting a FASTA should produce one or more FASTA outputs
-    but must NOT create an AGP file unless write_agp is explicitly enabled.
-    """
-    input_fasta = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    write_fasta(input_fasta, [SeqRecord(Seq("ACGT"), id="seq1", description="")])
-
-    params = split_fasta_module.Params(
-        fasta_file=input_fasta,
-        out_dir=out,
-        write_agp=False,
-    )
-    split_fasta_module.split_fasta(params)
-
-    assert not (out / "in.agp").exists()
-    assert len(list_output_fastas(out)) >= 1
-
-
-def test_split_by_max_seqs_per_file(tmp_path: Path, split_fasta_module):
-    """
-    When max_seqs_per_file is set, sequences should be split across
-    multiple FASTA files while preserving original sequence order
-    and IDs.
-    """
-    input_fasta = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    recs = [
-        SeqRecord(Seq("A" * 10), id="s1", description=""),
-        SeqRecord(Seq("C" * 10), id="s2", description=""),
-        SeqRecord(Seq("G" * 10), id="s3", description=""),
-    ]
-    write_fasta(input_fasta, recs)
-
-    params = split_fasta_module.Params(
-        fasta_file=input_fasta,
-        out_dir=out,
-        max_seqs_per_file=2,
-        write_agp=False,
-    )
-    split_fasta_module.split_fasta(params)
-
-    fas = list_output_fastas(out)
-    assert len(fas) == 2
-    assert read_all_ids_from_fastas(out) == ["s1", "s2", "s3"]
-
-
-def test_chunk_merge_final_small_chunk_and_agp(tmp_path: Path, split_fasta_module):
-    """
-    When force_max_seq_length is enabled, long sequences are chunked.
-    If the final chunk is shorter than min_chunk_length, it should be
-    merged with the previous chunk, and the AGP file must reflect the
-    merged coordinates correctly.
-    """
-    input_fasta = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    write_fasta(
-        input_fasta, [SeqRecord(Seq("A" * 2100), id="chr1", description="chr1")]
-    )
-
-    params = split_fasta_module.Params(
-        fasta_file=input_fasta,
-        out_dir=out,
-        write_agp=True,
-        force_max_seq_length=True,
-        max_seq_length_per_file=1000,
-        min_chunk_length=200,
-        max_seqs_per_file=100000,  # avoid seq-count splitting interfering
-    )
-    split_fasta_module.split_fasta(params)
-
-    # 2 chunks expected after merge
-    assert read_all_ids_from_fastas(out) == [
-        "chr1_chunk_start_0",
-        "chr1_chunk_start_1000",
-    ]
-
-    agp = out / "in.agp"
-    assert agp.exists()
-
-    cols = parse_agp_lines(agp)
-    assert len(cols) == 2
-
-    # object, obj_start, obj_end, part_no, type, comp_id, comp_start, comp_end, orientation
-    assert cols[0] == [
-        "chr1",
-        "1",
-        "1000",
-        "1",
-        "W",
-        "chr1_chunk_start_0",
-        "1",
-        "1000",
-        "+",
-    ]
-    assert cols[1] == [
-        "chr1",
-        "1001",
-        "2100",
-        "2",
-        "W",
-        "chr1_chunk_start_1000",
-        "1",
-        "1100",
-        "+",
-    ]
-
-
-def test_agp_part_numbers_restart_per_object(tmp_path: Path, split_fasta_module):
-    """
-    AGP part numbers must restart at 1 for each new input sequence
-    (object), even when multiple sequences are chunked in the same run.
-    """
-    input_fasta = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    recs = [
-        SeqRecord(Seq("A" * 1200), id="obj1", description=""),
-        SeqRecord(Seq("C" * 1200), id="obj2", description=""),
-    ]
-    write_fasta(input_fasta, recs)
-
-    params = split_fasta_module.Params(
-        fasta_file=input_fasta,
-        out_dir=out,
-        write_agp=True,
-        force_max_seq_length=True,
-        max_seq_length_per_file=1000,
-        min_chunk_length=100,  # => 2 chunks each, no merge
-    )
-    split_fasta_module.split_fasta(params)
-
-    cols = parse_agp_lines(out / "in.agp")
-
-    by_obj = {}
-    for c in cols:
-        by_obj.setdefault(c[0], []).append(int(c[3]))
-
-    assert by_obj["obj1"] == [1, 2]
-    assert by_obj["obj2"] == [1, 2]

From 66550dcc4f376a2fb9cddce5cc6e033ff0e352de Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 3 Feb 2026 11:45:43 +0000
Subject: [PATCH 11/36] Test fixes

---
 modules/ensembl/fasta/splitfasta/main.nf      |   7 --
 .../fasta/splitfasta/tests/main.nf.test.snap  | 110 +++++++++++++++---
 2 files changed, 92 insertions(+), 25 deletions(-)

diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/splitfasta/main.nf
index 590a7ff..8871477 100644
--- a/modules/ensembl/fasta/splitfasta/main.nf
+++ b/modules/ensembl/fasta/splitfasta/main.nf
@@ -93,13 +93,6 @@ process FASTA_SPLITFASTA {
         mkdir -p splits
         cp -R "\$FIXTURE_DIR/splits/\$LAYOUT/." "splits/"
 
-        find splits -type f -name 'test*.fa' | while read -r f; do
-            bn=\$(basename "\$f")
-            dir=\$(dirname "\$f")
-            new_bn="\${bn/test/${meta.id}}"
-            mv "\$f" "\${dir}/\${new_bn}"
-        done
-
         if [[ "${params.write_agp ?: false}" == "true" ]]; then
             cp "\$FIXTURE_DIR/agp/test.agp" "${meta.id}.agp"
         fi
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
index a27a644..7c44fbc 100644
--- a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
@@ -3,30 +3,64 @@
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
                 ],
                 "1": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                    ]
                 ],
                 "agp": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
                 ]
             }
         ],
         "meta": {
             "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
+            "nextflow": "25.04.6"
         },
-        "timestamp": "2026-02-03T11:07:14.941473"
+        "timestamp": "2026-02-03T11:44:20.723299027"
     },
     "Stub outputs: nested directory layout contract": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
                 ],
                 "1": [
                     
@@ -35,21 +69,37 @@
                     
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
                 ]
             }
         ],
         "meta": {
             "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
+            "nextflow": "25.04.6"
         },
-        "timestamp": "2026-02-03T11:07:18.579901"
+        "timestamp": "2026-02-03T11:44:45.167257411"
     },
     "Stub outputs: default layout, no AGP": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
                 ],
                 "1": [
                     
@@ -58,21 +108,37 @@
                     
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
+                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                        ]
+                    ]
                 ]
             }
         ],
         "meta": {
             "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
+            "nextflow": "25.04.6"
         },
-        "timestamp": "2026-02-03T11:07:13.112305"
+        "timestamp": "2026-02-03T11:44:08.447183258"
     },
     "Stub outputs: unique_file_names contract": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
                 ],
                 "1": [
                     
@@ -81,14 +147,22 @@
                     
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
+                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                        ]
+                    ]
                 ]
             }
         ],
         "meta": {
             "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
+            "nextflow": "25.04.6"
         },
-        "timestamp": "2026-02-03T11:07:16.747928"
+        "timestamp": "2026-02-03T11:44:33.225993321"
     }
 }
\ No newline at end of file

From da555a10b9010d46f81119c08eb12435aa60ba72 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 3 Feb 2026 20:15:04 +0000
Subject: [PATCH 12/36] Actually remove python script!

---
 .../ensembl/fasta/splitfasta/split_fasta.py   | 594 ------------------
 1 file changed, 594 deletions(-)
 delete mode 100644 modules/ensembl/fasta/splitfasta/split_fasta.py

diff --git a/modules/ensembl/fasta/splitfasta/split_fasta.py b/modules/ensembl/fasta/splitfasta/split_fasta.py
deleted file mode 100644
index b25846c..0000000
--- a/modules/ensembl/fasta/splitfasta/split_fasta.py
+++ /dev/null
@@ -1,594 +0,0 @@
-#!env python3
-
-# See the NOTICE file distributed with this work for additional information
-# regarding copyright ownership.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Split a FASTA file into multiple FASTA files, optionally chunking long sequences.
-
-This script reads an input FASTA (optionally gzipped) and writes one or more FASTA
-files to an output directory. Records can be split across output files either by:
-
-- maximum number of records per file (``max_seqs_per_file``), and/or
-- maximum cumulative sequence length per file (``max_seq_length_per_file``).
-
-If ``force_max_seq_length`` is enabled, individual sequences longer than
-``max_seq_length_per_file`` are split into chunks. When chunking, a final remainder
-chunk shorter than ``min_chunk_length`` can be merged into the previous chunk.
-
-Optionally, an AGP v2.0 file can be written describing how each input sequence
-maps to output contigs/chunks.
-
-The implementation is designed to stream the input once and write outputs in a
-single pass.
-"""
-
-
-import inspect
-import logging
-import shutil
-from pathlib import Path
-from typing import Optional, List, Set, Tuple
-
-from Bio import SeqIO
-from Bio.SeqRecord import SeqRecord
-
-try:
-    from ensembl.utils.archive import open_gz_file  # type: ignore
-except ImportError:
-    import gzip
-
-    def open_gz_file(path):
-        p = str(path)
-        return gzip.open(p, "rt") if p.endswith(".gz") else open(p, "rt")
-
-
-try:
-    from ensembl.utils.argparse import ArgumentParser  # type: ignore
-except ImportError:
-    from argparse import ArgumentParser
-
-try:
-    from ensembl.utils.logging import init_logging_with_args  # type: ignore
-except ImportError:
-    import logging
-
-    def init_logging_with_args(args):
-        level = getattr(args, "log_level", "INFO")
-        logging.basicConfig(level=level)
-
-
-class Params:
-    """
-    Validated configuration for splitting a FASTA file.
-
-    Attributes correspond to CLI arguments and control:
-    - output location and cleanup behaviour,
-    - how records are grouped into output FASTA files,
-    - whether long sequences are chunked, and
-    - whether to write an AGP file describing the splits.
-
-    Validation is performed in ``_validate_params()`` and will raise ``ValueError``
-    for invalid combinations (e.g. ``min_chunk_length`` without
-    ``max_seq_length_per_file``).
-    """
-
-    def __init__(
-        self,
-        fasta_file: Path,
-        out_dir: Optional[Path] = None,
-        write_agp: bool = False,
-        max_seqs_per_file: Optional[int] = None,
-        max_seq_length_per_file: Optional[int] = None,
-        min_chunk_length: Optional[int] = None,
-        max_files_per_directory: Optional[int] = None,
-        max_dirs_per_directory: Optional[int] = None,
-        delete_existing_files: bool = False,
-        unique_file_names: bool = False,
-        delete_original_file: bool = False,
-        force_max_seq_length: bool = False,
-    ):
-        self.fasta_file = fasta_file
-        self.out_dir = out_dir if out_dir is not None else fasta_file.parent
-        self.write_agp = write_agp
-        self.max_seqs_per_file = max_seqs_per_file
-        self.max_seq_length_per_file = max_seq_length_per_file
-        self.min_chunk_length = min_chunk_length
-        self.max_files_per_directory = max_files_per_directory
-        self.max_dirs_per_directory = max_dirs_per_directory
-        self.delete_existing_files = delete_existing_files
-        self.unique_file_names = unique_file_names
-        self.delete_original_file = delete_original_file
-        self.force_max_seq_length = force_max_seq_length
-
-        self._validate_params()
-
-    def _validate_params(self) -> None:
-        """
-        Validate parameter values and combinations.
-
-        Raises:
-            ValueError: If any numeric limit is <= 0, or if ``min_chunk_length`` is
-                set without ``max_seq_length_per_file``.
-        """
-        if self.max_dirs_per_directory is not None and self.max_dirs_per_directory <= 0:
-            raise ValueError("--max-dirs-per-directory must be > 0 or None")
-        if (
-            self.max_files_per_directory is not None
-            and self.max_files_per_directory <= 0
-        ):
-            raise ValueError("--max-files-per-directory must be > 0 or None")
-        if self.max_seqs_per_file is not None and self.max_seqs_per_file <= 0:
-            raise ValueError("--max-seqs-per-file must be > 0 or None")
-        if (
-            self.max_seq_length_per_file is not None
-            and self.max_seq_length_per_file <= 0
-        ):
-            raise ValueError("--max-seq-length-per-file must be > 0 or None")
-        if self.min_chunk_length is not None:
-            if self.max_seq_length_per_file is None:
-                raise ValueError(
-                    "--min-chunk-length requires --max-seq-length-per-file"
-                )
-            if self.min_chunk_length <= 0:
-                raise ValueError("--min-chunk-length must be > 0")
-
-
-class OutputWriter:
-    """
-    Write split FASTA outputs and (optionally) an AGP file.
-
-    The writer manages:
-    - output directory creation/cleanup (lazy, per-directory),
-    - output file naming (optionally unique across directories),
-    - record and length counters used to decide when to roll over to a new file,
-    - an optional AGP v2.0 file describing the mapping from original sequences
-      to output contigs/chunks.
-
-    Notes:
-        Output layout is controlled by:
-        - ``max_files_per_directory``: how many FASTA files to write per directory
-          before incrementing the directory index.
-        - ``max_dirs_per_directory``: how directory indices are expanded into a
-          multi-level path (base-N style).
-        - ``unique_file_names``: whether to include directory index in filenames.
-    """
-
-    def __init__(self, params: Params):
-        self.params = params
-        self.basename = (
-            params.fasta_file.name.removesuffix(".gz")
-            .removesuffix(".fa")
-            .removesuffix(".fasta")
-        )
-        self.agp_file = (
-            self.params.out_dir.joinpath(self.basename + ".agp")
-            if params.write_agp
-            else None
-        )
-        self.file_count = 0
-        self.record_count = 0
-        self.file_len = 0
-        self._fh = None
-        self._agp_fh = None
-        self._cleaned_dirs: Set[Path] = set()
-
-        self.open_new_file()
-
-    def _create_or_clean_dir(self, dir_path: Path) -> None:
-        try:
-            dir_path.mkdir(parents=True, exist_ok=True)
-            if self.params.delete_existing_files and dir_path not in self._cleaned_dirs:
-                for child in dir_path.iterdir():
-                    if child.is_dir():
-                        shutil.rmtree(child)
-                    else:
-                        child.unlink()
-                self._cleaned_dirs.add(dir_path)
-        except Exception:
-            logging.exception("Failed to prepare output directory '%s'", dir_path)
-            raise
-
-    def _get_subdir_path(self, dir_index: int) -> Path:
-        """Return the output subdirectory path for a given directory index.
-
-        Args:
-            dir_index: Zero-based directory index computed from file count.
-
-        Returns:
-            A Path under ``params.out_dir`` into which output files are written.
-        """
-        parts = []
-        max_dirs = self.params.max_dirs_per_directory
-        if max_dirs is None:
-            parts.append("1")
-        else:
-            current_index = dir_index
-            while current_index >= 0:
-                parts.append(f"{current_index % max_dirs}")
-                current_index = current_index // max_dirs - 1
-
-        parts.reverse()
-        return self.params.out_dir.joinpath(*parts)
-
-    def _get_file_and_dir_index(self) -> Tuple[int, int]:
-        """Compute the file index within a directory and the directory index.
-
-        ``file_count`` increments monotonically for each output file. If
-        ``max_files_per_directory`` is set, files are grouped into directories such
-        that each directory contains at most that many files.
-
-        Returns:
-            (file_index, dir_index) where:
-            - file_index is 1-based within the directory, and
-            - dir_index is 0-based across directories.
-        """
-        max_files = self.params.max_files_per_directory
-        if max_files is None:
-            return self.file_count, 0
-        adjusted_count = self.file_count - 1
-        return (adjusted_count % max_files + 1, adjusted_count // max_files)
-
-    def _get_path_for_next_file(self) -> Path:
-        """Computes path for the next output file."""
-        self.file_count += 1
-        file_index, dir_index = self._get_file_and_dir_index()
-        subdir_path = self._get_subdir_path(dir_index)
-        self._create_or_clean_dir(subdir_path)
-
-        if self.params.unique_file_names:
-            file_name = f"{self.basename}.{dir_index}.{file_index}.fa"
-        else:
-            file_name = f"{self.basename}.{file_index}.fa"
-        return subdir_path.joinpath(file_name)
-
-    def add_agp_entry(
-        self,
-        object_id: str,
-        start: int,
-        end: int,
-        part_nr: int,
-        part_id: str,
-        part_length: int,
-    ) -> None:
-        """
-        Write a single AGP v2.0 component line for a chunk/contig.
-        Coordinates written to AGP are 1-based and inclusive.
-
-        Args:
-            object_id: The original input sequence ID (AGP 'object').
-            start: Start coordinate on the object (1-based, inclusive).
-            end: End coordinate on the object (1-based, inclusive).
-            part_nr: Component part number for this object (starts at 1 per object).
-            part_id: Output contig/chunk identifier (AGP 'component_id').
-            part_length: Length of the component in bases.
-        """
-        if self._agp_fh is None:
-            return
-        try:
-            line = f"{object_id}\t{start}\t{end}\t{part_nr}\tW\t{part_id}\t1\t{part_length}\t+\n"
-            self._agp_fh.write(line)
-        except Exception:
-            logging.exception("Failed to write AGP entry for part '%s'", part_id)
-            raise
-
-    def create_agp_file(self) -> None:
-        """Creates the AGP file for recording sequence chunking."""
-        if self.agp_file is None:
-            return
-        try:
-            self.params.out_dir.mkdir(parents=True, exist_ok=True)
-            self._agp_fh = open(self.agp_file, "w")
-            self._agp_fh.write("# AGP-version 2.0\n")
-            logging.info("Created AGP file '%s'", self.agp_file)
-        except Exception:
-            logging.exception("Failed to open AGP file '%s'", self.agp_file)
-            raise
-
-    def open_new_file(self) -> None:
-        """Closes current file (if any) and opens a new output file."""
-        if self._fh is not None:
-            self._fh.close()
-
-        path = self._get_path_for_next_file()
-        try:
-            self._fh = open(path, "w")
-            logging.debug("Opened output file '%s'", path)
-        except Exception:
-            logging.exception("Failed to open output file '%s'", path)
-            raise
-        self.record_count = 0
-        self.file_len = 0
-
-    def write_record(self, record: SeqRecord) -> None:
-        """Writes a SeqRecord to the current output file and update counters."""
-        try:
-            SeqIO.write(record, self._fh, "fasta")
-            self.record_count += 1
-            self.file_len += len(record.seq)
-        except Exception:
-            logging.exception("Failed to write record '%s' to output file", record.id)
-            raise
-
-    def close(self) -> None:
-        if self._fh is not None:
-            self._fh.close()
-            self._fh = None
-        if self._agp_fh is not None:
-            self._agp_fh.close()
-            self._agp_fh = None
-
-
-def _get_param_defaults() -> dict:
-    """
-    Return default values from the ``Params`` constructor signature.
-
-    Keeps CLI help text in sync with the defaults defined in ``Params.__init__``.
-    """
-    signature = inspect.signature(Params.__init__)
-    defaults = {}
-    for name, param in signature.parameters.items():
-        if name != "self" and param.default is not inspect.Parameter.empty:
-            defaults[name] = param.default
-    return defaults
-
-
-def split_fasta(params: Params) -> None:
-    """
-    Split an input FASTA into multiple output FASTA files.
-
-    Records are streamed from the input file and written in a single pass.
-    Output file rollover can be triggered by:
-    - exceeding ``max_seqs_per_file`` (record-count based), and/or
-    - exceeding ``max_seq_length_per_file`` (cumulative sequence length per file).
-
-    If ``force_max_seq_length`` is enabled and an individual record is longer than
-    ``max_seq_length_per_file``, the sequence is split into fixed-size chunks.
-    If the final remainder chunk is shorter than ``min_chunk_length``, it is merged
-    with the previous chunk (which may exceed ``max_seq_length_per_file``).
-
-    When ``write_agp`` is enabled, an AGP v2.0 file is written describing the
-    mapping from each original sequence to its output contigs/chunks.
-
-    Args:
-        params: Validated configuration controlling splitting/chunking behaviour.
-
-    Raises:
-        FileNotFoundError: If the input FASTA does not exist.
-        ValueError: If parameter validation fails (raised when Params is created).
-        Exception: Propagates unexpected I/O or parsing errors.
-    """
-    if not params.fasta_file.exists():
-        logging.error(
-            "DEBUG: fasta_file=%r resolved=%r cwd=%r",
-            str(params.fasta_file),
-            str(Path(params.fasta_file).resolve()),
-            str(Path.cwd()),
-        )
-        raise FileNotFoundError(f"Fasta file '{params.fasta_file}' does not exist")
-
-    # Do nothing if file size is 0
-    if params.fasta_file.stat().st_size == 0:
-        logging.info("Input FASTA '%s' is empty; nothing to do", params.fasta_file)
-        return
-
-    params.out_dir.mkdir(parents=True, exist_ok=True)
-
-    writer = OutputWriter(params)
-
-    try:
-        if params.write_agp:
-            writer.create_agp_file()
-
-        with open_gz_file(params.fasta_file) as fh:
-            for record in SeqIO.parse(fh, "fasta"):
-                seq_len = len(record.seq)
-                max_seq_len = params.max_seq_length_per_file
-                max_seqs = params.max_seqs_per_file
-
-                if max_seqs is not None and writer.record_count >= max_seqs:
-                    writer.open_new_file()
-
-                if max_seq_len is None or writer.file_len + seq_len <= max_seq_len:
-                    writer.write_record(record)
-                    if params.write_agp:
-                        writer.add_agp_entry(
-                            record.id, 1, seq_len, 1, record.id, seq_len
-                        )
-                    continue
-
-                if params.force_max_seq_length and seq_len > max_seq_len:
-                    starts = list(range(0, seq_len, max_seq_len))
-                    ends = [min(s + max_seq_len, seq_len) for s in starts]
-
-                    if params.min_chunk_length is not None and len(starts) > 1:
-                        last_chunk_len = ends[-1] - starts[-1]
-                        if last_chunk_len < params.min_chunk_length:
-                            logging.warning(
-                                "Length of last chunk of record '%s' is %d, lower than min_chunk_length: %d;"
-                                + "merging with previous chunk",
-                                record.id,
-                                last_chunk_len,
-                                params.min_chunk_length,
-                            )
-                            ends[-2] = seq_len
-                            starts.pop()
-                            ends.pop()
-
-                    for i, (start, end) in enumerate(zip(starts, ends), start=1):
-                        chunk_seq = record.seq[start:end]
-                        chunk_record = SeqRecord(
-                            chunk_seq,
-                            id=f"{record.id}_chunk_start_{start}",
-                            description=f"{record.description} (part {i})",
-                        )
-                        if writer.record_count > 0:
-                            writer.open_new_file()
-                        writer.write_record(chunk_record)
-
-                        if params.write_agp:
-                            writer.add_agp_entry(
-                                record.id,
-                                start + 1,
-                                end,
-                                i,
-                                chunk_record.id,
-                                len(chunk_seq),
-                            )
-                else:
-                    logging.warning(
-                        "Record '%s' length %d exceeds max_seq_length_per_file %d but chunking not enabled",
-                        record.id,
-                        seq_len,
-                        max_seq_len,
-                    )
-                    if writer.record_count > 0:
-                        writer.open_new_file()
-                    writer.write_record(record)
-                    if params.write_agp:
-                        writer.add_agp_entry(
-                            record.id, 1, seq_len, 1, record.id, seq_len
-                        )
-    except Exception:
-        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
-        raise
-    finally:
-        writer.close()
-
-    if params.delete_original_file:
-        try:
-            params.fasta_file.unlink(missing_ok=True)
-        except Exception:
-            logging.warning(
-                "Failed to delete original FASTA file '%s'",
-                params.fasta_file,
-                exc_info=True,
-            )
-
-
-def parse_args(argv: Optional[List[str]] = None) -> Params:
-    """
-    Parse CLI arguments and return a validated Params object.
-
-    Args:
-        argv: Optional argument list for testing. If None, uses sys.argv.
-
-    Returns:
-        A validated Params instance.
-    """
-    defaults = _get_param_defaults()
-    parser = ArgumentParser(
-        description="Split a FASTA file into multiple FASTA files, optionally chunking long sequences."
-    )
-    parser.add_argument(
-        "--fasta-file",
-        type=Path,
-        metavar="FASTA",
-        required=True,
-        help="Input raw or compressed FASTA file containing sequences to split",
-    )
-    parser.add_argument(
-        "--out-dir",
-        metavar="DIR",
-        type=Path,
-        help="Top-level output directory (default: input FASTA directory)",
-    )
-    parser.add_argument(
-        "--write-agp",
-        action="store_true",
-        help=f"Write AGP file describing the splits (default: {defaults['write_agp']})",
-    )
-    parser.add_argument(
-        "--max-seqs-per-file",
-        metavar="N",
-        type=int,
-        help=f"Max records per output file (default: {defaults['max_seqs_per_file']})",
-    )
-    parser.add_argument(
-        "--max-seq-length-per-file",
-        type=int,
-        metavar="BP",
-        help=f"Max cumulative sequence length per output file (default: {defaults['max_seq_length_per_file']})",
-    )
-    parser.add_argument(
-        "--min-chunk-length",
-        type=int,
-        metavar="BP",
-        help=f"Minimum length of a chunk allowed as a remainder (default: {defaults['min_chunk_length']})",
-    )
-    parser.add_argument(
-        "--max-files-per-directory",
-        type=int,
-        metavar="N",
-        help=f"Max files per directory before moving to next computed dir (default: {defaults['max_files_per_directory']})",
-    )
-    parser.add_argument(
-        "--max-dirs-per-directory",
-        type=int,
-        metavar="N",
-        help=f"Max subdirectories per directory level (default: {defaults['max_dirs_per_directory']})",
-    )
-    parser.add_argument(
-        "--delete-existing-files",
-        action="store_true",
-        help=f"Delete existing files within computed output dirs (default: {defaults['delete_existing_files']})",
-    )
-    parser.add_argument(
-        "--unique-file-names",
-        action="store_true",
-        help=f"Make output file names unique across dirs by including dir_index (default: {defaults['unique_file_names']})",
-    )
-    parser.add_argument(
-        "--delete-original-file",
-        action="store_true",
-        help=f"Delete original input FASTA after splitting (default: {defaults['delete_original_file']})",
-    )
-    parser.add_argument(
-        "--force-max-seq-length",
-        action="store_true",
-        help=f"Chunk single sequences longer than max-seq-length-per-file (default: {defaults['force_max_seq_length']})",
-    )
-
-    args = parser.parse_args(argv)
-    init_logging_with_args(args)
-
-    params = Params(
-        fasta_file=args.fasta_file,
-        out_dir=args.out_dir,
-        write_agp=args.write_agp,
-        max_seqs_per_file=args.max_seqs_per_file,
-        max_seq_length_per_file=args.max_seq_length_per_file,
-        min_chunk_length=args.min_chunk_length,
-        max_files_per_directory=args.max_files_per_directory,
-        max_dirs_per_directory=args.max_dirs_per_directory,
-        delete_existing_files=args.delete_existing_files,
-        unique_file_names=args.unique_file_names,
-        delete_original_file=args.delete_original_file,
-        force_max_seq_length=args.force_max_seq_length,
-    )
-    return params
-
-
-def main(argv: Optional[List[str]] = None) -> None:
-    try:
-        params = parse_args(argv)
-        split_fasta(params)
-    except Exception:
-        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
-        raise
-
-
-if __name__ == "__main__":
-    main()

From ad779fd0c66073eb589054b155f7bc1ad80d45b3 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 3 Feb 2026 21:27:30 +0000
Subject: [PATCH 13/36] Update call to splitting script

---
 modules/ensembl/fasta/splitfasta/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/splitfasta/main.nf
index 8871477..1d11362 100644
--- a/modules/ensembl/fasta/splitfasta/main.nf
+++ b/modules/ensembl/fasta/splitfasta/main.nf
@@ -71,7 +71,7 @@ process FASTA_SPLITFASTA {
 
         """
         python \\
-            ${moduleDir}/split_fasta.py \\
+            fasta_split \\
             --fasta-file \$PWD/${fasta} \\
             --out-dir \$PWD \\
             ${args.join(' ')}

From 1934c1f692430cf906d5a3b7d3d0ed8e108529b2 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 10 Feb 2026 15:18:33 +0000
Subject: [PATCH 14/36] Add FASTA recombination tests

---
 .../{splitfasta => recombine}/environment.yml |   2 +-
 modules/ensembl/fasta/recombine/main.nf       |  78 ++++++++++
 .../recombine/tests/data/agp/output/test.fa   |   2 +
 .../recombine/tests/data/agp/splits/part1.fa  |   2 +
 .../recombine/tests/data/agp/splits/part2.fa  |   2 +
 .../fasta/recombine/tests/data/agp/test.agp   |   3 +
 .../tests/data/custom_regex/output/test.fa    |   2 +
 .../tests/data/custom_regex/splits/seq1_1.fa  |   2 +
 .../tests/data/custom_regex/splits/seq1_5.fa  |   2 +
 .../tests/data/extra_suffix/output/test.fa    |   2 +
 .../splits/seq1_chunk_start_1.fsa             |   2 +
 .../splits/seq1_chunk_start_5.fsa             |   2 +
 .../tests/data/header/output/test.fa          |   4 +
 .../data/header/splits/seq1_chunk_start_1.fa  |   2 +
 .../data/header/splits/seq1_chunk_start_5.fa  |   2 +
 .../tests/data/header/splits/seq2.fa          |   2 +
 .../fasta/recombine/tests/main.nf.test        | 140 ++++++++++++++++++
 .../fasta/recombine/tests/main.nf.test.snap   | 104 +++++++++++++
 modules/ensembl/fasta/split/environment.yml   |   7 +
 .../fasta/{splitfasta => split}/main.nf       |  21 ++-
 .../tests/data/agp/test.agp                   |   0
 .../tests/data/real/in.fa                     |   0
 .../tests/data/splits/default/0/test.1.fa     |   0
 .../tests/data/splits/default/0/test.2.fa     |   0
 .../tests/data/splits/multi_dir/0/0/test.1.fa |   0
 .../tests/data/splits/multi_dir/0/1/test.2.fa |   0
 .../tests/data/splits/unique/0/test.0.1.fa    |   0
 .../tests/data/splits/unique/0/test.0.2.fa    |   0
 .../{splitfasta => split}/tests/main.nf.test  |  10 +-
 .../tests/main.nf.test.snap                   |   0
 30 files changed, 376 insertions(+), 17 deletions(-)
 rename modules/ensembl/fasta/{splitfasta => recombine}/environment.yml (76%)
 create mode 100644 modules/ensembl/fasta/recombine/main.nf
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/splits/part1.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/splits/part2.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/test.agp
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_1.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_5.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_1.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_5.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/header/splits/seq2.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/main.nf.test
 create mode 100644 modules/ensembl/fasta/recombine/tests/main.nf.test.snap
 create mode 100644 modules/ensembl/fasta/split/environment.yml
 rename modules/ensembl/fasta/{splitfasta => split}/main.nf (86%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/agp/test.agp (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/real/in.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/splits/default/0/test.1.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/splits/default/0/test.2.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/splits/multi_dir/0/0/test.1.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/splits/multi_dir/0/1/test.2.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/splits/unique/0/test.0.1.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/data/splits/unique/0/test.0.2.fa (100%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/main.nf.test (97%)
 rename modules/ensembl/fasta/{splitfasta => split}/tests/main.nf.test.snap (100%)

diff --git a/modules/ensembl/fasta/splitfasta/environment.yml b/modules/ensembl/fasta/recombine/environment.yml
similarity index 76%
rename from modules/ensembl/fasta/splitfasta/environment.yml
rename to modules/ensembl/fasta/recombine/environment.yml
index 2d01414..52b218c 100644
--- a/modules/ensembl/fasta/splitfasta/environment.yml
+++ b/modules/ensembl/fasta/recombine/environment.yml
@@ -1,5 +1,5 @@
 ---
-name: "fasta_splitfasta"
+name: "fasta_recombine"
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
new file mode 100644
index 0000000..064558c
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -0,0 +1,78 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+process FASTA_RECOMBINE {
+    // Runs `fasta_recombine` on a directory of split FASTA files and emits `<meta.id>.fa`; `agp` is optional.
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "ensemblorg/ensembl-genomio:v1.6.1"
+
+    publishDir "${params.outdir ?: '.'}", mode: 'copy'
+
+    input:
+        tuple val(meta), path(fasta_dir), path(agp)
+
+    output:
+        tuple val(meta), path("*.fa"), emit: fasta
+
+    script:
+        def args = []
+
+        if (params.extra_suffixes) {
+            args << "--extra-suffixes ${params.extra_suffixes}"
+        }
+
+        if (params.chunk_id_regex) {
+            // Single-quoted so the shell does not interpret regex metacharacters such as ( ) < > $.
+            args << "--chunk-id-regex '${params.chunk_id_regex}'"
+        }
+
+        if (params.allow_revcomp) {
+            args << "--allow-revcomp"
+        }
+
+        if (agp) {
+            args << "--agp-file '${agp}'"
+        }
+
+        def out_fasta = "${meta.id}.fa"
+
+        """
+        fasta_recombine \\
+            --in-dir '${fasta_dir}' \\
+            --out-fasta '${out_fasta}' \\
+            ${args.join(' ')}
+        """
+
+    stub:
+        // Stub: copy the expected output for the selected mode from tests/data instead of running the tool.
+        """
+        set -euo pipefail
+
+        test_data_dir="${moduleDir}/tests/data"
+
+        out_fasta="${meta.id}.fa"
+
+        mode="header"
+        if [[ -n "${agp ?: ''}" ]]; then
+            mode="agp"
+        fi
+
+        cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$out_fasta"
+        """
+
+}
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa
new file mode 100644
index 0000000..b53532e
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa
@@ -0,0 +1,2 @@
+>seq1
+AAAAAACCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/splits/part1.fa b/modules/ensembl/fasta/recombine/tests/data/agp/splits/part1.fa
new file mode 100644
index 0000000..dafb755
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/agp/splits/part1.fa
@@ -0,0 +1,2 @@
+>part1
+AAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/splits/part2.fa b/modules/ensembl/fasta/recombine/tests/data/agp/splits/part2.fa
new file mode 100644
index 0000000..0fc377e
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/agp/splits/part2.fa
@@ -0,0 +1,2 @@
+>part2
+CCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/test.agp b/modules/ensembl/fasta/recombine/tests/data/agp/test.agp
new file mode 100644
index 0000000..a73c8db
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/agp/test.agp
@@ -0,0 +1,3 @@
+##agp-version 2.0
+seq1	1	6	1	W	part1	1	6	+
+seq1	7	12	2	W	part2	1	6	+
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa
new file mode 100644
index 0000000..46d11a6
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa
@@ -0,0 +1,2 @@
+>seq1
+CCCCGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_1.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_1.fa
new file mode 100644
index 0000000..0af2767
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_1.fa
@@ -0,0 +1,2 @@
+>seqY_1
+CCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_5.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_5.fa
new file mode 100644
index 0000000..c722026
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_5.fa
@@ -0,0 +1,2 @@
+>seqY_5
+GGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa
new file mode 100644
index 0000000..121d453
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa
@@ -0,0 +1,2 @@
+>seq1
+AAAACCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa
new file mode 100644
index 0000000..17d88e1
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa
@@ -0,0 +1,2 @@
+>seq1_chunk_start_1
+AAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa
new file mode 100644
index 0000000..b6646f2
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa
@@ -0,0 +1,2 @@
+>seq1_chunk_start_5
+CCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
new file mode 100644
index 0000000..d3bbb3d
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
@@ -0,0 +1,4 @@
+>seq1
+AAAACCCC
+>seq2
+GGGGTT
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_1.fa b/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_1.fa
new file mode 100644
index 0000000..17d88e1
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_1.fa
@@ -0,0 +1,2 @@
+>seq1_chunk_start_1
+AAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_5.fa b/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_5.fa
new file mode 100644
index 0000000..b6646f2
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_5.fa
@@ -0,0 +1,2 @@
+>seq1_chunk_start_5
+CCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/splits/seq2.fa b/modules/ensembl/fasta/recombine/tests/data/header/splits/seq2.fa
new file mode 100644
index 0000000..70d86fb
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/header/splits/seq2.fa
@@ -0,0 +1,2 @@
+>seq2
+GGGGTT
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
new file mode 100644
index 0000000..b965d38
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -0,0 +1,140 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nf-core modules test fasta/recombine
+nextflow_process {
+
+    name "Test Process FASTA_RECOMBINE"
+    script "../main.nf"
+    process "FASTA_RECOMBINE"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "fasta"
+    tag "fasta/recombine"
+
+
+    test("Stub outputs: header mode") {
+
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/splits'), []]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+        }
+    }
+
+
+    test("Stub outputs: AGP mode") {
+
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/agp/splits'),
+                            file('${moduleDir}/tests/data/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+        }
+    }
+
+
+    test("Real run: header recombination") {
+
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/splits'), []]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+        }
+    }
+
+
+    test("Real run: AGP recombination") {
+
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/agp/splits'),
+                            file('${moduleDir}/tests/data/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+        }
+    }
+
+
+    test("Real run: extra suffix support") {
+
+        when {
+            params.extra_suffixes = ".fsa"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/extra_suffix/splits'),
+                            []]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+        }
+    }
+
+
+    test("Real run: custom chunk regex") {
+
+        when {
+            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/custom_regex/splits'),
+                            []]
+                """
+            }
+        }
+
+        then {
+            assert snapshot(process.out).match()
+        }
+    }
+}
+
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
new file mode 100644
index 0000000..bf1e160
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -0,0 +1,104 @@
+{
+    "Stub outputs: AGP mode": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "fasta": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-02-10T15:17:42.590604"
+    },
+    "Real run: header recombination": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "fasta": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-02-10T15:17:44.398141"
+    },
+    "Real run: AGP recombination": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "fasta": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-02-10T15:17:46.212088"
+    },
+    "Stub outputs: header mode": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "fasta": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-02-10T15:17:40.786056"
+    },
+    "Real run: extra suffix support": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "fasta": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-02-10T15:17:47.991813"
+    },
+    "Real run: custom chunk regex": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "fasta": [
+                    
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-02-10T15:17:49.822476"
+    }
+}
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/environment.yml b/modules/ensembl/fasta/split/environment.yml
new file mode 100644
index 0000000..208dc35
--- /dev/null
+++ b/modules/ensembl/fasta/split/environment.yml
@@ -0,0 +1,7 @@
+---
+name: "fasta_split"
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - ensembl-genomio=1.6.1
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/split/main.nf
similarity index 86%
rename from modules/ensembl/fasta/splitfasta/main.nf
rename to modules/ensembl/fasta/split/main.nf
index 1d11362..fd53d9f 100644
--- a/modules/ensembl/fasta/splitfasta/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -13,10 +13,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-process FASTA_SPLITFASTA {
+process FASTA_SPLIT {
 
     tag "${meta.id}"
-    label 'process_low'
+    label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
     container "ensemblorg/ensembl-genomio:v1.6.1"
@@ -70,9 +70,8 @@ process FASTA_SPLITFASTA {
         }
 
         """
-        python \\
-            fasta_split \\
-            --fasta-file \$PWD/${fasta} \\
+        fasta_split \\
+            --fasta-file ${fasta} \\
             --out-dir \$PWD \\
             ${args.join(' ')}
         """
@@ -81,20 +80,20 @@ process FASTA_SPLITFASTA {
         """
         set -euo pipefail
 
-        FIXTURE_DIR="${moduleDir}/tests/data"
+        test_data_dir="${moduleDir}/tests/data"
 
-        LAYOUT="default"
+        layout="default"
         if [[ "${params.unique_file_names ?: false}" == "true" ]]; then
-            LAYOUT="unique"
+            layout="unique"
         elif [[ -n "${params.max_dirs_per_directory ?: ''}" || -n "${params.max_files_per_directory ?: ''}" ]]; then
-        LAYOUT="multi_dir"
+            layout="multi_dir"
         fi
 
         mkdir -p splits
-        cp -R "\$FIXTURE_DIR/splits/\$LAYOUT/." "splits/"
+        cp -R "\$test_data_dir/splits/\$layout/." "splits/"
 
         if [[ "${params.write_agp ?: false}" == "true" ]]; then
-            cp "\$FIXTURE_DIR/agp/test.agp" "${meta.id}.agp"
+            cp "\$test_data_dir/agp/test.agp" "${meta.id}.agp"
         fi
         """
 
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp b/modules/ensembl/fasta/split/tests/data/agp/test.agp
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
rename to modules/ensembl/fasta/split/tests/data/agp/test.agp
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa b/modules/ensembl/fasta/split/tests/data/real/in.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
rename to modules/ensembl/fasta/split/tests/data/real/in.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa b/modules/ensembl/fasta/split/tests/data/splits/default/0/test.1.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
rename to modules/ensembl/fasta/split/tests/data/splits/default/0/test.1.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa b/modules/ensembl/fasta/split/tests/data/splits/default/0/test.2.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
rename to modules/ensembl/fasta/split/tests/data/splits/default/0/test.2.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa b/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/0/test.1.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
rename to modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/0/test.1.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa b/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/1/test.2.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
rename to modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/1/test.2.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa b/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.1.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
rename to modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.1.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa b/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.2.fa
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
rename to modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.2.fa
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test b/modules/ensembl/fasta/split/tests/main.nf.test
similarity index 97%
rename from modules/ensembl/fasta/splitfasta/tests/main.nf.test
rename to modules/ensembl/fasta/split/tests/main.nf.test
index c23c0cd..cf4206f 100644
--- a/modules/ensembl/fasta/splitfasta/tests/main.nf.test
+++ b/modules/ensembl/fasta/split/tests/main.nf.test
@@ -13,20 +13,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// nf-core modules test fasta/splitfasta
+// nf-core modules test fasta/split
 nextflow_process {
 
-    name "Test Process FASTA_SPLITFASTA"
+    name "Test Process FASTA_SPLIT"
     script "../main.nf"
-    process "FASTA_SPLITFASTA"
+    process "FASTA_SPLIT"
 
     tag "modules"
     tag "modules_ensembl"
     tag "fasta"
-    tag "fasta/splitfasta"
+    tag "fasta/split"
 
 
-    def real_fa = new File("modules/ensembl/fasta/splitfasta/tests/data/real/in.fa").canonicalFile
+    def real_fa = new File("modules/ensembl/fasta/split/tests/data/real/in.fa").canonicalFile
 
     test("Stub outputs: default layout, no AGP") {
 
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
similarity index 100%
rename from modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
rename to modules/ensembl/fasta/split/tests/main.nf.test.snap

From 225b68abc6a0fc07398ddc77264e61c800179b3e Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 12 Feb 2026 13:46:37 +0000
Subject: [PATCH 15/36] Refactor for manifest input to recombine module

---
 modules/ensembl/fasta/recombine/main.nf       | 16 ++++++-------
 .../data/agp/{splits => inputs}/part1.fa      |  0
 .../data/agp/{splits => inputs}/part2.fa      |  0
 .../recombine/tests/data/agp/manifest.txt     |  2 ++
 .../custom_regex/{splits => inputs}/seq1_1.fa |  0
 .../custom_regex/{splits => inputs}/seq1_5.fa |  0
 .../tests/data/custom_regex/manifest.txt      |  2 ++
 .../tests/data/extra_suffix/output/test.fa    |  2 --
 .../splits/seq1_chunk_start_1.fsa             |  2 --
 .../splits/seq1_chunk_start_5.fsa             |  2 --
 .../data/header/{output => inputs}/test.fa    |  0
 .../recombine/tests/data/header/manifest.txt  |  1 +
 .../tests/data/order/inputs/01_second.fa      |  2 ++
 .../tests/data/order/inputs/02_first.fa       |  2 ++
 .../recombine/tests/data/order/manifest.txt   |  2 ++
 .../recombine/tests/data/order/output/test.fa |  4 ++++
 .../fasta/recombine/tests/main.nf.test        | 23 +++++++++----------
 .../fasta/recombine/tests/main.nf.test.snap   |  8 +++----
 18 files changed, 37 insertions(+), 31 deletions(-)
 rename modules/ensembl/fasta/recombine/tests/data/agp/{splits => inputs}/part1.fa (100%)
 rename modules/ensembl/fasta/recombine/tests/data/agp/{splits => inputs}/part2.fa (100%)
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt
 rename modules/ensembl/fasta/recombine/tests/data/custom_regex/{splits => inputs}/seq1_1.fa (100%)
 rename modules/ensembl/fasta/recombine/tests/data/custom_regex/{splits => inputs}/seq1_5.fa (100%)
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa
 rename modules/ensembl/fasta/recombine/tests/data/header/{output => inputs}/test.fa (100%)
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/order/manifest.txt
 create mode 100644 modules/ensembl/fasta/recombine/tests/data/order/output/test.fa

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 064558c..50a7560 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -24,7 +24,7 @@ process FASTA_RECOMBINE {
     publishDir "${params.outdir ?: '.'}", mode: 'copy'
 
     input:
-        tuple val(meta), path(fasta_dir), path(agp)
+        tuple val(meta), path(fasta_manifest), path(agp)
 
     output:
         tuple val(meta), path("*.fa"), emit: fasta
@@ -32,10 +32,6 @@ process FASTA_RECOMBINE {
     script:
         def args = []
 
-        if (params.extra_suffixes) {
-            args << "--extra-suffixes ${params.extra_suffixes}"
-        }
-
         if (params.chunk_id_regex) {
             args << "--chunk-id-regex ${params.chunk_id_regex}"
         }
@@ -45,14 +41,14 @@ process FASTA_RECOMBINE {
         }
 
         if (agp) {
-            args << "--agp-file '${agp}'"
+            args << "--agp-file ${agp}"
         }
 
         def out_fasta = "${meta.id}.fa"
 
         """
         fasta_recombine \\
-            --in-dir ${fasta_dir} \\
+            --fasta-manifest ${fasta_manifest} \\
             --out-fasta ${out_fasta} \\
             ${args.join(' ')}
         """
@@ -65,12 +61,14 @@ process FASTA_RECOMBINE {
 
         out_fasta="${meta.id}.fa"
 
+        test -s "${fasta_manifest}"
+
         mode="header"
         if [[ -n "${agp ?: ''}" ]]; then
-            MODE="agp"
+            mode="agp"
         fi
 
-        cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$OUT_FASTA"
+        cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$out_fasta"
         
         """
 
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/splits/part1.fa b/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part1.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/agp/splits/part1.fa
rename to modules/ensembl/fasta/recombine/tests/data/agp/inputs/part1.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/splits/part2.fa b/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part2.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/agp/splits/part2.fa
rename to modules/ensembl/fasta/recombine/tests/data/agp/inputs/part2.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt
new file mode 100644
index 0000000..b128cbe
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt
@@ -0,0 +1,2 @@
+inputs/part1.fa
+inputs/part2.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_1.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_1.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_1.fa
rename to modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_1.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_5.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_5.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/custom_regex/splits/seq1_5.fa
rename to modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_5.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt
new file mode 100644
index 0000000..a125950
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt
@@ -0,0 +1,2 @@
+inputs/seq1_1.fa
+inputs/seq1_5.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa
deleted file mode 100644
index 121d453..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/output/test.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-TTTTAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa
deleted file mode 100644
index 17d88e1..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_1.fsa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1_chunk_start_1
-AAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa b/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa
deleted file mode 100644
index b6646f2..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/extra_suffix/splits/seq1_chunk_start_5.fsa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1_chunk_start_5
-CCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/test.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
rename to modules/ensembl/fasta/recombine/tests/data/header/inputs/test.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
new file mode 100644
index 0000000..ee698b4
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
@@ -0,0 +1 @@
+inputs/test.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa b/modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa
new file mode 100644
index 0000000..d06c158
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa
@@ -0,0 +1,2 @@
+>second second_record
+TTTT
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa b/modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa
new file mode 100644
index 0000000..1e20e1f
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa
@@ -0,0 +1,2 @@
+>first first_record
+AAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/order/manifest.txt
new file mode 100644
index 0000000..dae8a10
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/order/manifest.txt
@@ -0,0 +1,2 @@
+inputs/02_first.fa
+inputs/01_second.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/order/output/test.fa
new file mode 100644
index 0000000..b3b6d1e
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/data/order/output/test.fa
@@ -0,0 +1,4 @@
+>first first_record
+AAAA
+>second second_record
+TTTT
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index b965d38..2e06993 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -33,7 +33,8 @@ nextflow_process {
 
             process {
                 """
-                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/splits'), []]
+                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/manifest.txt'), []]
+
                 """
             }
         }
@@ -52,7 +53,7 @@ nextflow_process {
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/agp/splits'),
+                            file('${moduleDir}/tests/data/agp/manifest.txt'),
                             file('${moduleDir}/tests/data/agp/test.agp')]
                 """
             }
@@ -69,7 +70,7 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/splits'), []]
+                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/manifest.txt'), []]
                 """
             }
         }
@@ -86,7 +87,7 @@ nextflow_process {
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/agp/splits'),
+                            file('${moduleDir}/tests/data/agp/manifest.txt'),
                             file('${moduleDir}/tests/data/agp/test.agp')]
                 """
             }
@@ -98,15 +99,15 @@ nextflow_process {
     }
 
 
-    test("Real run: extra suffix support") {
+    test("Real run: custom chunk regex") {
 
         when {
-            params.extra_suffixes = ".fsa"
+            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
 
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/extra_suffix/splits'),
+                            file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
                             []]
                 """
             }
@@ -118,15 +119,13 @@ nextflow_process {
     }
 
 
-    test("Real run: custom chunk regex") {
+    test("Real run: manifest order is preserved") {
 
         when {
-            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
-
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/custom_regex/splits'),
+                            file('${moduleDir}/tests/data/order/manifest.txt'),
                             []]
                 """
             }
@@ -135,6 +134,6 @@ nextflow_process {
         then {
             assert snapshot(process.out).match()
         }
-    }
+}
 }
 
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index bf1e160..75786c9 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -67,7 +67,7 @@
         },
         "timestamp": "2026-02-10T15:17:40.786056"
     },
-    "Real run: extra suffix support": {
+    "Real run: custom chunk regex": {
         "content": [
             {
                 "0": [
@@ -82,9 +82,9 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.3"
         },
-        "timestamp": "2026-02-10T15:17:47.991813"
+        "timestamp": "2026-02-10T15:17:49.822476"
     },
-    "Real run: custom chunk regex": {
+    "Real run: manifest order is preserved": {
         "content": [
             {
                 "0": [
@@ -99,6 +99,6 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.3"
         },
-        "timestamp": "2026-02-10T15:17:49.822476"
+        "timestamp": "2026-02-12T13:45:22.67052"
     }
 }
\ No newline at end of file

From 40ed5239e0e809933763fd4fb12900bb06f96a6f Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 13 Feb 2026 15:39:24 +0000
Subject: [PATCH 16/36] Various fixes

---
 .../ensembl/fasta/recombine/assets/NO_FILE    |   0
 modules/ensembl/fasta/recombine/main.nf       |  13 +-
 .../{splits => inputs}/seq1_chunk_start_1.fa  |   0
 .../{splits => inputs}/seq1_chunk_start_5.fa  |   0
 .../data/header/{splits => inputs}/seq2.fa    |   0
 .../recombine/tests/data/header/manifest.txt  |   4 +-
 .../data/header/{inputs => output}/test.fa    |   0
 .../fasta/recombine/tests/main.nf.test        |  32 ++++-
 .../fasta/recombine/tests/main.nf.test.snap   | 124 +++++++++++++-----
 modules/ensembl/fasta/split/main.nf           |  10 +-
 .../fasta/split/tests/main.nf.test.snap       |  32 ++---
 11 files changed, 150 insertions(+), 65 deletions(-)
 create mode 100644 modules/ensembl/fasta/recombine/assets/NO_FILE
 rename modules/ensembl/fasta/recombine/tests/data/header/{splits => inputs}/seq1_chunk_start_1.fa (100%)
 rename modules/ensembl/fasta/recombine/tests/data/header/{splits => inputs}/seq1_chunk_start_5.fa (100%)
 rename modules/ensembl/fasta/recombine/tests/data/header/{splits => inputs}/seq2.fa (100%)
 rename modules/ensembl/fasta/recombine/tests/data/header/{inputs => output}/test.fa (100%)

diff --git a/modules/ensembl/fasta/recombine/assets/NO_FILE b/modules/ensembl/fasta/recombine/assets/NO_FILE
new file mode 100644
index 0000000..e69de29
diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 50a7560..4e8e13a 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -21,8 +21,6 @@ process FASTA_RECOMBINE {
     conda "${moduleDir}/environment.yml"
     container "ensemblorg/ensembl-genomio:v1.6.1"
 
-    publishDir "${params.outdir ?: '.'}", mode: 'copy'
-
     input:
         tuple val(meta), path(fasta_manifest), path(agp)
 
@@ -33,14 +31,16 @@ process FASTA_RECOMBINE {
         def args = []
 
         if (params.chunk_id_regex) {
-            args << "--chunk-id-regex ${params.chunk_id_regex}"
+            def rx = params.chunk_id_regex.replace("'", "'\"'\"'")
+            args << "--chunk-id-regex '${rx}'"
         }
 
         if (params.allow_revcomp) {
             args << "--allow-revcomp"
         }
 
-        if (agp) {
+        def has_agp = agp && agp.baseName != 'NO_FILE'
+        if (has_agp) {
             args << "--agp-file ${agp}"
         }
 
@@ -64,10 +64,13 @@ process FASTA_RECOMBINE {
         test -s "${fasta_manifest}"
 
         mode="header"
-        if [[ -n "${agp ?: ''}" ]]; then
+        agp_path="${agp}"
+        agp_name="\${agp_path##*/}"
+        if [[ "\$agp_name" != "NO_FILE" ]]; then
             mode="agp"
         fi
 
+
         cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$out_fasta"
         
         """
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_1.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_1.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_1.fa
rename to modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_1.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_5.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_5.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/header/splits/seq1_chunk_start_5.fa
rename to modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_5.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/splits/seq2.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq2.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/header/splits/seq2.fa
rename to modules/ensembl/fasta/recombine/tests/data/header/inputs/seq2.fa
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
index ee698b4..a34084d 100644
--- a/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
+++ b/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
@@ -1 +1,3 @@
-inputs/test.fa
\ No newline at end of file
+inputs/seq1_chunk_start_1.fa
+inputs/seq1_chunk_start_5.fa
+inputs/seq2.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/inputs/test.fa b/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
similarity index 100%
rename from modules/ensembl/fasta/recombine/tests/data/header/inputs/test.fa
rename to modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index 2e06993..ba4bfc6 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -33,13 +33,18 @@ nextflow_process {
 
             process {
                 """
-                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/manifest.txt'), []]
+                input[0] = [[ id:'test' ],
+                        file('${moduleDir}/tests/data/header/manifest.txt'),
+                        file('${moduleDir}/assets/NO_FILE')]
 
                 """
             }
         }
 
         then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.fasta.size() == 1
+            assert process.success
             assert snapshot(process.out).match()
         }
     }
@@ -60,6 +65,9 @@ nextflow_process {
         }
 
         then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.fasta.size() == 1
+            assert process.success
             assert snapshot(process.out).match()
         }
     }
@@ -70,12 +78,17 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [[ id:'test' ], file('${moduleDir}/tests/data/header/manifest.txt'), []]
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/header/manifest.txt'),
+                            file('${moduleDir}/assets/NO_FILE')]
                 """
             }
         }
 
         then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.fasta.size() == 1
+            assert process.success
             assert snapshot(process.out).match()
         }
     }
@@ -94,6 +107,9 @@ nextflow_process {
         }
 
         then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.fasta.size() == 1
+            assert process.success
             assert snapshot(process.out).match()
         }
     }
@@ -108,12 +124,15 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
-                            []]
+                            file('${moduleDir}/assets/NO_FILE')]
                 """
             }
         }
 
         then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.fasta.size() == 1
+            assert process.success
             assert snapshot(process.out).match()
         }
     }
@@ -126,14 +145,17 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/order/manifest.txt'),
-                            []]
+                            file('${moduleDir}/assets/NO_FILE')]
                 """
             }
         }
 
         then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.fasta.size() == 1
+            assert process.success
             assert snapshot(process.out).match()
         }
-}
+    }
 }
 
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index 75786c9..30b0258 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -3,102 +3,162 @@
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,3ec81eef9dd73dc86ff01621dbacc7a0"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,3ec81eef9dd73dc86ff01621dbacc7a0"
+                    ]
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:20:39.92005",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-02-10T15:17:42.590604"
+        }
     },
-    "Real run: header recombination": {
+    "Real run: AGP recombination": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,5f81df5939251499ea60e666d8a306b3"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,5f81df5939251499ea60e666d8a306b3"
+                    ]
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:20:44.283073",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-02-10T15:17:44.398141"
+        }
     },
-    "Real run: AGP recombination": {
+    "Real run: header recombination": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,709337303b43192f7647b77c170adac7"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,709337303b43192f7647b77c170adac7"
+                    ]
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:23:27.996406",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-02-10T15:17:46.212088"
+        }
     },
     "Stub outputs: header mode": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,93d1870d020e197708753501e57db68f"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,93d1870d020e197708753501e57db68f"
+                    ]
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:20:37.864233",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-02-10T15:17:40.786056"
+        }
     },
     "Real run: custom chunk regex": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,90d526c36d03ae9e226d09655f22f00e"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,90d526c36d03ae9e226d09655f22f00e"
+                    ]
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:26:49.012219",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-02-10T15:17:49.822476"
+        }
     },
     "Real run: manifest order is preserved": {
         "content": [
             {
                 "0": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,52fa2054da674f0a5ebc263e724cf4a4"
+                    ]
                 ],
                 "fasta": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,52fa2054da674f0a5ebc263e724cf4a4"
+                    ]
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:20:48.517972",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-02-12T13:45:22.67052"
+        }
     }
 }
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index fd53d9f..5d2a347 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -21,14 +21,12 @@ process FASTA_SPLIT {
     conda "${moduleDir}/environment.yml"
     container "ensemblorg/ensembl-genomio:v1.6.1"
 
-    publishDir "${params.outdir ?: '.'}", mode: 'copy'
-
     input:
         tuple val(meta), path(fasta)
 
     output:
-        tuple val(meta), path("**/*.fa"), emit: fasta
-        tuple val(meta), path("*.agp"), emit: agp, optional: true
+        tuple val(meta), path("splits/**/*.fa"), emit: fasta
+        tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
 
     script:
         def args = []
@@ -72,7 +70,7 @@ process FASTA_SPLIT {
         """
         fasta_split \\
             --fasta-file ${fasta} \\
-            --out-dir \$PWD \\
+            --out-dir splits \\
             ${args.join(' ')}
         """
 
@@ -93,7 +91,7 @@ process FASTA_SPLIT {
         cp -R "\$test_data_dir/splits/\$layout/." "splits/"
 
         if [[ "${params.write_agp ?: false}" == "true" ]]; then
-            cp "\$test_data_dir/agp/test.agp" "${meta.id}.agp"
+            cp "\$test_data_dir/agp/test.agp" "splits/${meta.id}.agp"
         fi
         """
 
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
index 7c44fbc..167ba7c 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/split/tests/main.nf.test.snap
@@ -42,11 +42,11 @@
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:27:15.469156",
         "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.04.6"
-        },
-        "timestamp": "2026-02-03T11:44:20.723299027"
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
     },
     "Stub outputs: nested directory layout contract": {
         "content": [
@@ -81,11 +81,11 @@
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:27:19.735631",
         "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.04.6"
-        },
-        "timestamp": "2026-02-03T11:44:45.167257411"
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
     },
     "Stub outputs: default layout, no AGP": {
         "content": [
@@ -120,11 +120,11 @@
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:27:13.38194",
         "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.04.6"
-        },
-        "timestamp": "2026-02-03T11:44:08.447183258"
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
     },
     "Stub outputs: unique_file_names contract": {
         "content": [
@@ -159,10 +159,10 @@
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:27:17.614981",
         "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.04.6"
-        },
-        "timestamp": "2026-02-03T11:44:33.225993321"
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
     }
 }
\ No newline at end of file

From 410a94400a832dc42a4d4f72b9a8454695041a61 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 19 Feb 2026 00:12:48 +0000
Subject: [PATCH 17/36] Add repeats/combine_json module

---
 .../fasta/recombine => }/assets/NO_FILE       |   0
 modules/ensembl/fasta/recombine/main.nf       |   3 +-
 .../fasta/recombine/tests/main.nf.test        |  20 +--
 .../fasta/recombine/tests/main.nf.test.snap   |  36 ++--
 modules/ensembl/fasta/split/main.nf           |   2 +-
 .../ensembl/fasta/split/tests/main.nf.test    |  22 +--
 .../fasta/split/tests/main.nf.test.snap       |  16 +-
 .../repeats/combine_json/environment.yml      |   7 +
 modules/ensembl/repeats/combine_json/main.nf  |  79 +++++++++
 .../tests/data/agp/inputs/in.json             |  34 ++++
 .../combine_json/tests/data/agp/manifest.txt  |   1 +
 .../tests/data/agp/output/test.repeat.json    |  34 ++++
 .../combine_json/tests/data/agp/test.agp      |   1 +
 .../tests/data/custom_regex/inputs/in.json    |  34 ++++
 .../tests/data/custom_regex/manifest.txt      |   1 +
 .../data/custom_regex/output/test.repeat.json |  34 ++++
 .../tests/data/header/inputs/a.json           |  34 ++++
 .../tests/data/header/inputs/b.json           |  34 ++++
 .../tests/data/header/manifest.txt            |   2 +
 .../tests/data/header/output/test.repeat.json |  43 +++++
 .../tests/data/order/inputs/01.json           |  34 ++++
 .../tests/data/order/inputs/02.json           |  34 ++++
 .../tests/data/order/manifest.txt             |   2 +
 .../tests/data/order/output/test.repeat.json  |  43 +++++
 .../repeats/combine_json/tests/main.nf.test   | 153 ++++++++++++++++
 .../combine_json/tests/main.nf.test.snap      | 164 ++++++++++++++++++
 26 files changed, 817 insertions(+), 50 deletions(-)
 rename modules/{ensembl/fasta/recombine => }/assets/NO_FILE (100%)
 create mode 100644 modules/ensembl/repeats/combine_json/environment.yml
 create mode 100644 modules/ensembl/repeats/combine_json/main.nf
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/agp/inputs/in.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/agp/manifest.txt
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/agp/output/test.repeat.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/agp/test.agp
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/custom_regex/inputs/in.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/custom_regex/manifest.txt
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/custom_regex/output/test.repeat.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/header/inputs/a.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/header/inputs/b.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/header/manifest.txt
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/header/output/test.repeat.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/order/inputs/01.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/order/inputs/02.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/order/manifest.txt
 create mode 100644 modules/ensembl/repeats/combine_json/tests/data/order/output/test.repeat.json
 create mode 100644 modules/ensembl/repeats/combine_json/tests/main.nf.test
 create mode 100644 modules/ensembl/repeats/combine_json/tests/main.nf.test.snap

diff --git a/modules/ensembl/fasta/recombine/assets/NO_FILE b/modules/assets/NO_FILE
similarity index 100%
rename from modules/ensembl/fasta/recombine/assets/NO_FILE
rename to modules/assets/NO_FILE
diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 4e8e13a..9a7fd97 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -25,7 +25,7 @@ process FASTA_RECOMBINE {
         tuple val(meta), path(fasta_manifest), path(agp)
 
     output:
-        tuple val(meta), path("*.fa"), emit: fasta
+        tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
 
     script:
         def args = []
@@ -74,6 +74,5 @@ process FASTA_RECOMBINE {
         cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$out_fasta"
         
         """
-
         
 }
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index ba4bfc6..ef81bd9 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -35,7 +35,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                         file('${moduleDir}/tests/data/header/manifest.txt'),
-                        file('${moduleDir}/assets/NO_FILE')]
+                        file('${projectDir}/modules/assets/NO_FILE')]
 
                 """
             }
@@ -43,7 +43,7 @@ nextflow_process {
 
         then {
             assert process.trace.tasks().size() == 1
-            assert process.out.fasta.size() == 1
+            assert process.out.recombined_fasta.size() == 1
             assert process.success
             assert snapshot(process.out).match()
         }
@@ -66,7 +66,7 @@ nextflow_process {
 
         then {
             assert process.trace.tasks().size() == 1
-            assert process.out.fasta.size() == 1
+            assert process.out.recombined_fasta.size() == 1
             assert process.success
             assert snapshot(process.out).match()
         }
@@ -80,14 +80,14 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/header/manifest.txt'),
-                            file('${moduleDir}/assets/NO_FILE')]
+                            file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
 
         then {
             assert process.trace.tasks().size() == 1
-            assert process.out.fasta.size() == 1
+            assert process.out.recombined_fasta.size() == 1
             assert process.success
             assert snapshot(process.out).match()
         }
@@ -108,7 +108,7 @@ nextflow_process {
 
         then {
             assert process.trace.tasks().size() == 1
-            assert process.out.fasta.size() == 1
+            assert process.out.recombined_fasta.size() == 1
             assert process.success
             assert snapshot(process.out).match()
         }
@@ -124,14 +124,14 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
-                            file('${moduleDir}/assets/NO_FILE')]
+                            file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
 
         then {
             assert process.trace.tasks().size() == 1
-            assert process.out.fasta.size() == 1
+            assert process.out.recombined_fasta.size() == 1
             assert process.success
             assert snapshot(process.out).match()
         }
@@ -145,14 +145,14 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/order/manifest.txt'),
-                            file('${moduleDir}/assets/NO_FILE')]
+                            file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
 
         then {
             assert process.trace.tasks().size() == 1
-            assert process.out.fasta.size() == 1
+            assert process.out.recombined_fasta.size() == 1
             assert process.success
             assert snapshot(process.out).match()
         }
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index 30b0258..3a27deb 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -10,7 +10,7 @@
                         "test.fa:md5,3ec81eef9dd73dc86ff01621dbacc7a0"
                     ]
                 ],
-                "fasta": [
+                "recombined_fasta": [
                     [
                         {
                             "id": "test"
@@ -20,7 +20,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:20:39.92005",
+        "timestamp": "2026-02-18T23:12:05.089688",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -34,20 +34,20 @@
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,5f81df5939251499ea60e666d8a306b3"
+                        "test.fa:md5,f32bc79faea4bc05dd4675e0d4ededa1"
                     ]
                 ],
-                "fasta": [
+                "recombined_fasta": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,5f81df5939251499ea60e666d8a306b3"
+                        "test.fa:md5,f32bc79faea4bc05dd4675e0d4ededa1"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:20:44.283073",
+        "timestamp": "2026-02-18T23:12:09.601838",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -61,20 +61,20 @@
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,709337303b43192f7647b77c170adac7"
+                        "test.fa:md5,700550164316730d1145b7bde2ae3eb7"
                     ]
                 ],
-                "fasta": [
+                "recombined_fasta": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,709337303b43192f7647b77c170adac7"
+                        "test.fa:md5,700550164316730d1145b7bde2ae3eb7"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:23:27.996406",
+        "timestamp": "2026-02-18T23:12:07.342405",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -91,7 +91,7 @@
                         "test.fa:md5,93d1870d020e197708753501e57db68f"
                     ]
                 ],
-                "fasta": [
+                "recombined_fasta": [
                     [
                         {
                             "id": "test"
@@ -101,7 +101,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:20:37.864233",
+        "timestamp": "2026-02-18T23:12:03.015143",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -115,20 +115,20 @@
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,90d526c36d03ae9e226d09655f22f00e"
+                        "test.fa:md5,a589b60028be69f01622a61cc78fa1ae"
                     ]
                 ],
-                "fasta": [
+                "recombined_fasta": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,90d526c36d03ae9e226d09655f22f00e"
+                        "test.fa:md5,a589b60028be69f01622a61cc78fa1ae"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:26:49.012219",
+        "timestamp": "2026-02-18T23:12:11.852053",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -145,7 +145,7 @@
                         "test.fa:md5,52fa2054da674f0a5ebc263e724cf4a4"
                     ]
                 ],
-                "fasta": [
+                "recombined_fasta": [
                     [
                         {
                             "id": "test"
@@ -155,7 +155,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:20:48.517972",
+        "timestamp": "2026-02-18T23:12:14.083842",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 5d2a347..845628b 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -25,7 +25,7 @@ process FASTA_SPLIT {
         tuple val(meta), path(fasta)
 
     output:
-        tuple val(meta), path("splits/**/*.fa"), emit: fasta
+        tuple val(meta), path("splits/**/*.fa"), emit: fastas
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
 
     script:
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test b/modules/ensembl/fasta/split/tests/main.nf.test
index cf4206f..37211ae 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test
+++ b/modules/ensembl/fasta/split/tests/main.nf.test
@@ -50,10 +50,10 @@ nextflow_process {
             assert snapshot(process.out).match()
 
             // fasta: tuple(meta, fa_paths)
-            assert process.out.fasta != null
-            assert process.out.fasta.size() == 1
+            assert process.out.fastas != null
+            assert process.out.fastas.size() == 1
 
-            def fasta_out = process.out.fasta[0]
+            def fasta_out = process.out.fastas[0]
             def meta = fasta_out[0]
             def fas  = fasta_out[1]
 
@@ -98,8 +98,8 @@ nextflow_process {
         then {
             assert snapshot(process.out).match()
 
-            assert process.out.fasta.size() == 1
-            def fasta_out = process.out.fasta[0]
+            assert process.out.fastas.size() == 1
+            def fasta_out = process.out.fastas[0]
             def fas = fasta_out[1]
             assert fas.size() == 2
 
@@ -146,7 +146,7 @@ nextflow_process {
         then {
             assert snapshot(process.out).match()
 
-            def fasta_out = process.out.fasta[0]
+            def fasta_out = process.out.fastas[0]
             def fas = fasta_out[1]
 
             assert fas.size() == 2
@@ -183,7 +183,7 @@ nextflow_process {
         then {
             assert snapshot(process.out).match()
 
-            def fastas = process.out.fasta[0][1]
+            def fastas = process.out.fastas[0][1]
             assert fastas.size() == 2
             assert process.out.agp.size() == 0
 
@@ -218,10 +218,10 @@ nextflow_process {
         then {
             assert process.success
 
-            assert process.out.fasta != null
-            assert process.out.fasta.size() == 1
+            assert process.out.fastas != null
+            assert process.out.fastas.size() == 1
 
-            def out = process.out.fasta[0]
+            def out = process.out.fastas[0]
             def meta = out[0]
             def fas  = out[1]
 
@@ -303,7 +303,7 @@ nextflow_process {
         then {
             assert process.success
 
-            def fas = process.out.fasta[0][1]
+            def fas = process.out.fastas[0][1]
             assert fas.size() == 2
 
             def merged = fas
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
index 167ba7c..eb12321 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/split/tests/main.nf.test.snap
@@ -29,7 +29,7 @@
                         "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
                     ]
                 ],
-                "fasta": [
+                "fastas": [
                     [
                         {
                             "id": "test"
@@ -42,7 +42,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:27:15.469156",
+        "timestamp": "2026-02-18T23:21:51.036982",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -68,7 +68,7 @@
                 "agp": [
                     
                 ],
-                "fasta": [
+                "fastas": [
                     [
                         {
                             "id": "test"
@@ -81,7 +81,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:27:19.735631",
+        "timestamp": "2026-02-18T23:06:24.284416",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -107,7 +107,7 @@
                 "agp": [
                     
                 ],
-                "fasta": [
+                "fastas": [
                     [
                         {
                             "id": "test"
@@ -120,7 +120,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:27:13.38194",
+        "timestamp": "2026-02-18T23:06:18.00303",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -146,7 +146,7 @@
                 "agp": [
                     
                 ],
-                "fasta": [
+                "fastas": [
                     [
                         {
                             "id": "test"
@@ -159,7 +159,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-02-13T15:27:17.614981",
+        "timestamp": "2026-02-18T23:06:22.194395",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/repeats/combine_json/environment.yml b/modules/ensembl/repeats/combine_json/environment.yml
new file mode 100644
index 0000000..8bdd6b1
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/environment.yml
@@ -0,0 +1,7 @@
+---
+name: "repeats_combine_json"
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - ensembl-genomio=1.6.1  # keep in step with the container tag in main.nf (ensemblorg/ensembl-genomio:v1.6.1)
\ No newline at end of file
diff --git a/modules/ensembl/repeats/combine_json/main.nf b/modules/ensembl/repeats/combine_json/main.nf
new file mode 100644
index 0000000..ff2177b
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/main.nf
@@ -0,0 +1,79 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+process REPEATS_COMBINE_JSON {  // Runs ensembl.io.genomio.repeats.combine_json on a JSON manifest; emits <meta.id>.repeat.json
+
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "ensemblorg/ensembl-genomio:v1.6.1"
+
+    input:
+        tuple val(meta), path(json_manifest), path(agp)  // meta.id names the output; an agp file called NO_FILE means "no AGP supplied"
+
+    output:
+        tuple val(meta), path("${meta.id}.repeat.json"), emit: combined_json
+
+    script:
+        def args = []  // optional CLI flags, appended after the required ones
+
+        if (params.chunk_id_regex) {
+            def rx = params.chunk_id_regex.replace("'", "'\"'\"'")  // each ' becomes '"'"' so the regex survives the single-quoting on the next line
+            args << "--chunk-id-regex '${rx}'"
+        }
+
+        if (params.allow_revcomp) {
+            args << "--allow-revcomp"
+        }
+
+        def has_agp = agp && agp.baseName != 'NO_FILE'  // NO_FILE is the placeholder name treated as "no AGP"
+        if (has_agp) {
+            args << "--agp-file ${agp}"  // only passed when a real AGP file was staged
+        }
+
+        def out_json = "${meta.id}.repeat.json"  // must match the path declared in the output block
+
+        """
+        python -m ensembl.io.genomio.repeats.combine_json \\
+            --json-manifest ${json_manifest} \\
+            --out-json ${out_json} \\
+            ${args.join(' ')}
+        """
+
+    stub:  // copies a fixture from tests/data/<header|agp>/output instead of running the tool; "agp" is chosen when the agp input is not NO_FILE
+        """
+        set -euo pipefail
+
+        test_data_dir="${moduleDir}/tests/data"
+
+        out_json="${meta.id}.repeat.json"
+
+        test -s "${json_manifest}"
+
+        mode="header"
+        agp_path="${agp}"
+        agp_name="\${agp_path##*/}"
+        if [[ "\$agp_name" != "NO_FILE" ]]; then
+            mode="agp"
+        fi
+
+        # Provide a schema-valid combined JSON fixture.
+        # Arrange fixtures under:
+        #   tests/data/header/output/<id>.repeat.json
+        #   tests/data/agp/output/<id>.repeat.json
+        cp "\$test_data_dir/\$mode/output/${meta.id}.repeat.json" "\$out_json"
+        """
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/inputs/in.json b/modules/ensembl/repeats/combine_json/tests/data/agp/inputs/in.json
new file mode 100644
index 0000000..8228fd3
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/agp/inputs/in.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "comp1",
+      "seq_region_start": 10,
+      "seq_region_end": 20,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 11,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/manifest.txt b/modules/ensembl/repeats/combine_json/tests/data/agp/manifest.txt
new file mode 100644
index 0000000..1ac93e6
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/agp/manifest.txt
@@ -0,0 +1 @@
+inputs/in.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/output/test.repeat.json b/modules/ensembl/repeats/combine_json/tests/data/agp/output/test.repeat.json
new file mode 100644
index 0000000..cfc4cd2
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/agp/output/test.repeat.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 109,
+      "seq_region_end": 119,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 11,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/test.agp b/modules/ensembl/repeats/combine_json/tests/data/agp/test.agp
new file mode 100644
index 0000000..86dddab
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/agp/test.agp
@@ -0,0 +1 @@
+chr1	100	199	1	W	comp1	1	100	+
diff --git a/modules/ensembl/repeats/combine_json/tests/data/custom_regex/inputs/in.json b/modules/ensembl/repeats/combine_json/tests/data/custom_regex/inputs/in.json
new file mode 100644
index 0000000..69bfad7
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/custom_regex/inputs/in.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr1_11",
+      "seq_region_start": 1,
+      "seq_region_end": 5,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 5,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/custom_regex/manifest.txt b/modules/ensembl/repeats/combine_json/tests/data/custom_regex/manifest.txt
new file mode 100644
index 0000000..1ac93e6
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/custom_regex/manifest.txt
@@ -0,0 +1 @@
+inputs/in.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/custom_regex/output/test.repeat.json b/modules/ensembl/repeats/combine_json/tests/data/custom_regex/output/test.repeat.json
new file mode 100644
index 0000000..0d8eff4
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/custom_regex/output/test.repeat.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 11,
+      "seq_region_end": 15,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 5,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/inputs/a.json b/modules/ensembl/repeats/combine_json/tests/data/header/inputs/a.json
new file mode 100644
index 0000000..b33f05c
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/header/inputs/a.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr1_chunk_start_1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 3,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/inputs/b.json b/modules/ensembl/repeats/combine_json/tests/data/header/inputs/b.json
new file mode 100644
index 0000000..e6787cb
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/header/inputs/b.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr1_chunk_start_4",
+      "seq_region_start": 1,
+      "seq_region_end": 2,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 2,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/manifest.txt b/modules/ensembl/repeats/combine_json/tests/data/header/manifest.txt
new file mode 100644
index 0000000..419c5fd
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/header/manifest.txt
@@ -0,0 +1,2 @@
+inputs/a.json
+inputs/b.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/output/test.repeat.json b/modules/ensembl/repeats/combine_json/tests/data/header/output/test.repeat.json
new file mode 100644
index 0000000..c69532b
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/header/output/test.repeat.json
@@ -0,0 +1,43 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 3,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    },
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 4,
+      "seq_region_end": 5,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 2,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/inputs/01.json b/modules/ensembl/repeats/combine_json/tests/data/order/inputs/01.json
new file mode 100644
index 0000000..269ac0b
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/order/inputs/01.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr2_chunk_start_1",
+      "seq_region_start": 1,
+      "seq_region_end": 2,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 2,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/inputs/02.json b/modules/ensembl/repeats/combine_json/tests/data/order/inputs/02.json
new file mode 100644
index 0000000..8256fd2
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/order/inputs/02.json
@@ -0,0 +1,34 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr2_chunk_start_3",
+      "seq_region_start": 1,
+      "seq_region_end": 1,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 1,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/manifest.txt b/modules/ensembl/repeats/combine_json/tests/data/order/manifest.txt
new file mode 100644
index 0000000..dad42b0
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/order/manifest.txt
@@ -0,0 +1,2 @@
+inputs/02.json
+inputs/01.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/output/test.repeat.json b/modules/ensembl/repeats/combine_json/tests/data/order/output/test.repeat.json
new file mode 100644
index 0000000..0442952
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/data/order/output/test.repeat.json
@@ -0,0 +1,43 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "rm",
+    "display_label": "rm",
+    "description": "rm analysis (nf-test)",
+    "program": "stub",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "repeat_consensus": [
+    {
+      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
+      "repeat_name": "Alu",
+      "repeat_class": "SINE",
+      "repeat_type": "Alu",
+      "repeat_consensus": "ACGT"
+    }
+  ],
+  "repeat_features": [
+    {
+      "seq_region": "chr2",
+      "seq_region_start": 3,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 1,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    },
+    {
+      "seq_region": "chr2",
+      "seq_region_start": 1,
+      "seq_region_end": 2,
+      "seq_region_strand": 1,
+      "repeat_start": 1,
+      "repeat_end": 2,
+      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/main.nf.test b/modules/ensembl/repeats/combine_json/tests/main.nf.test
new file mode 100644
index 0000000..5a6eff4
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/main.nf.test
@@ -0,0 +1,153 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nf-core modules test repeats/combine_json
+nextflow_process {
+
+    name "Test Process REPEATS_COMBINE_JSON"
+    script "../main.nf"
+    process "REPEATS_COMBINE_JSON"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "repeats"
+    tag "repeats/combine_json"
+
+    test("Stub outputs: header mode") {
+
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/header/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Stub outputs: AGP mode") {
+
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/agp/manifest.txt'),
+                            file('${moduleDir}/tests/data/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: header combine + header-driven liftover") {
+
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/header/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: AGP-driven liftover") {
+
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/agp/manifest.txt'),
+                            file('${moduleDir}/tests/data/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: custom chunk regex") {
+
+        when {
+            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: manifest order is preserved") {
+
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/order/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/main.nf.test.snap b/modules/ensembl/repeats/combine_json/tests/main.nf.test.snap
new file mode 100644
index 0000000..b3fae3f
--- /dev/null
+++ b/modules/ensembl/repeats/combine_json/tests/main.nf.test.snap
@@ -0,0 +1,164 @@
+{
+    "Real run: AGP-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-19T00:11:13.232239",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: AGP mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-19T00:11:08.721986",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: header combine + header-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-19T00:11:11.007889",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: header mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-19T00:11:06.662964",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: custom chunk regex": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,f410544c71be74f7a8a7eab5e494b258"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,f410544c71be74f7a8a7eab5e494b258"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-19T00:11:15.43463",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: manifest order is preserved": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,1b68c1371265dad11839769a5e776b33"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.repeat.json:md5,1b68c1371265dad11839769a5e776b33"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-19T00:11:17.627989",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    }
+}
\ No newline at end of file

From 7bfe4c60450ea95001f20b3236974db2f25efe25 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Mon, 23 Feb 2026 19:22:51 +0000
Subject: [PATCH 18/36] Handle ncRNA features as well as repeats

---
 .../combine_json/environment.yml              |   2 +-
 modules/ensembl/features/combine_json/main.nf | 116 +++++
 .../tests/data/ncrna/agp/.DS_Store            | Bin 0 -> 6148 bytes
 .../tests/data/ncrna/agp/inputs/in.json       |  27 +
 .../tests/data/ncrna}/agp/manifest.txt        |   0
 .../data/ncrna/agp/output/test.features.json  |  27 +
 .../tests/data/ncrna}/agp/test.agp            |   0
 .../tests/data/ncrna/custom_regex/.DS_Store   | Bin 0 -> 6148 bytes
 .../data/ncrna/custom_regex/inputs/a.json     |  27 +
 .../data/ncrna/custom_regex/inputs/b.json     |  27 +
 .../data/ncrna/custom_regex}/manifest.txt     |   0
 .../custom_regex/output/test.features.json    |  37 ++
 .../tests/data/ncrna/header/.DS_Store         | Bin 0 -> 6148 bytes
 .../tests/data/ncrna/header/inputs/a.json     |  27 +
 .../tests/data/ncrna/header/inputs/b.json     |  27 +
 .../tests/data/ncrna/header/manifest.txt      |   2 +
 .../ncrna/header/output/test.features.json    |  37 ++
 .../tests/data/ncrna/order/.DS_Store          | Bin 0 -> 6148 bytes
 .../tests/data/ncrna/order/inputs/01.json     |  27 +
 .../tests/data/ncrna/order/inputs/02.json     |  27 +
 .../tests/data/ncrna}/order/manifest.txt      |   0
 .../ncrna/order/output/test.features.json     |  37 ++
 .../tests/data/repeat}/agp/inputs/in.json     |   0
 .../tests/data/repeat/agp}/manifest.txt       |   0
 .../repeat/agp/output/test.features.json}     |   0
 .../tests/data/repeat/agp/test.agp            |   1 +
 .../data/repeat}/custom_regex/inputs/in.json  |   0
 .../data/repeat/custom_regex/manifest.txt     |   1 +
 .../custom_regex/output/test.features.json}   |   0
 .../tests/data/repeat}/header/inputs/a.json   |   0
 .../tests/data/repeat}/header/inputs/b.json   |   0
 .../tests/data/repeat/header/manifest.txt     |   2 +
 .../repeat/header/output/test.features.json}  |   0
 .../tests/data/repeat}/order/inputs/01.json   |   0
 .../tests/data/repeat}/order/inputs/02.json   |   0
 .../tests/data/repeat/order/manifest.txt      |   2 +
 .../repeat/order/output/test.features.json}   |   0
 .../features/combine_json/tests/main.nf.test  | 280 ++++++++++
 .../combine_json/tests/main.nf.test.snap      | 488 ++++++++++++++++++
 modules/ensembl/repeats/combine_json/main.nf  |  79 ---
 .../repeats/combine_json/tests/main.nf.test   | 153 ------
 .../combine_json/tests/main.nf.test.snap      | 164 ------
 42 files changed, 1220 insertions(+), 397 deletions(-)
 rename modules/ensembl/{repeats => features}/combine_json/environment.yml (73%)
 create mode 100644 modules/ensembl/features/combine_json/main.nf
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/.DS_Store
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/ncrna}/agp/manifest.txt (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/ncrna}/agp/test.agp (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/.DS_Store
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json
 rename modules/ensembl/{repeats/combine_json/tests/data/header => features/combine_json/tests/data/ncrna/custom_regex}/manifest.txt (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/header/.DS_Store
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/a.json
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/b.json
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/header/manifest.txt
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/header/output/test.features.json
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/.DS_Store
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/ncrna}/order/manifest.txt (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/repeat}/agp/inputs/in.json (100%)
 rename modules/ensembl/{repeats/combine_json/tests/data/custom_regex => features/combine_json/tests/data/repeat/agp}/manifest.txt (100%)
 rename modules/ensembl/{repeats/combine_json/tests/data/agp/output/test.repeat.json => features/combine_json/tests/data/repeat/agp/output/test.features.json} (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/repeat}/custom_regex/inputs/in.json (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt
 rename modules/ensembl/{repeats/combine_json/tests/data/custom_regex/output/test.repeat.json => features/combine_json/tests/data/repeat/custom_regex/output/test.features.json} (100%)
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/repeat}/header/inputs/a.json (100%)
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/repeat}/header/inputs/b.json (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/header/manifest.txt
 rename modules/ensembl/{repeats/combine_json/tests/data/header/output/test.repeat.json => features/combine_json/tests/data/repeat/header/output/test.features.json} (100%)
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/repeat}/order/inputs/01.json (100%)
 rename modules/ensembl/{repeats/combine_json/tests/data => features/combine_json/tests/data/repeat}/order/inputs/02.json (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt
 rename modules/ensembl/{repeats/combine_json/tests/data/order/output/test.repeat.json => features/combine_json/tests/data/repeat/order/output/test.features.json} (100%)
 create mode 100644 modules/ensembl/features/combine_json/tests/main.nf.test
 create mode 100644 modules/ensembl/features/combine_json/tests/main.nf.test.snap
 delete mode 100644 modules/ensembl/repeats/combine_json/main.nf
 delete mode 100644 modules/ensembl/repeats/combine_json/tests/main.nf.test
 delete mode 100644 modules/ensembl/repeats/combine_json/tests/main.nf.test.snap

diff --git a/modules/ensembl/repeats/combine_json/environment.yml b/modules/ensembl/features/combine_json/environment.yml
similarity index 73%
rename from modules/ensembl/repeats/combine_json/environment.yml
rename to modules/ensembl/features/combine_json/environment.yml
index 8bdd6b1..5f1cb32 100644
--- a/modules/ensembl/repeats/combine_json/environment.yml
+++ b/modules/ensembl/features/combine_json/environment.yml
@@ -1,5 +1,5 @@
 ---
-name: "repeats_combine_json"
+name: "features_combine_json"
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
new file mode 100644
index 0000000..18425e5
--- /dev/null
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -0,0 +1,116 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+process FEATURES_COMBINE_JSON {
+
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "ensemblorg/ensembl-genomio:v1.6.1"
+
+    input:
+        tuple val(meta), path(json_manifest), path(agp)
+
+    output:
+        tuple val(meta), path("${meta.id}.features.json"), emit: combined_json
+
+    script:
+        def args = []
+
+        if (params.chunk_id_regex) {
+            def rx = params.chunk_id_regex.replace("'", "'\"'\"'")
+            args << "--chunk-id-regex '${rx}'"
+        }
+
+        if (params.allow_revcomp) {
+            args << "--allow-revcomp"
+        }
+
+        def has_agp = agp && agp.baseName != 'NO_FILE'
+        if (has_agp) {
+            args << "--agp-file '${agp}'"
+        }
+
+        def out_json = "${meta.id}.features.json"
+
+        """
+        python -m ensembl.io.genomio.features.combine_json \\
+            --json-manifest '${json_manifest}' \\
+            --out-json '${out_json}' \\
+            ${args.join(' ')}
+        """
+
+    stub:
+        """
+        set -euo pipefail
+
+        test_data_dir="${moduleDir}/tests/data"
+
+        out_json="${meta.id}.features.json"
+
+        test -s "${json_manifest}"
+
+        mode="header"
+        agp_path="${agp}"
+        agp_name="\${agp_path##*/}"
+        if [[ "\$agp_name" != "NO_FILE" ]]; then
+            mode="agp"
+        fi
+
+        manifest_real="\$(python -c 'from pathlib import Path; import sys; print(Path(sys.argv[1]).resolve())' "${json_manifest}")"
+        manifest_dir="\$(dirname "\$manifest_real")"
+
+        first_json="\$(head -n 1 "${json_manifest}")"
+        if [[ -z "\$first_json" ]]; then
+            echo "ERROR: manifest is empty: ${json_manifest}" >&2
+            exit 1
+        fi
+        if [[ "\$first_json" != /* ]]; then
+            first_json="\${manifest_dir}/\${first_json}"
+        fi
+        if [[ ! -s "\$first_json" ]]; then
+            echo "ERROR: first JSON in manifest does not exist or is empty: \$first_json" >&2
+            exit 1
+        fi
+
+        
+        if grep -q '"ncrna_features"' "\$first_json"; then
+            load_type="ncrna"
+        elif grep -q '"repeat_features"' "\$first_json"; then
+            load_type="repeat"
+        else
+            echo "ERROR: cannot detect load type from first JSON: \$first_json" >&2
+            echo "Expected top-level key: 'repeat_features' or 'ncrna_features'." >&2
+            exit 1
+        fi
+
+        # Provide a schema-valid combined JSON fixture.
+        # Fixtures are arranged under:
+        #   tests/data/repeat/header/output/<id>.features.json
+        #   tests/data/repeat/agp/output/<id>.features.json
+        #   tests/data/ncrna/header/output/<id>.features.json
+        #   tests/data/ncrna/agp/output/<id>.features.json
+        fixture="\$test_data_dir/\$load_type/\$mode/output/${meta.id}.features.json"
+
+        if [[ ! -s "\$fixture" ]]; then
+            echo "ERROR: missing stub fixture: \$fixture" >&2
+            echo "Make sure you created output fixture for meta.id='${meta.id}' under \$load_type/\$mode/output/." >&2
+            exit 1
+        fi
+
+        cp "\$fixture" "\$out_json"
+        """
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..46ebb6833f86b8f68ba1c38bb8339c81f2e59428
GIT binary patch
literal 6148
zcmeHKQBK1!40Xzciul<lpSi+LK$yx2b^$adF{G7LgZ<|SJHoyY7vKzBi)T9;?XrnK
zgeF^ZUh3G1`x4a+5xMDRF(DcgQ4M9RuFxD29%n72$`&4UGRGUeuAU~d$Jlm-w;BE;
z1H5;a^h__*(T0lst8Nd++t@VoRh+|KuXmr@<@?uN>9hFNr(5>V37<dY5hVx7WO$CM
za5&!2!+-Sh=*`i_pFB?a9LWPqnHU4cfH82^44`JS)YppE8Ux0FF|cQVzYiYD7&0bB
z`E;O>D*$i=vk3a~F9-B&0EUc75f+HkP@smoe8q4Y4!h5OA!Ab1aB{LT=E*C&d_!@v
zI_zHD$puAgjR9j|$iRg@j=BEdpT7SOgY3x|Fb2+w0XJ%9%@nVcYisdxTx&h_0m{OD
lNpT;7j+A2faw)!nioou<0}L6HA}kR55fB=zF$VsWfp4#8P9XpQ

literal 0
HcmV?d00001

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json
new file mode 100644
index 0000000..d1701a4
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "comp1",
+      "seq_region_start": 10,
+      "seq_region_end": 20,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/manifest.txt
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/agp/manifest.txt
rename to modules/ensembl/features/combine_json/tests/data/ncrna/agp/manifest.txt
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json
new file mode 100644
index 0000000..3479d90
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 109,
+      "seq_region_end": 119,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/test.agp b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/test.agp
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/agp/test.agp
rename to modules/ensembl/features/combine_json/tests/data/ncrna/agp/test.agp
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..fe6726456fbd1a5c01fb943de0bb1315c8847206
GIT binary patch
literal 6148
zcmeHKJ5Iwu5S>XZWRxZ)<%)tnH!zVoK`wwoB!U!UOCcSFkH9Io0R<H$*W%4b!H$XO
z5Q=7`*|(mZS&!e^o?Rm1`EE5Mni5eC6&#(T`9Wk{bRZ+Q$RO(+Pqf|K&lY!e)eOAD
z@E;kFwL7B+dZdPSG+4jv`Eb0d%W}D?m$2uT?{6>b*U#&r&+1p7X5D>H5~e^NwdSCi
z^xvcL)!}$IkI6?TkIo!b{hh}#pMD<l9fSk}!9Xx@@(iG6i{ul-XoG=ZAQ;#)ApeI1
z6)YTEL;ZE2(IWsbgINWA-9rJB7=VRiYX}R3Efi>>>_-f?aM+Xk6^^Z;g%j(YF^}Jw
z{e;54gX5&`#D!tB!9Xz3XJBlXQ>p(q$It)$ptuSKf`OA_fU|N@&hbcITL%v(wKhR-
pp(5hf8ty~T(NYXwDa99171)z>fQ4gg2n!@O0!l+P!N8v~@C8=rP3-^x

literal 0
HcmV?d00001

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json
new file mode 100644
index 0000000..82f7bb1
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1_1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json
new file mode 100644
index 0000000..d6c2349
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1_4",
+      "seq_region_start": 1,
+      "seq_region_end": 2,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/manifest.txt
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/header/manifest.txt
rename to modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/manifest.txt
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json
new file mode 100644
index 0000000..995f408
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json
@@ -0,0 +1,37 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    },
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 4,
+      "seq_region_end": 5,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/header/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..34af3ace395b1fabfb7621ad8be3aaa77d8fada9
GIT binary patch
literal 6148
zcmeHKOHRW;47J+`1$EOU%UQAS4MG)8&<mgvi6S*55_F#}D_C#_ZonD17SA7*q@iMm
zP-RP=H}TjL=S>pN5D_nT)q-eFL^)J&a*pN)k#W(HOxz-ataCik<MwW`x^3!q<Q<0p
z$bhWf8Qs$ZwX~zr`ekp2<8@P(>us}!J)b>)yl&n<uf{&ruRiUj|DGgFfjk<`K{FY?
zN7Kv0@qQlFOD~V!9Ch=Z$1$H_9`YT81Ovf9FmUn=pk|BYGs9?ufnXpQ*fSvihXfTY
z96Lk(b)eBB05F4D1%2H^0h1Vjg=1$33xq8cXrb&!47PCCllv8pouP#j>zy%=-<kb{
z!oGv!r0&FpVYI<OFfe3bYL|1V|JTRQ|HGiT3I>9KlVX6ga#b$zNM2h<4=1%YL2sZU
n;@27OL(tJu3|}e57f==0lXQTEV`m5pBsKy{Lo~s_pEB?TBx_B{

literal 0
HcmV?d00001

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/a.json
new file mode 100644
index 0000000..8cee59d
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/a.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1_chunk_start_1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/b.json
new file mode 100644
index 0000000..a76e76e
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/b.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1_chunk_start_4",
+      "seq_region_start": 1,
+      "seq_region_end": 2,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/header/manifest.txt
new file mode 100644
index 0000000..cada44b
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/header/manifest.txt
@@ -0,0 +1,2 @@
+inputs/a.json
+inputs/b.json
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/header/output/test.features.json
new file mode 100644
index 0000000..995f408
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/header/output/test.features.json
@@ -0,0 +1,37 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    },
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 4,
+      "seq_region_end": 5,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/order/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..66ff2cf9ee6c96bb1913fed247a844fe61ae8b34
GIT binary patch
literal 6148
zcmeHKJ5Iwu5S>XZWJHsaax41Wz(nQ*xc~~02vUMALHez!C^!Q*;0&}>oQF3b1v@69
zLnxY&X5V^tW<7pudv=M4XWQkJXhK9KRB&{P<_D2+(SeNIB7>}RJkrDZb~?XlHtoPW
z4F8b<S-TUuqkC#;OM~?*UiQZ~O<gb6%>wo^f4n`fUf(Z=KFeQy+Ew>GNtgn8G@65E
z(tnS}7yIMgJf1!~d35I3G~am~^6BRx-$6()5DWwZN6!Fiwn%wq7;P{R3<Lu^2IT*c
zpn{cSHq>7S8a)C43z${V*F6+4i2+zSW<yvYY@t94Wj|uDg~OiQuX4<W7EY{p#yoy!
z_7e*G4vv$$6IX`O1_QxBpMkMmPNe={9X|i}gW@U}2nLRd0WRu!J;Nh;Z5=$E)Y=5S
phKh(^Hr$1vqoo+WQi{)^DzGQ%04v9A2n!@O0!l+P!N8v~@CEkLP#XXM

literal 0
HcmV?d00001

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json b/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json
new file mode 100644
index 0000000..8cee59d
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1_chunk_start_1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json b/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json
new file mode 100644
index 0000000..a76e76e
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json
@@ -0,0 +1,27 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1_chunk_start_4",
+      "seq_region_start": 1,
+      "seq_region_end": 2,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/order/manifest.txt
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/order/manifest.txt
rename to modules/ensembl/features/combine_json/tests/data/ncrna/order/manifest.txt
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json
new file mode 100644
index 0000000..87be453
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json
@@ -0,0 +1,37 @@
+{
+  "analysis": {
+    "run_date": "2026-02-18T00:00:00Z",
+    "logic_name": "cmscan",
+    "display_label": "cmscan",
+    "description": "cmscan analysis",
+    "program": "test",
+    "program_version": "0.0"
+  },
+  "source": {
+    "source_provider": "prov",
+    "is_primary": true
+  },
+  "ncrna_tool": "cmscan",
+  "ncrna_features": [
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 4,
+      "seq_region_end": 5,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    },
+    {
+      "seq_region": "chr1",
+      "seq_region_start": 1,
+      "seq_region_end": 3,
+      "seq_region_strand": 1,
+      "biotype": "miRNA",
+      "score": 1.0,
+      "target_name": "MIRTEST",
+      "is_significant": true
+    }
+  ]
+}
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/inputs/in.json b/modules/ensembl/features/combine_json/tests/data/repeat/agp/inputs/in.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/agp/inputs/in.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/agp/inputs/in.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/custom_regex/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/agp/manifest.txt
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/custom_regex/manifest.txt
rename to modules/ensembl/features/combine_json/tests/data/repeat/agp/manifest.txt
diff --git a/modules/ensembl/repeats/combine_json/tests/data/agp/output/test.repeat.json b/modules/ensembl/features/combine_json/tests/data/repeat/agp/output/test.features.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/agp/output/test.repeat.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/agp/output/test.features.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp b/modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp
new file mode 100644
index 0000000..86dddab
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp
@@ -0,0 +1 @@
+chr1	100	199	1	W	comp1	1	100	+
diff --git a/modules/ensembl/repeats/combine_json/tests/data/custom_regex/inputs/in.json b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/inputs/in.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/custom_regex/inputs/in.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/inputs/in.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt
new file mode 100644
index 0000000..1ac93e6
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt
@@ -0,0 +1 @@
+inputs/in.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/custom_regex/output/test.repeat.json b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/output/test.features.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/custom_regex/output/test.repeat.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/output/test.features.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/a.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/header/inputs/a.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/a.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/b.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/header/inputs/b.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/b.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/header/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/header/manifest.txt
new file mode 100644
index 0000000..419c5fd
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/repeat/header/manifest.txt
@@ -0,0 +1,2 @@
+inputs/a.json
+inputs/b.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/header/output/test.repeat.json b/modules/ensembl/features/combine_json/tests/data/repeat/header/output/test.features.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/header/output/test.repeat.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/header/output/test.features.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/inputs/01.json b/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/01.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/order/inputs/01.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/01.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/inputs/02.json b/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/02.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/order/inputs/02.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/02.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt
new file mode 100644
index 0000000..dad42b0
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt
@@ -0,0 +1,2 @@
+inputs/02.json
+inputs/01.json
diff --git a/modules/ensembl/repeats/combine_json/tests/data/order/output/test.repeat.json b/modules/ensembl/features/combine_json/tests/data/repeat/order/output/test.features.json
similarity index 100%
rename from modules/ensembl/repeats/combine_json/tests/data/order/output/test.repeat.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/order/output/test.features.json
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test
new file mode 100644
index 0000000..a7490d8
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test
@@ -0,0 +1,280 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nf-core modules test features/combine_json
+nextflow_process {
+
+    name "Test Process FEATURES_COMBINE_JSON"
+    script "../main.nf"
+    process "FEATURES_COMBINE_JSON"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "features"
+    tag "features/combine_json"
+
+    test("Stub outputs: repeat header mode") {
+        // Stub run (-stub) on the repeat/header manifest; the NO_FILE placeholder is passed instead of an AGP file.
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/repeat/header/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Stub outputs: ncRNA header mode") {
+        // Stub run (-stub) on the ncrna/header manifest; the NO_FILE placeholder is passed instead of an AGP file.
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/ncrna/header/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Stub outputs: repeat AGP mode") {
+        // Stub run (-stub) on the repeat/agp manifest with repeat/agp/test.agp as the third input.
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/repeat/agp/manifest.txt'),
+                            file('${moduleDir}/tests/data/repeat/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Stub outputs: ncRNA AGP mode") {
+        // Stub run (-stub) on the ncrna/agp manifest with ncrna/agp/test.agp as the third input.
+        when {
+            options "-stub"
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/ncrna/agp/manifest.txt'),
+                            file('${moduleDir}/tests/data/ncrna/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: repeat header combine + header-driven liftover") {
+        // Real (non-stub) run on the repeat/header manifest; the NO_FILE placeholder is passed instead of an AGP file.
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/repeat/header/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: ncRNA header combine + header-driven liftover") {
+        // Real (non-stub) run on the ncrna/header manifest; the NO_FILE placeholder is passed instead of an AGP file.
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/ncrna/header/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: repeat AGP-driven liftover") {
+        // Real (non-stub) run on the repeat/agp manifest with repeat/agp/test.agp as the third input.
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/repeat/agp/manifest.txt'),
+                            file('${moduleDir}/tests/data/repeat/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: ncRNA AGP-driven liftover") {
+        // Real (non-stub) run on the ncrna/agp manifest with ncrna/agp/test.agp as the third input.
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/ncrna/agp/manifest.txt'),
+                            file('${moduleDir}/tests/data/ncrna/agp/test.agp')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: repeat custom chunk regex") {
+        // Real run with params.chunk_id_regex overridden; the pattern has named groups <base> and <start> joined by '_'.
+        when {
+            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/repeat/custom_regex/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: ncRNA custom chunk regex") {
+        // Real run with params.chunk_id_regex overridden; the NO_FILE placeholder is passed instead of an AGP file.
+        when {
+            // Matches the ncRNA custom_regex inputs: <base>_<start>
+            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
+
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/ncrna/custom_regex/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: repeat manifest order is preserved") {
+        // Real run on repeat/order/manifest.txt, which lists inputs/02.json before inputs/01.json.
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/repeat/order/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("Real run: ncRNA manifest order is preserved") {
+        // Real run on ncrna/order/manifest.txt; the snapshot pins the feature order of the combined output.
+        when {
+            process {
+                """
+                input[0] = [[ id:'test' ],
+                            file('${moduleDir}/tests/data/ncrna/order/manifest.txt'),
+                            file('${projectDir}/modules/assets/NO_FILE')]
+                """
+            }
+        }
+
+        then {
+            assert process.success  // first, so a task failure is not masked by the output checks below
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
new file mode 100644
index 0000000..e61eea6
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -0,0 +1,488 @@
+{
+    "Real run: AGP-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T17:54:02.625791",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: ncRNA custom chunk regex": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:15:58.553743",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: header combine + header-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T17:54:00.401674",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: repeat custom chunk regex": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T18:38:58.589502",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: repeat manifest order is preserved": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T18:39:03.129965",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: ncRNA header mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:21:33.771238",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: ncRNA header combine + header-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:15:49.744214",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: ncRNA AGP mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,435c4d8f4008e57685ff951bbe81df0e"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,435c4d8f4008e57685ff951bbe81df0e"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:21:38.112104",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: ncRNA manifest order is preserved": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,c5b36cf499f0d111684f91372469154f"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,c5b36cf499f0d111684f91372469154f"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:16:02.962026",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: repeat AGP mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:21:35.954494",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: AGP mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T17:53:58.199351",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: header mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T17:53:56.112251",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: ncRNA AGP-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,837dcba57ebd00c1b8adbce528b8f1b0"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,837dcba57ebd00c1b8adbce528b8f1b0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:15:54.146861",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: repeat header combine + header-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T18:38:49.606314",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: custom chunk regex": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T17:54:04.861554",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: repeat header mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T19:21:31.584701",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: manifest order is preserved": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T17:54:07.074875",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Real run: repeat AGP-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T18:38:54.140158",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/ensembl/repeats/combine_json/main.nf b/modules/ensembl/repeats/combine_json/main.nf
deleted file mode 100644
index ff2177b..0000000
--- a/modules/ensembl/repeats/combine_json/main.nf
+++ /dev/null
@@ -1,79 +0,0 @@
-// See the NOTICE file distributed with this work for additional information
-// regarding copyright ownership.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-process REPEATS_COMBINE_JSON {
-
-    tag "${meta.id}"
-    label 'process_medium'
-
-    conda "${moduleDir}/environment.yml"
-    container "ensemblorg/ensembl-genomio:v1.6.1"
-
-    input:
-        tuple val(meta), path(json_manifest), path(agp)
-
-    output:
-        tuple val(meta), path("${meta.id}.repeat.json"), emit: combined_json
-
-    script:
-        def args = []
-
-        if (params.chunk_id_regex) {
-            def rx = params.chunk_id_regex.replace("'", "'\"'\"'")
-            args << "--chunk-id-regex '${rx}'"
-        }
-
-        if (params.allow_revcomp) {
-            args << "--allow-revcomp"
-        }
-
-        def has_agp = agp && agp.baseName != 'NO_FILE'
-        if (has_agp) {
-            args << "--agp-file ${agp}"
-        }
-
-        def out_json = "${meta.id}.repeat.json"
-
-        """
-        python -m ensembl.io.genomio.repeats.combine_json \\
-            --json-manifest ${json_manifest} \\
-            --out-json ${out_json} \\
-            ${args.join(' ')}
-        """
-
-    stub:
-        """
-        set -euo pipefail
-
-        test_data_dir="${moduleDir}/tests/data"
-
-        out_json="${meta.id}.repeat.json"
-
-        test -s "${json_manifest}"
-
-        mode="header"
-        agp_path="${agp}"
-        agp_name="\${agp_path##*/}"
-        if [[ "\$agp_name" != "NO_FILE" ]]; then
-            mode="agp"
-        fi
-
-        # Provide a schema-valid combined JSON fixture.
-        # Arrange fixtures under:
-        #   tests/data/header/output/<id>.repeat.json
-        #   tests/data/agp/output/<id>.repeat.json
-        cp "\$test_data_dir/\$mode/output/${meta.id}.repeat.json" "\$out_json"
-        """
-}
diff --git a/modules/ensembl/repeats/combine_json/tests/main.nf.test b/modules/ensembl/repeats/combine_json/tests/main.nf.test
deleted file mode 100644
index 5a6eff4..0000000
--- a/modules/ensembl/repeats/combine_json/tests/main.nf.test
+++ /dev/null
@@ -1,153 +0,0 @@
-// See the NOTICE file distributed with this work for additional information
-// regarding copyright ownership.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// nf-core modules test repeats/combine_json
-nextflow_process {
-
-    name "Test Process REPEATS_COMBINE_JSON"
-    script "../main.nf"
-    process "REPEATS_COMBINE_JSON"
-
-    tag "modules"
-    tag "modules_ensembl"
-    tag "repeats"
-    tag "repeats/combine_json"
-
-    test("Stub outputs: header mode") {
-
-        when {
-            options "-stub"
-
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/header/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Stub outputs: AGP mode") {
-
-        when {
-            options "-stub"
-
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/agp/test.agp')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: header combine + header-driven liftover") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/header/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: AGP-driven liftover") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/agp/test.agp')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: custom chunk regex") {
-
-        when {
-            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
-
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: manifest order is preserved") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/order/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-}
diff --git a/modules/ensembl/repeats/combine_json/tests/main.nf.test.snap b/modules/ensembl/repeats/combine_json/tests/main.nf.test.snap
deleted file mode 100644
index b3fae3f..0000000
--- a/modules/ensembl/repeats/combine_json/tests/main.nf.test.snap
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "Real run: AGP-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-19T00:11:13.232239",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: AGP mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-19T00:11:08.721986",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: header combine + header-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-19T00:11:11.007889",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: header mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-19T00:11:06.662964",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: custom chunk regex": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,f410544c71be74f7a8a7eab5e494b258"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,f410544c71be74f7a8a7eab5e494b258"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-19T00:11:15.43463",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: manifest order is preserved": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,1b68c1371265dad11839769a5e776b33"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.repeat.json:md5,1b68c1371265dad11839769a5e776b33"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-19T00:11:17.627989",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    }
-}
\ No newline at end of file

From e5bdeb2cd2d5183190e28979222a3263b85f7911 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Mon, 23 Feb 2026 23:33:22 +0000
Subject: [PATCH 19/36] Rename 'header' mode, fixtures and tests to 'seq_region'

---
 modules/ensembl/features/combine_json/main.nf |   6 +-
 .../ncrna/{header => seq_region}/.DS_Store    | Bin
 .../{header => seq_region}/inputs/a.json      |   0
 .../{header => seq_region}/inputs/b.json      |   0
 .../ncrna/{header => seq_region}/manifest.txt |   0
 .../output/test.features.json                 |   0
 .../{header => seq_region}/inputs/a.json      |   0
 .../{header => seq_region}/inputs/b.json      |   0
 .../{header => seq_region}/manifest.txt       |   0
 .../output/test.features.json                 |   0
 .../features/combine_json/tests/main.nf.test  |  16 +--
 .../combine_json/tests/main.nf.test.snap      | 108 ++++++++++++++++++
 12 files changed, 119 insertions(+), 11 deletions(-)
 rename modules/ensembl/features/combine_json/tests/data/ncrna/{header => seq_region}/.DS_Store (100%)
 rename modules/ensembl/features/combine_json/tests/data/ncrna/{header => seq_region}/inputs/a.json (100%)
 rename modules/ensembl/features/combine_json/tests/data/ncrna/{header => seq_region}/inputs/b.json (100%)
 rename modules/ensembl/features/combine_json/tests/data/ncrna/{header => seq_region}/manifest.txt (100%)
 rename modules/ensembl/features/combine_json/tests/data/ncrna/{header => seq_region}/output/test.features.json (100%)
 rename modules/ensembl/features/combine_json/tests/data/repeat/{header => seq_region}/inputs/a.json (100%)
 rename modules/ensembl/features/combine_json/tests/data/repeat/{header => seq_region}/inputs/b.json (100%)
 rename modules/ensembl/features/combine_json/tests/data/repeat/{header => seq_region}/manifest.txt (100%)
 rename modules/ensembl/features/combine_json/tests/data/repeat/{header => seq_region}/output/test.features.json (100%)

diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index 18425e5..d895040 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -63,7 +63,7 @@ process FEATURES_COMBINE_JSON {
 
         test -s "${json_manifest}"
 
-        mode="header"
+        mode="seq_region"
         agp_path="${agp}"
         agp_name="\${agp_path##*/}"
         if [[ "\$agp_name" != "NO_FILE" ]]; then
@@ -99,9 +99,9 @@ process FEATURES_COMBINE_JSON {
 
         # Provide a schema-valid combined JSON fixture.
         # Fixtures are arranged under:
-        #   tests/data/repeat/header/output/<id>.features.json
+        #   tests/data/repeat/seq_region/output/<id>.features.json
         #   tests/data/repeat/agp/output/<id>.features.json
-        #   tests/data/ncrna/header/output/<id>.features.json
+        #   tests/data/ncrna/seq_region/output/<id>.features.json
         #   tests/data/ncrna/agp/output/<id>.features.json
         fixture="\$test_data_dir/\$load_type/\$mode/output/${meta.id}.features.json"
 
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/.DS_Store
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/ncrna/header/.DS_Store
rename to modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/.DS_Store
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/a.json
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/a.json
rename to modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/a.json
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/b.json
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/ncrna/header/inputs/b.json
rename to modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/b.json
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/manifest.txt
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/ncrna/header/manifest.txt
rename to modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/manifest.txt
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/header/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/output/test.features.json
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/ncrna/header/output/test.features.json
rename to modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/output/test.features.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/a.json
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/a.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/a.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/b.json
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/repeat/header/inputs/b.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/b.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/header/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/manifest.txt
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/repeat/header/manifest.txt
rename to modules/ensembl/features/combine_json/tests/data/repeat/seq_region/manifest.txt
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/header/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/output/test.features.json
similarity index 100%
rename from modules/ensembl/features/combine_json/tests/data/repeat/header/output/test.features.json
rename to modules/ensembl/features/combine_json/tests/data/repeat/seq_region/output/test.features.json
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test
index a7490d8..360aa79 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test
@@ -25,7 +25,7 @@ nextflow_process {
     tag "features"
     tag "features/combine_json"
 
-    test("Stub outputs: repeat header mode") {
+    test("Stub outputs: repeat seq_region mode") {
 
         when {
             options "-stub"
@@ -33,7 +33,7 @@ nextflow_process {
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/header/manifest.txt'),
+                            file('${moduleDir}/tests/data/repeat/seq_region/manifest.txt'),
                             file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
@@ -47,7 +47,7 @@ nextflow_process {
         }
     }
 
-    test("Stub outputs: ncRNA header mode") {
+    test("Stub outputs: ncRNA seq_region mode") {
 
         when {
             options "-stub"
@@ -55,7 +55,7 @@ nextflow_process {
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/header/manifest.txt'),
+                            file('${moduleDir}/tests/data/ncrna/seq_region/manifest.txt'),
                             file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
@@ -113,13 +113,13 @@ nextflow_process {
         }
     }
 
-    test("Real run: repeat header combine + header-driven liftover") {
+    test("Real run: repeat seq_region combine + seq_region-driven liftover") {
 
         when {
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/header/manifest.txt'),
+                            file('${moduleDir}/tests/data/repeat/seq_region/manifest.txt'),
                             file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
@@ -133,13 +133,13 @@ nextflow_process {
         }
     }
 
-    test("Real run: ncRNA header combine + header-driven liftover") {
+    test("Real run: ncRNA seq_region combine + seq_region-driven liftover") {
 
         when {
             process {
                 """
                 input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/header/manifest.txt'),
+                            file('${moduleDir}/tests/data/ncrna/seq_region/manifest.txt'),
                             file('${projectDir}/modules/assets/NO_FILE')]
                 """
             }
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index e61eea6..9ff7391 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -161,6 +161,60 @@
             "nextflow": "25.10.3"
         }
     },
+    "Real run: repeat seq_region combine + seq_region-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T23:31:17.929825",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
+    "Stub outputs: ncRNA seq_region mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T23:32:28.865106",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
     "Real run: ncRNA header combine + header-driven liftover": {
         "content": [
             {
@@ -188,6 +242,33 @@
             "nextflow": "25.10.3"
         }
     },
+    "Real run: ncRNA seq_region combine + seq_region-driven liftover": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T23:31:20.204864",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
     "Stub outputs: ncRNA AGP mode": {
         "content": [
             {
@@ -458,6 +539,33 @@
             "nextflow": "25.10.3"
         }
     },
+    "Stub outputs: repeat seq_region mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-23T23:32:26.754167",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+        }
+    },
     "Real run: repeat AGP-driven liftover": {
         "content": [
             {

From fda3137afa5f3992c23db47656e07ccf056e66ec Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Mon, 2 Mar 2026 23:23:36 +0000
Subject: [PATCH 20/36] Add versions.yml to output

---
 modules/ensembl/fasta/recombine/main.nf          | 10 ++++++++++
 .../ensembl/fasta/recombine/tests/main.nf.test   |  8 ++++----
 modules/ensembl/fasta/split/main.nf              | 11 +++++++++++
 modules/ensembl/features/combine_json/main.nf    | 13 ++++++++++++-
 .../features/combine_json/tests/main.nf.test     | 16 ++++++++--------
 5 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 9a7fd97..01b53dc 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -26,6 +26,7 @@ process FASTA_RECOMBINE {
 
     output:
         tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
+        path "versions.yml", emit: versions
 
     script:
         def args = []
@@ -51,6 +52,11 @@ process FASTA_RECOMBINE {
             --fasta-manifest ${fasta_manifest} \\
             --out-fasta ${out_fasta} \\
             ${args.join(' ')}
+
+        cat <<-END_VERSIONS > versions.yml
+        ${task.process}:
+        fasta_recombine: $(fasta_recombine --version 2>/dev/null | head -n 1)
+        END_VERSIONS
         """
 
     stub:
@@ -73,6 +79,10 @@ process FASTA_RECOMBINE {
 
         cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$out_fasta"
         
+        cat <<-END_VERSIONS > versions.yml
+        ${task.process}:
+        fasta_recombine: $(fasta_recombine --version 2>/dev/null | head -n 1)
+        END_VERSIONS
         """
         
 }
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index ef81bd9..91844ba 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -35,7 +35,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                         file('${moduleDir}/tests/data/header/manifest.txt'),
-                        file('${projectDir}/modules/assets/NO_FILE')]
+                        file('${workflow.projectDir}/modules/assets/NO_FILE')]
 
                 """
             }
@@ -80,7 +80,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/header/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -124,7 +124,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -145,7 +145,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/order/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 845628b..8b5a845 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -27,6 +27,7 @@ process FASTA_SPLIT {
     output:
         tuple val(meta), path("splits/**/*.fa"), emit: fastas
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
+        path "versions.yml", emit: versions
 
     script:
         def args = []
@@ -72,6 +73,11 @@ process FASTA_SPLIT {
             --fasta-file ${fasta} \\
             --out-dir splits \\
             ${args.join(' ')}
+
+        cat <<-END_VERSIONS > versions.yml
+        ${task.process}:
+        fasta_split: $(fasta_split --version 2>/dev/null | head -n 1)
+        END_VERSIONS
         """
 
     stub:
@@ -93,6 +99,11 @@ process FASTA_SPLIT {
         if [[ "${params.write_agp ?: false}" == "true" ]]; then
             cp "\$test_data_dir/agp/test.agp" "splits/${meta.id}.agp"
         fi
+
+        cat <<-END_VERSIONS > versions.yml
+        ${task.process}:
+        fasta_split: $(fasta_split --version 2>/dev/null | head -n 1)
+        END_VERSIONS
         """
 
         
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index d895040..6d92340 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -26,6 +26,7 @@ process FEATURES_COMBINE_JSON {
 
     output:
         tuple val(meta), path("${meta.id}.features.json"), emit: combined_json
+        path "versions.yml", emit: versions
 
     script:
         def args = []
@@ -47,10 +48,15 @@ process FEATURES_COMBINE_JSON {
         def out_json = "${meta.id}.features.json"
 
         """
-        python -m ensembl.io.genomio.features.combine_json \\
+        features_combine_json \\
             --json-manifest '${json_manifest}' \\
             --out-json '${out_json}' \\
             ${args.join(' ')}
+
+        cat <<-END_VERSIONS > versions.yml
+        ${task.process}:
+        features_combine_json: $(features_combine_json --version 2>/dev/null | head -n 1)
+        END_VERSIONS
         """
 
     stub:
@@ -112,5 +118,10 @@ process FEATURES_COMBINE_JSON {
         fi
 
         cp "\$fixture" "\$out_json"
+
+        cat <<-END_VERSIONS > versions.yml
+        ${task.process}:
+        features_combine_json: $(features_combine_json --version 2>/dev/null | head -n 1)
+        END_VERSIONS
         """
 }
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test
index 360aa79..153f75b 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test
@@ -34,7 +34,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/repeat/seq_region/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -56,7 +56,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/ncrna/seq_region/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -120,7 +120,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/repeat/seq_region/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -140,7 +140,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/ncrna/seq_region/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -202,7 +202,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/repeat/custom_regex/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -225,7 +225,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/ncrna/custom_regex/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -245,7 +245,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/repeat/order/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }
@@ -265,7 +265,7 @@ nextflow_process {
                 """
                 input[0] = [[ id:'test' ],
                             file('${moduleDir}/tests/data/ncrna/order/manifest.txt'),
-                            file('${projectDir}/modules/assets/NO_FILE')]
+                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
                 """
             }
         }

From b1147731de77ff495a8555a4cb64e6d1dbd616fc Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Wed, 11 Mar 2026 09:34:57 +0000
Subject: [PATCH 21/36] Remove outdated files

---
 modules/ensembl/fasta/split/main.nf           |  13 +-
 .../ensembl/fasta/splitfasta/environment.yml  |   8 -
 modules/ensembl/fasta/splitfasta/main.nf      | 106 ----
 .../ensembl/fasta/splitfasta/split_fasta.py   | 462 ------------------
 .../fasta/splitfasta/tests/data/agp/test.agp  |   4 -
 .../fasta/splitfasta/tests/data/real/in.fa    |   6 -
 .../tests/data/splits/default/0/test.1.fa     |   4 -
 .../tests/data/splits/default/0/test.2.fa     |   2 -
 .../tests/data/splits/multi_dir/0/0/test.1.fa |   2 -
 .../tests/data/splits/multi_dir/0/1/test.2.fa |   2 -
 .../tests/data/splits/unique/0/test.0.1.fa    |   2 -
 .../tests/data/splits/unique/0/test.0.2.fa    |   2 -
 .../fasta/splitfasta/tests/main.nf.test       | 301 ------------
 .../fasta/splitfasta/tests/main.nf.test.snap  | 168 -------
 tests/config/nextflow.config                  |   2 +-
 tests/test_split_fasta.py                     | 144 ------
 16 files changed, 13 insertions(+), 1215 deletions(-)
 delete mode 100644 modules/ensembl/fasta/splitfasta/environment.yml
 delete mode 100644 modules/ensembl/fasta/splitfasta/main.nf
 delete mode 100644 modules/ensembl/fasta/splitfasta/split_fasta.py
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/main.nf.test
 delete mode 100644 modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
 delete mode 100644 tests/test_split_fasta.py

diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 8b5a845..8cc900b 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -13,6 +13,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+def fasta_split_mem(longest_seq_bp) {
+    if( !longest_seq_bp || longest_seq_bp <= 0 ) return 8.GB
+
+    // Heuristic: ~2.5 bytes/base peak => ~1 GB per 400 Mbp of the *longest* sequence
+    // Add 2GB base memory to account for overhead
+    def mem_gb = 2 + Math.ceil(longest_seq_bp as double / 400_000_000d)
+    return mem_gb.GB
+}
+
 process FASTA_SPLIT {
 
     tag "${meta.id}"
@@ -21,8 +30,10 @@ process FASTA_SPLIT {
     conda "${moduleDir}/environment.yml"
     container "ensemblorg/ensembl-genomio:v1.6.1"
 
+    memory { fasta_split_mem(longest_seq_bp) }
+
     input:
-        tuple val(meta), path(fasta)
+        tuple val(meta), path(fasta), val(longest_seq_bp)
 
     output:
         tuple val(meta), path("splits/**/*.fa"), emit: fastas
diff --git a/modules/ensembl/fasta/splitfasta/environment.yml b/modules/ensembl/fasta/splitfasta/environment.yml
deleted file mode 100644
index 759f3da..0000000
--- a/modules/ensembl/fasta/splitfasta/environment.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-name: "fasta_splitfasta"
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - python=3.11.7
-  - biopython=1.86
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/main.nf b/modules/ensembl/fasta/splitfasta/main.nf
deleted file mode 100644
index 0a8b761..0000000
--- a/modules/ensembl/fasta/splitfasta/main.nf
+++ /dev/null
@@ -1,106 +0,0 @@
-// See the NOTICE file distributed with this work for additional information
-// regarding copyright ownership.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-process FASTA_SPLITFASTA {
-
-    tag "${meta.id}"
-    label 'process_low'
-
-    publishDir "${params.outdir ?: '.'}", mode: 'copy'
-
-    input:
-        tuple val(meta), path(fasta)
-
-    output:
-        tuple val(meta), path("**/*.fa"), emit: fasta
-        tuple val(meta), path("*.agp"), emit: agp, optional: true
-
-    script:
-        def args = []
-
-        if (params.max_seqs_per_file) {
-            args << "--max-seqs-per-file ${params.max_seqs_per_file}"
-        }
-
-        if (params.max_seq_length_per_file) {
-            args << "--max-seq-length-per-file ${params.max_seq_length_per_file}"
-        }
-
-        if (params.min_chunk_length) {
-            args << "--min-chunk-length ${params.min_chunk_length}"
-        }
-
-        if (params.max_files_per_directory) {
-            args << "--max-files-per-directory ${params.max_files_per_directory}"
-        }
-
-        if (params.max_dirs_per_directory) {
-            args << "--max-dirs-per-directory ${params.max_dirs_per_directory}"
-        }
-
-        if (params.force_max_seq_length) {
-            args << "--force-max-seq-length"
-        }
-
-        if (params.write_agp) {
-            args << "--write-agp"
-        }
-
-        if (params.unique_file_names) {
-            args << "--unique-file-names"
-        }
-
-        if (params.delete_existing_files) {
-            args << "--delete-existing-files"
-        }
-
-        """
-        python \\
-            ${moduleDir}/split_fasta.py \\
-            --fasta-file \$PWD/${fasta} \\
-            --out-dir \$PWD \\
-            ${args.join(' ')}
-        """
-
-    stub:
-        """
-        set -euo pipefail
-
-        FIXTURE_DIR="${moduleDir}/tests/data"
-
-        LAYOUT="default"
-        if [[ "${params.unique_file_names ?: false}" == "true" ]]; then
-            LAYOUT="unique"
-        elif [[ -n "${params.max_dirs_per_directory ?: ''}" || -n "${params.max_files_per_directory ?: ''}" ]]; then
-        LAYOUT="multi_dir"
-        fi
-
-        mkdir -p splits
-        cp -R "\$FIXTURE_DIR/splits/\$LAYOUT/." "splits/"
-
-        find splits -type f -name 'test*.fa' | while read -r f; do
-            bn=\$(basename "\$f")
-            dir=\$(dirname "\$f")
-            new_bn="\${bn/test/${meta.id}}"
-            mv "\$f" "\${dir}/\${new_bn}"
-        done
-
-        if [[ "${params.write_agp ?: false}" == "true" ]]; then
-            cp "\$FIXTURE_DIR/agp/test.agp" "${meta.id}.agp"
-        fi
-        """
-
-        
-}
diff --git a/modules/ensembl/fasta/splitfasta/split_fasta.py b/modules/ensembl/fasta/splitfasta/split_fasta.py
deleted file mode 100644
index 164ec44..0000000
--- a/modules/ensembl/fasta/splitfasta/split_fasta.py
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/usr/bin/env python3
-
-"""Split a FASTA file (possibly gzipped) into multiple smaller FASTA files."""
-
-import inspect
-import logging
-import shutil
-from pathlib import Path
-from typing import Optional, List, Set, Tuple
-
-from Bio import SeqIO
-from Bio.SeqRecord import SeqRecord
-
-try:
-    from ensembl.utils.archive import open_gz_file  # type: ignore
-except ImportError:
-    import gzip
-
-    def open_gz_file(path):
-        p = str(path)
-        return gzip.open(p, "rt") if p.endswith(".gz") else open(p, "rt")
-
-
-try:
-    from ensembl.utils.argparse import ArgumentParser  # type: ignore
-except ImportError:
-    from argparse import ArgumentParser
-
-try:
-    from ensembl.utils.logging import init_logging_with_args  # type: ignore
-except ImportError:
-    import logging
-
-    def init_logging_with_args(args):
-        level = getattr(args, "log_level", "INFO")
-        logging.basicConfig(level=level)
-
-
-class Params:
-    """Class to hold parameters for splitting FASTA files."""
-
-    def __init__(
-        self,
-        fasta_file: Path,
-        out_dir: Optional[Path] = None,
-        write_agp: bool = False,
-        max_seqs_per_file: Optional[int] = None,
-        max_seq_length_per_file: Optional[int] = None,
-        min_chunk_length: Optional[int] = None,
-        max_files_per_directory: Optional[int] = None,
-        max_dirs_per_directory: Optional[int] = None,
-        delete_existing_files: bool = False,
-        unique_file_names: bool = False,
-        delete_original_file: bool = False,
-        force_max_seq_length: bool = False,
-    ):
-        self.fasta_file = fasta_file
-        self.out_dir = out_dir if out_dir is not None else fasta_file.parent
-        self.write_agp = write_agp
-        self.max_seqs_per_file = max_seqs_per_file
-        self.max_seq_length_per_file = max_seq_length_per_file
-        self.min_chunk_length = min_chunk_length
-        self.max_files_per_directory = max_files_per_directory
-        self.max_dirs_per_directory = max_dirs_per_directory
-        self.delete_existing_files = delete_existing_files
-        self.unique_file_names = unique_file_names
-        self.delete_original_file = delete_original_file
-        self.force_max_seq_length = force_max_seq_length
-
-        self._validate_params()
-
-    def _validate_params(self) -> None:
-        if self.max_dirs_per_directory is not None and self.max_dirs_per_directory <= 0:
-            raise ValueError("--max-dirs-per-directory must be > 0 or None")
-        if (
-            self.max_files_per_directory is not None
-            and self.max_files_per_directory <= 0
-        ):
-            raise ValueError("--max-files-per-directory must be > 0 or None")
-        if self.max_seqs_per_file is not None and self.max_seqs_per_file <= 0:
-            raise ValueError("--max-seqs-per-file must be > 0 or None")
-        if (
-            self.max_seq_length_per_file is not None
-            and self.max_seq_length_per_file <= 0
-        ):
-            raise ValueError("--max-seq-length-per-file must be > 0 or None")
-        if self.min_chunk_length is not None:
-            if self.max_seq_length_per_file is None:
-                raise ValueError(
-                    "--min-chunk-length requires --max-seq-length-per-file"
-                )
-            if self.min_chunk_length <= 0:
-                raise ValueError("--min-chunk-length must be > 0")
-
-
-class OutputWriter:
-    """
-    Manages output file creation and counters, writing in a single pass.
-    Creates/cleans directories lazily as required.
-    """
-
-    def __init__(self, params: Params):
-        self.params = params
-        self.basename = (
-            params.fasta_file.name.removesuffix(".gz")
-            .removesuffix(".fa")
-            .removesuffix(".fasta")
-        )
-        self.agp_file = (
-            self.params.out_dir.joinpath(self.basename + ".agp")
-            if params.write_agp
-            else None
-        )
-        self.file_count = 0
-        self.record_count = 0
-        self.file_len = 0
-        self._fh = None
-        self._agp_fh = None
-        self._cleaned_dirs: Set[Path] = set()
-
-        self.open_new_file()
-
-    def _create_or_clean_dir(self, dir_path: Path) -> None:
-        try:
-            dir_path.mkdir(parents=True, exist_ok=True)
-            if self.params.delete_existing_files and dir_path not in self._cleaned_dirs:
-                for child in dir_path.iterdir():
-                    if child.is_dir():
-                        shutil.rmtree(child)
-                    else:
-                        child.unlink()
-                self._cleaned_dirs.add(dir_path)
-        except Exception:
-            logging.exception("Failed to prepare output directory '%s'", dir_path)
-            raise
-
-    def _get_subdir_path(self, dir_index: int) -> Path:
-        """Computes subdirectory path based on dir_index and max_dirs_per_directory."""
-        parts = []
-        max_dirs = self.params.max_dirs_per_directory
-        if max_dirs is None:
-            parts.append("1")
-        else:
-            current_index = dir_index
-            while current_index >= 0:
-                parts.append(f"{current_index % max_dirs}")
-                current_index = current_index // max_dirs - 1
-
-        parts.reverse()
-        return self.params.out_dir.joinpath(*parts)
-
-    def _get_file_and_dir_index(self) -> Tuple[int, int]:
-        """
-        Determines index of file and directory based on file count and max files per directory.
-        Returns (file_index, dir_index).
-        """
-        max_files = self.params.max_files_per_directory
-        if max_files is None:
-            return self.file_count, 0
-        adjusted_count = self.file_count - 1
-        return (adjusted_count % max_files + 1, adjusted_count // max_files)
-
-    def _get_path_for_next_file(self) -> Path:
-        """Computes path for the next output file."""
-        self.file_count += 1
-        file_index, dir_index = self._get_file_and_dir_index()
-        subdir_path = self._get_subdir_path(dir_index)
-        self._create_or_clean_dir(subdir_path)
-
-        if self.params.unique_file_names:
-            file_name = f"{self.basename}.{dir_index}.{file_index}.fa"
-        else:
-            file_name = f"{self.basename}.{file_index}.fa"
-        return subdir_path.joinpath(file_name)
-
-    def add_agp_entry(
-        self,
-        object_id: str,
-        start: int,
-        end: int,
-        part_nr: int,
-        part_id: str,
-        part_length: int,
-    ) -> None:
-        """Adds an entry to the AGP file."""
-        # AGP columns for WGS contig component type:
-        # object, object_beg, object_end, part_number, component_type,
-        # component_id, component_beg, component_end, orientation
-        if self._agp_fh is None:
-            return
-        try:
-            line = f"{object_id}\t{start}\t{end}\t{part_nr}\tW\t{part_id}\t1\t{part_length}\t+\n"
-            self._agp_fh.write(line)
-        except Exception:
-            logging.exception("Failed to write AGP entry for part '%s'", part_id)
-            raise
-
-    def create_agp_file(self) -> None:
-        """Creates the AGP file for recording sequence chunking."""
-        if self.agp_file is None:
-            return
-        try:
-            self.params.out_dir.mkdir(parents=True, exist_ok=True)
-            self._agp_fh = open(self.agp_file, "w")
-            self._agp_fh.write("# AGP-version 2.0\n")
-            logging.info("Created AGP file '%s'", self.agp_file)
-        except Exception:
-            logging.exception("Failed to open AGP file '%s'", self.agp_file)
-            raise
-
-    def open_new_file(self) -> None:
-        """Closes current file (if any) and opens a new output file."""
-        if self._fh is not None:
-            self._fh.close()
-
-        path = self._get_path_for_next_file()
-        try:
-            self._fh = open(path, "w")
-            logging.debug("Opened output file '%s'", path)
-        except Exception:
-            logging.exception("Failed to open output file '%s'", path)
-            raise
-        self.record_count = 0
-        self.file_len = 0
-
-    def write_record(self, record: SeqRecord) -> None:
-        """Writes a SeqRecord to the current output file."""
-        try:
-            SeqIO.write(record, self._fh, "fasta")
-            self.record_count += 1
-            self.file_len += len(record.seq)
-        except Exception:
-            logging.exception("Failed to write record '%s' to output file", record.id)
-            raise
-
-    def close(self) -> None:
-        if self._fh is not None:
-            self._fh.close()
-            self._fh = None
-        if self._agp_fh is not None:
-            self._agp_fh.close()
-            self._agp_fh = None
-
-
-def _get_param_defaults() -> dict:
-    """Retrieve default values for Params class attributes."""
-    signature = inspect.signature(Params.__init__)
-    defaults = {}
-    for name, param in signature.parameters.items():
-        if name != "self" and param.default is not inspect.Parameter.empty:
-            defaults[name] = param.default
-    return defaults
-
-
-def split_fasta(params: Params) -> None:
-    """Splits the input FASTA file into multiple smaller FASTA files, chunking long sequences if required."""
-    if not params.fasta_file.exists():
-        logging.error(
-            "DEBUG: fasta_file=%r resolved=%r cwd=%r",
-            str(params.fasta_file),
-            str(Path(params.fasta_file).resolve()),
-            str(Path.cwd()),
-        )
-        raise FileNotFoundError(f"Fasta file '{params.fasta_file}' does not exist")
-
-    # Do nothing if file size is 0
-    if params.fasta_file.stat().st_size == 0:
-        logging.info("Input FASTA '%s' is empty; nothing to do", params.fasta_file)
-        return
-
-    params.out_dir.mkdir(parents=True, exist_ok=True)
-
-    writer = OutputWriter(params)
-
-    try:
-        if params.write_agp:
-            writer.create_agp_file()
-
-        with open_gz_file(params.fasta_file) as fh:
-            for record in SeqIO.parse(fh, "fasta"):
-                seq_len = len(record.seq)
-                max_seq_len = params.max_seq_length_per_file
-                max_seqs = params.max_seqs_per_file
-
-                if max_seqs is not None and writer.record_count >= max_seqs:
-                    writer.open_new_file()
-
-                if max_seq_len is None or writer.file_len + seq_len <= max_seq_len:
-                    writer.write_record(record)
-                    if params.write_agp:
-                        writer.add_agp_entry(
-                            record.id, 1, seq_len, 1, record.id, seq_len
-                        )
-                    continue
-
-                if params.force_max_seq_length and seq_len > max_seq_len:
-                    starts = list(range(0, seq_len, max_seq_len))
-                    ends = [min(s + max_seq_len, seq_len) for s in starts]
-
-                    if params.min_chunk_length is not None and len(starts) > 1:
-                        last_chunk_len = ends[-1] - starts[-1]
-                        if last_chunk_len < params.min_chunk_length:
-                            logging.warning(
-                                "Length of last chunk of record '%s' is %d, lower than min_chunk_length: %d;"
-                                + "merging with previous chunk",
-                                record.id,
-                                last_chunk_len,
-                                params.min_chunk_length,
-                            )
-                            ends[-2] = seq_len
-                            starts.pop()
-                            ends.pop()
-
-                    for i, (start, end) in enumerate(zip(starts, ends), start=1):
-                        chunk_seq = record.seq[start:end]
-                        chunk_record = SeqRecord(
-                            chunk_seq,
-                            id=f"{record.id}_chunk_start_{start}",
-                            description=f"{record.description} (part {i})",
-                        )
-                        if writer.record_count > 0:
-                            writer.open_new_file()
-                        writer.write_record(chunk_record)
-
-                        if params.write_agp:
-                            writer.add_agp_entry(
-                                record.id,
-                                start + 1,
-                                end,
-                                i,
-                                chunk_record.id,
-                                len(chunk_seq),
-                            )
-                else:
-                    logging.warning(
-                        "Record '%s' length %d exceeds max_seq_length_per_file %d but chunking not enabled",
-                        record.id,
-                        seq_len,
-                        max_seq_len,
-                    )
-                    if writer.record_count > 0:
-                        writer.open_new_file()
-                    writer.write_record(record)
-                    if params.write_agp:
-                        writer.add_agp_entry(
-                            record.id, 1, seq_len, 1, record.id, seq_len
-                        )
-    except Exception:
-        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
-        raise
-    finally:
-        writer.close()
-
-    if params.delete_original_file:
-        try:
-            params.fasta_file.unlink(missing_ok=True)
-        except Exception:
-            logging.warning(
-                "Failed to delete original FASTA file '%s'",
-                params.fasta_file,
-                exc_info=True,
-            )
-
-
-def parse_args(argv: Optional[List[str]] = None) -> Params:
-    defaults = _get_param_defaults()
-    parser = ArgumentParser(
-        description="Split a FASTA file into multiple FASTA files, optionally chunking long sequences."
-    )
-    parser.add_argument(
-        "--fasta-file",
-        type=Path,
-        required=True,
-        help="Input raw or compressed FASTA file containing sequences to split",
-    )
-    parser.add_argument(
-        "--out-dir",
-        type=Path,
-        help="Top-level output directory (default: input FASTA directory)",
-    )
-    parser.add_argument(
-        "--write-agp",
-        action="store_true",
-        help=f"Write AGP file describing the splits (default: {defaults['write_agp']})",
-    )
-    parser.add_argument(
-        "--max-seqs-per-file",
-        type=int,
-        help=f"Max records per output file (default: {defaults['max_seqs_per_file']})",
-    )
-    parser.add_argument(
-        "--max-seq-length-per-file",
-        type=int,
-        help=f"Max cumulative sequence length per output file (default: {defaults['max_seq_length_per_file']})",
-    )
-    parser.add_argument(
-        "--min-chunk-length",
-        type=int,
-        help=f"Minimum length of a chunk allowed as a remainder (default: {defaults['min_chunk_length']})",
-    )
-    parser.add_argument(
-        "--max-files-per-directory",
-        type=int,
-        help=f"Max files per directory before moving to next computed dir (default: {defaults['max_files_per_directory']})",
-    )
-    parser.add_argument(
-        "--max-dirs-per-directory",
-        type=int,
-        help=f"Max subdirectories per directory level (default: {defaults['max_dirs_per_directory']})",
-    )
-    parser.add_argument(
-        "--delete-existing-files",
-        action="store_true",
-        help=f"Delete existing files within computed output dirs (default: {defaults['delete_existing_files']})",
-    )
-    parser.add_argument(
-        "--unique-file-names",
-        action="store_true",
-        help=f"Make output file names unique across dirs by including dir_index (default: {defaults['unique_file_names']})",
-    )
-    parser.add_argument(
-        "--delete-original-file",
-        action="store_true",
-        help=f"Delete original input FASTA after splitting (default: {defaults['delete_original_file']})",
-    )
-    parser.add_argument(
-        "--force-max-seq-length",
-        action="store_true",
-        help=f"Chunk single sequences longer than max-seq-length-per-file (default: {defaults['force_max_seq_length']})",
-    )
-
-    args = parser.parse_args(argv)
-    init_logging_with_args(args)
-
-    params = Params(
-        fasta_file=args.fasta_file,
-        out_dir=args.out_dir,
-        write_agp=args.write_agp,
-        max_seqs_per_file=args.max_seqs_per_file,
-        max_seq_length_per_file=args.max_seq_length_per_file,
-        min_chunk_length=args.min_chunk_length,
-        max_files_per_directory=args.max_files_per_directory,
-        max_dirs_per_directory=args.max_dirs_per_directory,
-        delete_existing_files=args.delete_existing_files,
-        unique_file_names=args.unique_file_names,
-        delete_original_file=args.delete_original_file,
-        force_max_seq_length=args.force_max_seq_length,
-    )
-    return params
-
-
-def main(argv: Optional[List[str]] = None) -> None:
-    try:
-        params = parse_args(argv)
-        split_fasta(params)
-    except Exception:
-        logging.exception("Error processing FASTA file '%s'", params.fasta_file)
-        raise
-
-
-if __name__ == "__main__":
-    main()
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp b/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
deleted file mode 100644
index 46fc419..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/agp/test.agp
+++ /dev/null
@@ -1,4 +0,0 @@
-# AGP-version 2.0
-seq1	1	10	1	W	seq1	1	10	+
-seq2	1	10	1	W	seq2	1	10	+
-seq3	1	11	1	W	seq3	1	11	+
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa b/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
deleted file mode 100644
index 3d3f65c..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/real/in.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->seq1
-AAAAAAAAAA
->seq2
-CCCCCCCCCC
->seq3
-GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
deleted file mode 100644
index 7abe938..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.1.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-AAAAAAAAAA
->seq2
-CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
deleted file mode 100644
index 6287efa..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/splits/default/0/test.2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq3
-GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
deleted file mode 100644
index 9512f36..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/0/test.1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
deleted file mode 100644
index 2f3b40f..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/splits/multi_dir/0/1/test.2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
deleted file mode 100644
index 9512f36..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa b/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
deleted file mode 100644
index 2f3b40f..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/data/splits/unique/0/test.0.2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test b/modules/ensembl/fasta/splitfasta/tests/main.nf.test
deleted file mode 100644
index 3db1283..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/main.nf.test
+++ /dev/null
@@ -1,301 +0,0 @@
-// nf-core modules test fasta/splitfasta
-nextflow_process {
-
-    name "Test Process FASTA_SPLITFASTA"
-    script "../main.nf"
-    process "FASTA_SPLITFASTA"
-
-    tag "modules"
-    tag "modules_ensembl"
-    tag "fasta"
-    tag "fasta/splitfasta"
-
-
-    def real_fa = new File("modules/ensembl/fasta/splitfasta/tests/data/real/in.fa").canonicalFile
-
-    test("Stub outputs: default layout, no AGP") {
-
-        when {
-            options "-stub"
-
-            // Ensure params are set explicitly for this test
-            params.write_agp = false
-            params.unique_file_names = false
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('dummy.fa')]
-                """
-            }
-        }
-
-        then {
-            assert snapshot(process.out).match()
-
-            // fasta: tuple(meta, fa_paths)
-            assert process.out.fasta != null
-            assert process.out.fasta.size() == 1
-
-            def fasta_out = process.out.fasta[0]
-            def meta = fasta_out[0]
-            def fas  = fasta_out[1]
-
-            assert meta.id == "test"
-            assert fas != null
-            assert fas.size() == 2
-
-            // agp: tuple(meta, agp_paths) optional -> should be absent
-            assert process.out.agp != null
-            assert process.out.agp.size() == 0
-
-            // Ensure FASTA parsing works (downstream contract)
-            def merged = fas
-                .collect { path(it).fasta }
-                .inject([:]) { acc, m -> acc + m }
-
-            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
-
-            assertAll(
-                { assert process.success }
-            )
-        }
-    }
-
-    test("Stub outputs: AGP optional output appears when enabled") {
-
-        when {
-            options "-stub"
-
-            params.write_agp = true
-            params.unique_file_names = false
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('dummy.fa')]
-                """
-            }
-        }
-
-        then {
-            assert snapshot(process.out).match()
-
-            assert process.out.fasta.size() == 1
-            def fasta_out = process.out.fasta[0]
-            def fas = fasta_out[1]
-            assert fas.size() == 2
-
-            assert process.out.agp.size() == 1
-            def agp_out = process.out.agp[0]
-            def agp_meta = agp_out[0]
-            def agp = agp_out[1]
-            def agp_paths = agp instanceof List ? agp : [agp]
-            def agp_file = path(agp_paths[0]).toFile()
-
-            assert agp_meta.id == "test"
-            assert agp_paths.size() == 1
-            assert agp_file.name == "test.agp"
-
-            def agp_text = agp_file.text
-            assert agp_text.startsWith("# AGP-version 2.0")
-            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
-            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
-            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
-
-            assertAll(
-                { assert process.success }
-            )
-        }
-    }
-
-    test("Stub outputs: unique_file_names contract") {
-
-        when {
-            options "-stub"
-
-            params.write_agp = false
-            params.unique_file_names = true
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('dummy.fa')]
-                """
-            }
-        }
-
-        then {
-            assert snapshot(process.out).match()
-
-            def fasta_out = process.out.fasta[0]
-            def fas = fasta_out[1]
-
-            assert fas.size() == 2
-            assert process.out.agp.size() == 0
-
-            // Contract check: names match the unique fixture pattern
-            assert fas.collect { path(it).toFile().name }.sort() == ["test.0.1.fa", "test.0.2.fa"]
-
-            assertAll(
-                { assert process.success }
-            )
-        }
-    }
-
-    test("Stub outputs: nested directory layout contract") {
-
-        when {
-            options "-stub"
-
-            params.write_agp = false
-            params.unique_file_names = false
-
-            // Trigger stub's nested fixture selection
-            params.max_files_per_directory = 100
-            params.max_dirs_per_directory  = 100
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('dummy.fa')]
-                """
-            }
-        }
-
-        then {
-            assert snapshot(process.out).match()
-
-            def fastas = process.out.fasta[0][1]
-            assert fastas.size() == 2
-            assert process.out.agp.size() == 0
-
-            def rels = fastas.collect { path(it).toString() }
-            assert rels.any { it.contains("splits/0/0/") }
-            assert rels.any { it.contains("splits/0/1/") }
-
-            assertAll(
-                { assert process.success }
-            )
-        }
-    }
-
-    test("Real run: default behaviour produces FASTAs and no AGP") {
-
-        when {
-            params.write_agp = false
-            params.unique_file_names = false
-            params.max_seqs_per_file = null
-            params.max_seq_length_per_file = null
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-            params.force_max_seq_length = false
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            assert process.out.fasta != null
-            assert process.out.fasta.size() == 1
-
-            def out = process.out.fasta[0]
-            def meta = out[0]
-            def fas  = out[1]
-
-            assert meta.id == "test"
-            def fas_list = (fas instanceof List) ? fas : [fas]
-            assert fas_list.size() >= 1
-
-            assert process.out.agp != null
-            assert process.out.agp.size() == 0
-
-            def merged = fas_list
-                .collect { path(it).fasta }
-                .inject([:]) { acc, m -> acc + m }
-
-            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
-        }
-    }
-
-    test("Real run: write_agp=true emits exactly one AGP file") {
-
-        when {
-            params.write_agp = true
-            params.unique_file_names = false
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-            params.max_seqs_per_file = null
-            params.max_seq_length_per_file = null
-            params.force_max_seq_length = false
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            assert process.out.agp != null
-            assert process.out.agp.size() == 1
-
-            def agp_out = process.out.agp[0]
-            def agp_meta = agp_out[0]
-            def agp_val  = agp_out[1]
-
-            assert agp_meta.id == "test"
-
-            def agp_list = (agp_val instanceof List) ? agp_val : [agp_val]
-            assert agp_list.size() == 1
-
-            def agp_path = path(agp_list[0])
-            assert agp_path.fileName.toString().endsWith(".agp")
-
-            def agp_text = agp_path.toFile().text
-            assert agp_text.startsWith("# AGP-version 2.0")
-            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
-            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
-            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
-        }
-    }
-
-    test("Real run: max_seqs_per_file=2 splits into 2 FASTA outputs") {
-
-        when {
-            params.write_agp = false
-            params.max_seqs_per_file = 2
-            params.unique_file_names = false
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            def fas = process.out.fasta[0][1]
-            assert fas.size() == 2
-
-            def merged = fas
-                .collect { path(it).fasta }
-                .inject([:]) { acc, m -> acc + m }
-
-            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
-        }
-    }
-}
diff --git a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap b/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
deleted file mode 100644
index 3390583..0000000
--- a/modules/ensembl/fasta/splitfasta/tests/main.nf.test.snap
+++ /dev/null
@@ -1,168 +0,0 @@
-{
-    "Stub outputs: AGP optional output appears when enabled": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
-                    ]
-                ],
-                "agp": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
-                    ]
-                ],
-                "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-01-30T10:38:07.606463"
-    },
-    "Stub outputs: nested directory layout contract": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "agp": [
-                    
-                ],
-                "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-01-30T10:38:11.815126"
-    },
-    "Stub outputs: default layout, no AGP": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "agp": [
-                    
-                ],
-                "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
-                        ]
-                    ]
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-01-30T10:38:05.482323"
-    },
-    "Stub outputs: unique_file_names contract": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "agp": [
-                    
-                ],
-                "fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
-                        ]
-                    ]
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.10.3"
-        },
-        "timestamp": "2026-01-30T10:38:09.698407"
-    }
-}
\ No newline at end of file
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
index e4c8606..a527e1f 100644
--- a/tests/config/nextflow.config
+++ b/tests/config/nextflow.config
@@ -16,5 +16,5 @@
 includeConfig 'test_data.config'
 
 singularity {
-    enabled = true
+    enabled = false
 }
diff --git a/tests/test_split_fasta.py b/tests/test_split_fasta.py
deleted file mode 100644
index 8a48af2..0000000
--- a/tests/test_split_fasta.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# tests/test_split_fasta.py
-from pathlib import Path
-
-import pytest
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
-
-
-def write_fasta(path: Path, records):
-    with open(path, "w", encoding="utf-8", newline="\n") as fh:
-        SeqIO.write(records, fh, "fasta")
-
-
-def list_output_fastas(out_dir: Path):
-    return sorted(out_dir.rglob("*.fa"))
-
-
-def read_all_ids_from_fastas(out_dir: Path):
-    ids = []
-    for fa in list_output_fastas(out_dir):
-        with open(fa, "r", encoding="utf-8") as fh:
-            ids.extend([r.id for r in SeqIO.parse(fh, "fasta")])
-    return ids
-
-
-def parse_agp_lines(agp_path: Path):
-    lines = [l.rstrip("\n") for l in agp_path.read_text(encoding="utf-8").splitlines()]
-    lines = [l for l in lines if l and not l.startswith("#")]
-    return [l.split("\t") for l in lines]
-
-
-def test_no_agp_by_default(tmp_path: Path, split_fasta_module):
-    inp = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    write_fasta(inp, [SeqRecord(Seq("ACGT"), id="seq1", description="")])
-
-    params = split_fasta_module.Params(
-        fasta_file=inp,
-        out_dir=out,
-        write_agp=False,
-    )
-    split_fasta_module.split_fasta(params)
-
-    assert not (out / "in.agp").exists()
-    assert len(list_output_fastas(out)) >= 1
-
-
-def test_split_by_max_seqs_per_file(tmp_path: Path, split_fasta_module):
-    inp = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    recs = [
-        SeqRecord(Seq("A" * 10), id="s1", description=""),
-        SeqRecord(Seq("C" * 10), id="s2", description=""),
-        SeqRecord(Seq("G" * 10), id="s3", description=""),
-    ]
-    write_fasta(inp, recs)
-
-    params = split_fasta_module.Params(
-        fasta_file=inp,
-        out_dir=out,
-        max_seqs_per_file=2,
-        write_agp=False,
-    )
-    split_fasta_module.split_fasta(params)
-
-    fas = list_output_fastas(out)
-    assert len(fas) == 2
-    assert read_all_ids_from_fastas(out) == ["s1", "s2", "s3"]
-
-
-def test_chunk_merge_final_small_chunk_and_agp(tmp_path: Path, split_fasta_module):
-    """
-    seq_len=2100, max=1000 -> chunks [1000, 1000, 100]
-    min_chunk_length=200 -> final chunk merged -> [1000, 1100]
-    """
-    inp = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    write_fasta(inp, [SeqRecord(Seq("A" * 2100), id="chr1", description="chr1")])
-
-    params = split_fasta_module.Params(
-        fasta_file=inp,
-        out_dir=out,
-        write_agp=True,
-        force_max_seq_length=True,
-        max_seq_length_per_file=1000,
-        min_chunk_length=200,
-        max_seqs_per_file=100000,  # avoid seq-count splitting interfering
-    )
-    split_fasta_module.split_fasta(params)
-
-    # 2 chunks expected after merge
-    assert read_all_ids_from_fastas(out) == [
-        "chr1_chunk_start_0",
-        "chr1_chunk_start_1000",
-    ]
-
-    agp = out / "in.agp"
-    assert agp.exists()
-
-    cols = parse_agp_lines(agp)
-    assert len(cols) == 2
-
-    # object, obj_beg, obj_end, part_no, type, comp_id, comp_beg, comp_end, orient
-    assert cols[0][0] == "chr1"
-    assert cols[0][1:4] == ["1", "1000", "1"]
-    assert cols[0][4] == "W"
-    assert cols[0][5] == "chr1_chunk_start_0"
-    assert cols[0][6:9] == ["1", "1000", "+"]
-
-    assert cols[1][0] == "chr1"
-    assert cols[1][1:4] == ["1001", "2100", "2"]
-    assert cols[1][4] == "W"
-    assert cols[1][5] == "chr1_chunk_start_1000"
-    assert cols[1][6:9] == ["1", "1100", "+"]
-
-
-def test_agp_part_numbers_restart_per_object(tmp_path: Path, split_fasta_module):
-    inp = tmp_path / "in.fa"
-    out = tmp_path / "out"
-    recs = [
-        SeqRecord(Seq("A" * 1200), id="obj1", description=""),
-        SeqRecord(Seq("C" * 1200), id="obj2", description=""),
-    ]
-    write_fasta(inp, recs)
-
-    params = split_fasta_module.Params(
-        fasta_file=inp,
-        out_dir=out,
-        write_agp=True,
-        force_max_seq_length=True,
-        max_seq_length_per_file=1000,
-        min_chunk_length=100,  # => 2 chunks each, no merge
-    )
-    split_fasta_module.split_fasta(params)
-
-    cols = parse_agp_lines(out / "in.agp")
-
-    by_obj = {}
-    for c in cols:
-        by_obj.setdefault(c[0], []).append(int(c[3]))
-
-    assert by_obj["obj1"] == [1, 2]
-    assert by_obj["obj2"] == [1, 2]

From 824066c1b8eceff48ef98eb67af437d9549c7707 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Wed, 11 Mar 2026 12:29:31 +0000
Subject: [PATCH 22/36] Remove use of test data

---
 modules/ensembl/fasta/recombine/main.nf       |  24 +-
 .../recombine/tests/data/agp/inputs/part1.fa  |   2 -
 .../recombine/tests/data/agp/inputs/part2.fa  |   2 -
 .../recombine/tests/data/agp/manifest.txt     |   2 -
 .../recombine/tests/data/agp/output/test.fa   |   2 -
 .../fasta/recombine/tests/data/agp/test.agp   |   3 -
 .../tests/data/custom_regex/inputs/seq1_1.fa  |   2 -
 .../tests/data/custom_regex/inputs/seq1_5.fa  |   2 -
 .../tests/data/custom_regex/manifest.txt      |   2 -
 .../tests/data/custom_regex/output/test.fa    |   2 -
 .../data/header/inputs/seq1_chunk_start_1.fa  |   2 -
 .../data/header/inputs/seq1_chunk_start_5.fa  |   2 -
 .../tests/data/header/inputs/seq2.fa          |   2 -
 .../recombine/tests/data/header/manifest.txt  |   3 -
 .../tests/data/header/output/test.fa          |   4 -
 .../tests/data/order/inputs/01_second.fa      |   2 -
 .../tests/data/order/inputs/02_first.fa       |   2 -
 .../recombine/tests/data/order/manifest.txt   |   2 -
 .../recombine/tests/data/order/output/test.fa |   4 -
 .../fasta/recombine/tests/main.nf.test        | 117 +---
 .../fasta/recombine/tests/main.nf.test.snap   | 124 +----
 modules/ensembl/fasta/split/main.nf           |  48 +-
 .../fasta/split/tests/data/agp/test.agp       |   4 -
 .../ensembl/fasta/split/tests/data/real/in.fa |   6 -
 .../tests/data/splits/default/0/test.1.fa     |   4 -
 .../tests/data/splits/default/0/test.2.fa     |   2 -
 .../tests/data/splits/multi_dir/0/0/test.1.fa |   2 -
 .../tests/data/splits/multi_dir/0/1/test.2.fa |   2 -
 .../tests/data/splits/unique/0/test.0.1.fa    |   2 -
 .../tests/data/splits/unique/0/test.0.2.fa    |   2 -
 .../ensembl/fasta/split/tests/main.nf.test    | 136 +----
 .../fasta/split/tests/main.nf.test.snap       |  68 ++-
 modules/ensembl/features/combine_json/main.nf |  52 +-
 .../tests/data/ncrna/agp/.DS_Store            | Bin 6148 -> 0 bytes
 .../tests/data/ncrna/agp/inputs/in.json       |  27 -
 .../tests/data/ncrna/agp/manifest.txt         |   1 -
 .../data/ncrna/agp/output/test.features.json  |  27 -
 .../tests/data/ncrna/agp/test.agp             |   1 -
 .../tests/data/ncrna/custom_regex/.DS_Store   | Bin 6148 -> 0 bytes
 .../data/ncrna/custom_regex/inputs/a.json     |  27 -
 .../data/ncrna/custom_regex/inputs/b.json     |  27 -
 .../data/ncrna/custom_regex/manifest.txt      |   2 -
 .../custom_regex/output/test.features.json    |  37 --
 .../tests/data/ncrna/order/.DS_Store          | Bin 6148 -> 0 bytes
 .../tests/data/ncrna/order/inputs/01.json     |  27 -
 .../tests/data/ncrna/order/inputs/02.json     |  27 -
 .../tests/data/ncrna/order/manifest.txt       |   2 -
 .../ncrna/order/output/test.features.json     |  37 --
 .../tests/data/ncrna/seq_region/.DS_Store     | Bin 6148 -> 0 bytes
 .../tests/data/ncrna/seq_region/inputs/a.json |  27 -
 .../tests/data/ncrna/seq_region/inputs/b.json |  27 -
 .../tests/data/ncrna/seq_region/manifest.txt  |   2 -
 .../seq_region/output/test.features.json      |  37 --
 .../tests/data/repeat/agp/inputs/in.json      |  34 --
 .../tests/data/repeat/agp/manifest.txt        |   1 -
 .../data/repeat/agp/output/test.features.json |  34 --
 .../tests/data/repeat/agp/test.agp            |   1 -
 .../data/repeat/custom_regex/inputs/in.json   |  34 --
 .../data/repeat/custom_regex/manifest.txt     |   1 -
 .../custom_regex/output/test.features.json    |  34 --
 .../tests/data/repeat/order/inputs/01.json    |  34 --
 .../tests/data/repeat/order/inputs/02.json    |  34 --
 .../tests/data/repeat/order/manifest.txt      |   2 -
 .../repeat/order/output/test.features.json    |  43 --
 .../data/repeat/seq_region/inputs/a.json      |  34 --
 .../data/repeat/seq_region/inputs/b.json      |  34 --
 .../tests/data/repeat/seq_region/manifest.txt |   2 -
 .../seq_region/output/test.features.json      |  43 --
 .../features/combine_json/tests/main.nf.test  | 221 ++------
 .../combine_json/tests/main.nf.test.snap      | 522 +-----------------
 70 files changed, 227 insertions(+), 1851 deletions(-)
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/inputs/part1.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/inputs/part2.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/agp/test.agp
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_1.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_5.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_1.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_5.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/header/inputs/seq2.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/order/manifest.txt
 delete mode 100644 modules/ensembl/fasta/recombine/tests/data/order/output/test.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/agp/test.agp
 delete mode 100644 modules/ensembl/fasta/split/tests/data/real/in.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/splits/default/0/test.1.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/splits/default/0/test.2.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/0/test.1.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/1/test.2.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.1.fa
 delete mode 100644 modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.2.fa
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/.DS_Store
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/agp/test.agp
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/.DS_Store
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/.DS_Store
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/.DS_Store
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/a.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/b.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/agp/inputs/in.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/agp/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/agp/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/inputs/in.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/01.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/02.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/order/output/test.features.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/a.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/b.json
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/seq_region/manifest.txt
 delete mode 100644 modules/ensembl/features/combine_json/tests/data/repeat/seq_region/output/test.features.json

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 01b53dc..057c98f 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -55,7 +55,7 @@ process FASTA_RECOMBINE {
 
         cat <<-END_VERSIONS > versions.yml
         ${task.process}:
-        fasta_recombine: $(fasta_recombine --version 2>/dev/null | head -n 1)
+        fasta_recombine: \$(fasta_recombine --version 2>/dev/null | head -n 1)
         END_VERSIONS
         """
 
@@ -63,26 +63,12 @@ process FASTA_RECOMBINE {
         """
         set -euo pipefail
 
-        test_data_dir="${moduleDir}/tests/data"
+        out_fa="${meta.id}.fa"
+        touch "\$out_fa"
 
-        out_fasta="${meta.id}.fa"
-
-        test -s "${fasta_manifest}"
-
-        mode="header"
-        agp_path="${agp}"
-        agp_name="\${agp_path##*/}"
-        if [[ "\$agp_name" != "NO_FILE" ]]; then
-            mode="agp"
-        fi
-
-
-        cp "\$test_data_dir/\$mode/output/${meta.id}.fa" "\$out_fasta"
-        
         cat <<-END_VERSIONS > versions.yml
         ${task.process}:
-        fasta_recombine: $(fasta_recombine --version 2>/dev/null | head -n 1)
+            fasta_recombine: stub
         END_VERSIONS
-        """
-        
+        """   
 }
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part1.fa b/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part1.fa
deleted file mode 100644
index dafb755..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->part1
-AAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part2.fa b/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part2.fa
deleted file mode 100644
index 0fc377e..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/agp/inputs/part2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->part2
-CCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt
deleted file mode 100644
index b128cbe..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/agp/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/part1.fa
-inputs/part2.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa
deleted file mode 100644
index b53532e..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/agp/output/test.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-AAAAAACCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/agp/test.agp b/modules/ensembl/fasta/recombine/tests/data/agp/test.agp
deleted file mode 100644
index a73c8db..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/agp/test.agp
+++ /dev/null
@@ -1,3 +0,0 @@
-##agp-version 2.0
-seq1	1	6	1	W	part1	1	6	+
-seq1	7	12	2	W	part2	1	6	+
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_1.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_1.fa
deleted file mode 100644
index 0af2767..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seqY_1
-CCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_5.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_5.fa
deleted file mode 100644
index c722026..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/custom_regex/inputs/seq1_5.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seqY_5
-GGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt
deleted file mode 100644
index a125950..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/custom_regex/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/seq1_1.fa
-inputs/seq1_5.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa
deleted file mode 100644
index 46d11a6..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/custom_regex/output/test.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-CCCCGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_1.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_1.fa
deleted file mode 100644
index 17d88e1..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1_chunk_start_1
-AAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_5.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_5.fa
deleted file mode 100644
index b6646f2..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq1_chunk_start_5.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1_chunk_start_5
-CCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq2.fa b/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq2.fa
deleted file mode 100644
index 70d86fb..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/header/inputs/seq2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-GGGGTT
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
deleted file mode 100644
index a34084d..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/header/manifest.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-inputs/seq1_chunk_start_1.fa
-inputs/seq1_chunk_start_5.fa
-inputs/seq2.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
deleted file mode 100644
index d3bbb3d..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/header/output/test.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-AAAACCCC
->seq2
-GGGGTT
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa b/modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa
deleted file mode 100644
index d06c158..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/order/inputs/01_second.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->second second_record
-TTTT
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa b/modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa
deleted file mode 100644
index 1e20e1f..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/order/inputs/02_first.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->first first_record
-AAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/manifest.txt b/modules/ensembl/fasta/recombine/tests/data/order/manifest.txt
deleted file mode 100644
index dae8a10..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/order/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/02_first.fa
-inputs/01_second.fa
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/data/order/output/test.fa b/modules/ensembl/fasta/recombine/tests/data/order/output/test.fa
deleted file mode 100644
index b3b6d1e..0000000
--- a/modules/ensembl/fasta/recombine/tests/data/order/output/test.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->first first_record
-AAAA
->second second_record
-TTTT
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index 91844ba..9a7a6c9 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -25,7 +25,6 @@ nextflow_process {
     tag "fasta"
     tag "fasta/recombine"
 
-
     test("Stub outputs: header mode") {
 
         when {
@@ -33,10 +32,17 @@ nextflow_process {
 
             process {
                 """
-                input[0] = [[ id:'test' ],
-                        file('${moduleDir}/tests/data/header/manifest.txt'),
-                        file('${workflow.projectDir}/modules/assets/NO_FILE')]
+                def manifest = file("manifest.txt")
+                manifest.text = "x\\n"
+
+                def no_file = file("NO_FILE")
+                no_file.text = ""
 
+                input[0] = [
+                    [ id: 'test' ],
+                    manifest,
+                    no_file
+                ]
                 """
             }
         }
@@ -49,7 +55,6 @@ nextflow_process {
         }
     }
 
-
     test("Stub outputs: AGP mode") {
 
         when {
@@ -57,51 +62,16 @@ nextflow_process {
 
             process {
                 """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/agp/test.agp')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.recombined_fasta.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-
-    test("Real run: header recombination") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/header/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.recombined_fasta.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-
-    test("Real run: AGP recombination") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/agp/test.agp')]
+                def manifest = file("manifest.txt")
+                manifest.text = "x\\n"
+
+                def agp = file("test.agp")
+                agp.text = ""
+                input[0] = [
+                    [ id: 'test' ],
+                    manifest,
+                    agp
+                ]
                 """
             }
         }
@@ -113,49 +83,4 @@ nextflow_process {
             assert snapshot(process.out).match()
         }
     }
-
-
-    test("Real run: custom chunk regex") {
-
-        when {
-            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
-
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/custom_regex/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.recombined_fasta.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-
-    test("Real run: manifest order is preserved") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/order/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.recombined_fasta.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-}
-
+}
\ No newline at end of file
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index 3a27deb..3c98f07 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -7,74 +7,26 @@
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,3ec81eef9dd73dc86ff01621dbacc7a0"
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
-                "recombined_fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,3ec81eef9dd73dc86ff01621dbacc7a0"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-18T23:12:05.089688",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: AGP recombination": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,f32bc79faea4bc05dd4675e0d4ededa1"
-                    ]
+                "1": [
+                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
                 ],
                 "recombined_fasta": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,f32bc79faea4bc05dd4675e0d4ededa1"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-18T23:12:09.601838",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: header recombination": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,700550164316730d1145b7bde2ae3eb7"
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
-                "recombined_fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,700550164316730d1145b7bde2ae3eb7"
-                    ]
+                "versions": [
+                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
                 ]
             }
         ],
-        "timestamp": "2026-02-18T23:12:07.342405",
+        "timestamp": "2026-03-11T12:20:11.373089",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -88,74 +40,26 @@
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,93d1870d020e197708753501e57db68f"
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
-                "recombined_fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,93d1870d020e197708753501e57db68f"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-18T23:12:03.015143",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: custom chunk regex": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,a589b60028be69f01622a61cc78fa1ae"
-                    ]
+                "1": [
+                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
                 ],
                 "recombined_fasta": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.fa:md5,a589b60028be69f01622a61cc78fa1ae"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-18T23:12:11.852053",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: manifest order is preserved": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,52fa2054da674f0a5ebc263e724cf4a4"
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
-                "recombined_fasta": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.fa:md5,52fa2054da674f0a5ebc263e724cf4a4"
-                    ]
+                "versions": [
+                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
                 ]
             }
         ],
-        "timestamp": "2026-02-18T23:12:14.083842",
+        "timestamp": "2026-03-11T12:20:09.308095",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 8cc900b..4a33e00 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -13,15 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-def fasta_split_mem(longest_seq_bp) {
-    if( !longest_seq_bp || longest_seq_bp <= 0 ) return 8.GB
-
-    // Heuristic: ~2.5 bytes/base peak => ~1 GB per 400 Mbp of the *longest* sequence
-    // Add 2GB base memory to account for overhead
-    def mem_gb = 2 + Math.ceil(longest_seq_bp as double / 400_000_000d)
-    return mem_gb.GB
-}
-
 process FASTA_SPLIT {
 
     tag "${meta.id}"
@@ -87,7 +78,7 @@ process FASTA_SPLIT {
 
         cat <<-END_VERSIONS > versions.yml
         ${task.process}:
-        fasta_split: $(fasta_split --version 2>/dev/null | head -n 1)
+        fasta_split: \$(fasta_split --version 2>/dev/null | head -n 1)
         END_VERSIONS
         """
 
@@ -95,8 +86,6 @@ process FASTA_SPLIT {
         """
         set -euo pipefail
 
-        test_data_dir="${moduleDir}/tests/data"
-
         layout="default"
         if [[ "${params.unique_file_names ?: false}" == "true" ]]; then
             layout="unique"
@@ -105,17 +94,42 @@ process FASTA_SPLIT {
         fi
 
         mkdir -p splits
-        cp -R "\$test_data_dir/splits/\$layout/." "splits/"
+
+        if [[ "\$layout" == "default" ]]; then
+            mkdir -p splits/0
+            touch splits/0/test.1.fa
+            touch splits/0/test.2.fa
+
+        elif [[ "\$layout" == "unique" ]]; then
+            mkdir -p splits/0
+            touch splits/0/test.0.1.fa
+            touch splits/0/test.0.2.fa
+
+        elif [[ "\$layout" == "multi_dir" ]]; then
+            mkdir -p splits/0/0
+            mkdir -p splits/0/1
+            touch splits/0/0/test.1.fa
+            touch splits/0/1/test.2.fa
+        fi
 
         if [[ "${params.write_agp ?: false}" == "true" ]]; then
-            cp "\$test_data_dir/agp/test.agp" "splits/${meta.id}.agp"
+            touch "splits/${meta.id}.agp"
         fi
 
         cat <<-END_VERSIONS > versions.yml
         ${task.process}:
-        fasta_split: $(fasta_split --version 2>/dev/null | head -n 1)
+            fasta_split: stub
         END_VERSIONS
-        """
+        """     
+}
+
+
+def fasta_split_mem(longest_seq_bp) {
+    if( !longest_seq_bp || longest_seq_bp <= 0 ) return 8.GB
 
-        
+    // Heuristic: ~2.5 bytes/base peak => ~1 GB per 400 Mbp of the *longest* sequence
+    // Add 2GB base memory to account for overhead
+    def mem_gb = 2 + Math.ceil(longest_seq_bp as double / 400_000_000d)
+    return mem_gb.GB
 }
+
diff --git a/modules/ensembl/fasta/split/tests/data/agp/test.agp b/modules/ensembl/fasta/split/tests/data/agp/test.agp
deleted file mode 100644
index 46fc419..0000000
--- a/modules/ensembl/fasta/split/tests/data/agp/test.agp
+++ /dev/null
@@ -1,4 +0,0 @@
-# AGP-version 2.0
-seq1	1	10	1	W	seq1	1	10	+
-seq2	1	10	1	W	seq2	1	10	+
-seq3	1	11	1	W	seq3	1	11	+
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/real/in.fa b/modules/ensembl/fasta/split/tests/data/real/in.fa
deleted file mode 100644
index 3d3f65c..0000000
--- a/modules/ensembl/fasta/split/tests/data/real/in.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->seq1
-AAAAAAAAAA
->seq2
-CCCCCCCCCC
->seq3
-GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/splits/default/0/test.1.fa b/modules/ensembl/fasta/split/tests/data/splits/default/0/test.1.fa
deleted file mode 100644
index 7abe938..0000000
--- a/modules/ensembl/fasta/split/tests/data/splits/default/0/test.1.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-AAAAAAAAAA
->seq2
-CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/splits/default/0/test.2.fa b/modules/ensembl/fasta/split/tests/data/splits/default/0/test.2.fa
deleted file mode 100644
index 6287efa..0000000
--- a/modules/ensembl/fasta/split/tests/data/splits/default/0/test.2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq3
-GGGGGGGGGGG
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/0/test.1.fa b/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/0/test.1.fa
deleted file mode 100644
index 9512f36..0000000
--- a/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/0/test.1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/1/test.2.fa b/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/1/test.2.fa
deleted file mode 100644
index 2f3b40f..0000000
--- a/modules/ensembl/fasta/split/tests/data/splits/multi_dir/0/1/test.2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.1.fa b/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.1.fa
deleted file mode 100644
index 9512f36..0000000
--- a/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1
-AAAAAAAAAA
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.2.fa b/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.2.fa
deleted file mode 100644
index 2f3b40f..0000000
--- a/modules/ensembl/fasta/split/tests/data/splits/unique/0/test.0.2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-CCCCCCCCCC
\ No newline at end of file
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test b/modules/ensembl/fasta/split/tests/main.nf.test
index 37211ae..5aa3acf 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test
+++ b/modules/ensembl/fasta/split/tests/main.nf.test
@@ -26,14 +26,11 @@ nextflow_process {
     tag "fasta/split"
 
 
-    def real_fa = new File("modules/ensembl/fasta/split/tests/data/real/in.fa").canonicalFile
-
     test("Stub outputs: default layout, no AGP") {
 
         when {
             options "-stub"
 
-            // Ensure params are set explicitly for this test
             params.write_agp = false
             params.unique_file_names = false
             params.max_files_per_directory = null
@@ -49,7 +46,6 @@ nextflow_process {
         then {
             assert snapshot(process.out).match()
 
-            // fasta: tuple(meta, fa_paths)
             assert process.out.fastas != null
             assert process.out.fastas.size() == 1
 
@@ -60,18 +56,11 @@ nextflow_process {
             assert meta.id == "test"
             assert fas != null
             assert fas.size() == 2
+            assert fas.collect { path(it).toFile().name }.sort() == ["test.1.fa", "test.2.fa"]
 
-            // agp: tuple(meta, agp_paths) optional -> should be absent
             assert process.out.agp != null
             assert process.out.agp.size() == 0
 
-            // Ensure FASTA parsing works (downstream contract)
-            def merged = fas
-                .collect { path(it).fasta }
-                .inject([:]) { acc, m -> acc + m }
-
-            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
-
             assertAll(
                 { assert process.success }
             )
@@ -114,12 +103,6 @@ nextflow_process {
             assert agp_paths.size() == 1
             assert agp_file.name == "test.agp"
 
-            def agp_text = agp_file.text
-            assert agp_text.startsWith("# AGP-version 2.0")
-            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
-            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
-            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
-
             assertAll(
                 { assert process.success }
             )
@@ -196,121 +179,4 @@ nextflow_process {
             )
         }
     }
-
-    test("Real run: default behaviour produces FASTAs and no AGP") {
-
-        when {
-            params.write_agp = false
-            params.unique_file_names = false
-            params.max_seqs_per_file = null
-            params.max_seq_length_per_file = null
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-            params.force_max_seq_length = false
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            assert process.out.fastas != null
-            assert process.out.fastas.size() == 1
-
-            def out = process.out.fastas[0]
-            def meta = out[0]
-            def fas  = out[1]
-
-            assert meta.id == "test"
-            def fas_list = (fas instanceof List) ? fas : [fas]
-            assert fas_list.size() >= 1
-
-            assert process.out.agp != null
-            assert process.out.agp.size() == 0
-
-            def merged = fas_list
-                .collect { path(it).fasta }
-                .inject([:]) { acc, m -> acc + m }
-
-            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
-        }
-    }
-
-    test("Real run: write_agp=true emits exactly one AGP file") {
-
-        when {
-            params.write_agp = true
-            params.unique_file_names = false
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-            params.max_seqs_per_file = null
-            params.max_seq_length_per_file = null
-            params.force_max_seq_length = false
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            assert process.out.agp != null
-            assert process.out.agp.size() == 1
-
-            def agp_out = process.out.agp[0]
-            def agp_meta = agp_out[0]
-            def agp_val  = agp_out[1]
-
-            assert agp_meta.id == "test"
-
-            def agp_list = (agp_val instanceof List) ? agp_val : [agp_val]
-            assert agp_list.size() == 1
-
-            def agp_path = path(agp_list[0])
-            assert agp_path.fileName.toString().endsWith(".agp")
-
-            def agp_text = agp_path.toFile().text
-            assert agp_text.startsWith("# AGP-version 2.0")
-            assert agp_text.contains("seq1\t1\t10\t1\tW\tseq1\t1\t10\t+")
-            assert agp_text.contains("seq2\t1\t10\t1\tW\tseq2\t1\t10\t+")
-            assert agp_text.contains("seq3\t1\t11\t1\tW\tseq3\t1\t11\t+")
-        }
-    }
-
-    test("Real run: max_seqs_per_file=2 splits into 2 FASTA outputs") {
-
-        when {
-            params.write_agp = false
-            params.max_seqs_per_file = 2
-            params.unique_file_names = false
-            params.max_files_per_directory = null
-            params.max_dirs_per_directory  = null
-
-            process {
-                """
-                input[0] = [[ id:'test' ], file('${real_fa.absolutePath}')]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            def fas = process.out.fastas[0][1]
-            assert fas.size() == 2
-
-            def merged = fas
-                .collect { path(it).fasta }
-                .inject([:]) { acc, m -> acc + m }
-
-            assert merged.keySet().containsAll(["seq1", "seq2", "seq3"])
-        }
-    }
 }
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
index eb12321..d736a2a 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/split/tests/main.nf.test.snap
@@ -8,8 +8,8 @@
                             "id": "test"
                         },
                         [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                            "test.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
@@ -18,15 +18,18 @@
                         {
                             "id": "test"
                         },
-                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                        "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
+                "2": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                ],
                 "agp": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.agp:md5,c12ac51bd2b1ca95cdd8f011eca0cd1c"
+                        "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "fastas": [
@@ -35,14 +38,17 @@
                             "id": "test"
                         },
                         [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                            "test.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ]
             }
         ],
-        "timestamp": "2026-02-18T23:21:51.036982",
+        "timestamp": "2026-03-11T12:20:33.334793",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -57,13 +63,16 @@
                             "id": "test"
                         },
                         [
-                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                            "test.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
                 "1": [
                     
+                ],
+                "2": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ],
                 "agp": [
                     
@@ -74,14 +83,17 @@
                             "id": "test"
                         },
                         [
-                            "test.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                            "test.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ]
             }
         ],
-        "timestamp": "2026-02-18T23:06:24.284416",
+        "timestamp": "2026-03-11T12:20:37.504172",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -96,13 +108,16 @@
                             "id": "test"
                         },
                         [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                            "test.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
                 "1": [
                     
+                ],
+                "2": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ],
                 "agp": [
                     
@@ -113,14 +128,17 @@
                             "id": "test"
                         },
                         [
-                            "test.1.fa:md5,336490c5e8c624cb1ae29048f28f0978",
-                            "test.2.fa:md5,55d5ca305356033516f7ae1b5ecca900"
+                            "test.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ]
             }
         ],
-        "timestamp": "2026-02-18T23:06:18.00303",
+        "timestamp": "2026-03-11T12:20:31.268587",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -135,13 +153,16 @@
                             "id": "test"
                         },
                         [
-                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                            "test.0.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.0.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
                 "1": [
                     
+                ],
+                "2": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ],
                 "agp": [
                     
@@ -152,14 +173,17 @@
                             "id": "test"
                         },
                         [
-                            "test.0.1.fa:md5,41e176f082cc04841e50d8aa5c4f4d5a",
-                            "test.0.2.fa:md5,e3bd0305f6466c13a1479c6b82391e6f"
+                            "test.0.1.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.0.2.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
                 ]
             }
         ],
-        "timestamp": "2026-02-18T23:06:22.194395",
+        "timestamp": "2026-03-11T12:20:35.403767",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index 6d92340..f6dd127 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -55,7 +55,7 @@ process FEATURES_COMBINE_JSON {
 
         cat <<-END_VERSIONS > versions.yml
         ${task.process}:
-        features_combine_json: $(features_combine_json --version 2>/dev/null | head -n 1)
+        features_combine_json: \$(features_combine_json --version 2>/dev/null | head -n 1)
         END_VERSIONS
         """
 
@@ -63,18 +63,12 @@ process FEATURES_COMBINE_JSON {
         """
         set -euo pipefail
 
-        test_data_dir="${moduleDir}/tests/data"
-
         out_json="${meta.id}.features.json"
 
         test -s "${json_manifest}"
 
-        mode="seq_region"
         agp_path="${agp}"
         agp_name="\${agp_path##*/}"
-        if [[ "\$agp_name" != "NO_FILE" ]]; then
-            mode="agp"
-        fi
 
         manifest_real="\$(python -c 'from pathlib import Path; import sys; print(Path(sys.argv[1]).resolve())' "${json_manifest}")"
         manifest_dir="\$(dirname "\$manifest_real")"
@@ -92,7 +86,6 @@ process FEATURES_COMBINE_JSON {
             exit 1
         fi
 
-        
         if grep -q '"ncrna_features"' "\$first_json"; then
             load_type="ncrna"
         elif grep -q '"repeat_features"' "\$first_json"; then
@@ -103,25 +96,38 @@ process FEATURES_COMBINE_JSON {
             exit 1
         fi
 
-        # Provide a schema-valid combined JSON fixture.
-        # Fixtures are arranged under:
-        #   tests/data/repeat/seq_region/output/<id>.features.json
-        #   tests/data/repeat/agp/output/<id>.features.json
-        #   tests/data/ncrna/seq_region/output/<id>.features.json
-        #   tests/data/ncrna/agp/output/<id>.features.json
-        fixture="\$test_data_dir/\$load_type/\$mode/output/${meta.id}.features.json"
-
-        if [[ ! -s "\$fixture" ]]; then
-            echo "ERROR: missing stub fixture: \$fixture" >&2
-            echo "Make sure you created output fixture for meta.id='${meta.id}' under \$load_type/\$mode/output/." >&2
-            exit 1
+        if [[ "\$load_type" == "repeat" ]]; then  # heredocs stay at the script's base indent so Nextflow can dedent it and END_VERSIONS still terminates
+            cat > "\$out_json" <<-EOF
+        {
+            "analysis": {
+                "logic_name": "stub_repeat"
+            },
+            "source": {
+                "source_provider": "stub"
+            },
+            "repeat_consensus": [],
+            "repeat_features": []
+        }
+        EOF
+        else
+            cat > "\$out_json" <<-EOF
+        {
+            "analysis": {
+                "logic_name": "stub_ncrna"
+            },
+            "source": {
+                "source_provider": "stub"
+            },
+            "ncrna_tool": "stub",
+            "ncrna_features": []
+        }
+        EOF
+        fi
 
-        cp "\$fixture" "\$out_json"
-
         cat <<-END_VERSIONS > versions.yml
         ${task.process}:
-        features_combine_json: $(features_combine_json --version 2>/dev/null | head -n 1)
+        features_combine_json: stub
         END_VERSIONS
         """
+        
 }
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/.DS_Store
deleted file mode 100644
index 46ebb6833f86b8f68ba1c38bb8339c81f2e59428..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKQBK1!40Xzciul<lpSi+LK$yx2b^$adF{G7LgZ<|SJHoyY7vKzBi)T9;?XrnK
zgeF^ZUh3G1`x4a+5xMDRF(DcgQ4M9RuFxD29%n72$`&4UGRGUeuAU~d$Jlm-w;BE;
z1H5;a^h__*(T0lst8Nd++t@VoRh+|KuXmr@<@?uN>9hFNr(5>V37<dY5hVx7WO$CM
za5&!2!+-Sh=*`i_pFB?a9LWPqnHU4cfH82^44`JS)YppE8Ux0FF|cQVzYiYD7&0bB
z`E;O>D*$i=vk3a~F9-B&0EUc75f+HkP@smoe8q4Y4!h5OA!Ab1aB{LT=E*C&d_!@v
zI_zHD$puAgjR9j|$iRg@j=BEdpT7SOgY3x|Fb2+w0XJ%9%@nVcYisdxTx&h_0m{OD
lNpT;7j+A2faw)!nioou<0}L6HA}kR55fB=zF$VsWfp4#8P9XpQ

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json
deleted file mode 100644
index d1701a4..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/inputs/in.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "comp1",
-      "seq_region_start": 10,
-      "seq_region_end": 20,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/manifest.txt
deleted file mode 100644
index 1ac93e6..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/manifest.txt
+++ /dev/null
@@ -1 +0,0 @@
-inputs/in.json
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json
deleted file mode 100644
index 3479d90..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/output/test.features.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 109,
-      "seq_region_end": 119,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/test.agp b/modules/ensembl/features/combine_json/tests/data/ncrna/agp/test.agp
deleted file mode 100644
index 86dddab..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/agp/test.agp
+++ /dev/null
@@ -1 +0,0 @@
-chr1	100	199	1	W	comp1	1	100	+
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/.DS_Store
deleted file mode 100644
index fe6726456fbd1a5c01fb943de0bb1315c8847206..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKJ5Iwu5S>XZWRxZ)<%)tnH!zVoK`wwoB!U!UOCcSFkH9Io0R<H$*W%4b!H$XO
z5Q=7`*|(mZS&!e^o?Rm1`EE5Mni5eC6&#(T`9Wk{bRZ+Q$RO(+Pqf|K&lY!e)eOAD
z@E;kFwL7B+dZdPSG+4jv`Eb0d%W}D?m$2uT?{6>b*U#&r&+1p7X5D>H5~e^NwdSCi
z^xvcL)!}$IkI6?TkIo!b{hh}#pMD<l9fSk}!9Xx@@(iG6i{ul-XoG=ZAQ;#)ApeI1
z6)YTEL;ZE2(IWsbgINWA-9rJB7=VRiYX}R3Efi>>>_-f?aM+Xk6^^Z;g%j(YF^}Jw
z{e;54gX5&`#D!tB!9Xz3XJBlXQ>p(q$It)$ptuSKf`OA_fU|N@&hbcITL%v(wKhR-
pp(5hf8ty~T(NYXwDa99171)z>fQ4gg2n!@O0!l+P!N8v~@C8=rP3-^x

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json
deleted file mode 100644
index 82f7bb1..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/a.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1_1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json
deleted file mode 100644
index d6c2349..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/inputs/b.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1_4",
-      "seq_region_start": 1,
-      "seq_region_end": 2,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/manifest.txt
deleted file mode 100644
index 419c5fd..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/a.json
-inputs/b.json
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json
deleted file mode 100644
index 995f408..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/custom_regex/output/test.features.json
+++ /dev/null
@@ -1,37 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    },
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 4,
-      "seq_region_end": 5,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/order/.DS_Store
deleted file mode 100644
index 66ff2cf9ee6c96bb1913fed247a844fe61ae8b34..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKJ5Iwu5S>XZWJHsaax41Wz(nQ*xc~~02vUMALHez!C^!Q*;0&}>oQF3b1v@69
zLnxY&X5V^tW<7pudv=M4XWQkJXhK9KRB&{P<_D2+(SeNIB7>}RJkrDZb~?XlHtoPW
z4F8b<S-TUuqkC#;OM~?*UiQZ~O<gb6%>wo^f4n`fUf(Z=KFeQy+Ew>GNtgn8G@65E
z(tnS}7yIMgJf1!~d35I3G~am~^6BRx-$6()5DWwZN6!Fiwn%wq7;P{R3<Lu^2IT*c
zpn{cSHq>7S8a)C43z${V*F6+4i2+zSW<yvYY@t94Wj|uDg~OiQuX4<W7EY{p#yoy!
z_7e*G4vv$$6IX`O1_QxBpMkMmPNe={9X|i}gW@U}2nLRd0WRu!J;Nh;Z5=$E)Y=5S
phKh(^Hr$1vqoo+WQi{)^DzGQ%04v9A2n!@O0!l+P!N8v~@CEkLP#XXM

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json b/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json
deleted file mode 100644
index 8cee59d..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/01.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1_chunk_start_1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json b/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json
deleted file mode 100644
index a76e76e..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/order/inputs/02.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1_chunk_start_4",
-      "seq_region_start": 1,
-      "seq_region_end": 2,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/order/manifest.txt
deleted file mode 100644
index dad42b0..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/order/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/02.json
-inputs/01.json
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json
deleted file mode 100644
index 87be453..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/order/output/test.features.json
+++ /dev/null
@@ -1,37 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 4,
-      "seq_region_end": 5,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    },
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/.DS_Store b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/.DS_Store
deleted file mode 100644
index 34af3ace395b1fabfb7621ad8be3aaa77d8fada9..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKOHRW;47J+`1$EOU%UQAS4MG)8&<mgvi6S*55_F#}D_C#_ZonD17SA7*q@iMm
zP-RP=H}TjL=S>pN5D_nT)q-eFL^)J&a*pN)k#W(HOxz-ataCik<MwW`x^3!q<Q<0p
z$bhWf8Qs$ZwX~zr`ekp2<8@P(>us}!J)b>)yl&n<uf{&ruRiUj|DGgFfjk<`K{FY?
zN7Kv0@qQlFOD~V!9Ch=Z$1$H_9`YT81Ovf9FmUn=pk|BYGs9?ufnXpQ*fSvihXfTY
z96Lk(b)eBB05F4D1%2H^0h1Vjg=1$33xq8cXrb&!47PCCllv8pouP#j>zy%=-<kb{
z!oGv!r0&FpVYI<OFfe3bYL|1V|JTRQ|HGiT3I>9KlVX6ga#b$zNM2h<4=1%YL2sZU
n;@27OL(tJu3|}e57f==0lXQTEV`m5pBsKy{Lo~s_pEB?TBx_B{

diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/a.json
deleted file mode 100644
index 8cee59d..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/a.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1_chunk_start_1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/b.json
deleted file mode 100644
index a76e76e..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/inputs/b.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1_chunk_start_4",
-      "seq_region_start": 1,
-      "seq_region_end": 2,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/manifest.txt b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/manifest.txt
deleted file mode 100644
index cada44b..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/a.json
-inputs/b.json
\ No newline at end of file
diff --git a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/output/test.features.json
deleted file mode 100644
index 995f408..0000000
--- a/modules/ensembl/features/combine_json/tests/data/ncrna/seq_region/output/test.features.json
+++ /dev/null
@@ -1,37 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "cmscan",
-    "display_label": "cmscan",
-    "description": "cmscan analysis",
-    "program": "test",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "ncrna_tool": "cmscan",
-  "ncrna_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    },
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 4,
-      "seq_region_end": 5,
-      "seq_region_strand": 1,
-      "biotype": "miRNA",
-      "score": 1.0,
-      "target_name": "MIRTEST",
-      "is_significant": true
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/agp/inputs/in.json b/modules/ensembl/features/combine_json/tests/data/repeat/agp/inputs/in.json
deleted file mode 100644
index 8228fd3..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/agp/inputs/in.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "comp1",
-      "seq_region_start": 10,
-      "seq_region_end": 20,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 11,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/agp/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/agp/manifest.txt
deleted file mode 100644
index 1ac93e6..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/agp/manifest.txt
+++ /dev/null
@@ -1 +0,0 @@
-inputs/in.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/agp/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/repeat/agp/output/test.features.json
deleted file mode 100644
index cfc4cd2..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/agp/output/test.features.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 109,
-      "seq_region_end": 119,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 11,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp b/modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp
deleted file mode 100644
index 86dddab..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/agp/test.agp
+++ /dev/null
@@ -1 +0,0 @@
-chr1	100	199	1	W	comp1	1	100	+
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/inputs/in.json b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/inputs/in.json
deleted file mode 100644
index 69bfad7..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/inputs/in.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr1_11",
-      "seq_region_start": 1,
-      "seq_region_end": 5,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 5,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt
deleted file mode 100644
index 1ac93e6..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/manifest.txt
+++ /dev/null
@@ -1 +0,0 @@
-inputs/in.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/output/test.features.json
deleted file mode 100644
index 0d8eff4..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/custom_regex/output/test.features.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 11,
-      "seq_region_end": 15,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 5,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/01.json b/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/01.json
deleted file mode 100644
index 269ac0b..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/01.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr2_chunk_start_1",
-      "seq_region_start": 1,
-      "seq_region_end": 2,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 2,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/02.json b/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/02.json
deleted file mode 100644
index 8256fd2..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/order/inputs/02.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr2_chunk_start_3",
-      "seq_region_start": 1,
-      "seq_region_end": 1,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 1,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt
deleted file mode 100644
index dad42b0..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/order/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/02.json
-inputs/01.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/order/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/repeat/order/output/test.features.json
deleted file mode 100644
index 0442952..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/order/output/test.features.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr2",
-      "seq_region_start": 3,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 1,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    },
-    {
-      "seq_region": "chr2",
-      "seq_region_start": 1,
-      "seq_region_end": 2,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 2,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/a.json b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/a.json
deleted file mode 100644
index b33f05c..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/a.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr1_chunk_start_1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 3,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/b.json b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/b.json
deleted file mode 100644
index e6787cb..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/inputs/b.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr1_chunk_start_4",
-      "seq_region_start": 1,
-      "seq_region_end": 2,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 2,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/manifest.txt b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/manifest.txt
deleted file mode 100644
index 419c5fd..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/manifest.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-inputs/a.json
-inputs/b.json
diff --git a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/output/test.features.json b/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/output/test.features.json
deleted file mode 100644
index c69532b..0000000
--- a/modules/ensembl/features/combine_json/tests/data/repeat/seq_region/output/test.features.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "analysis": {
-    "run_date": "2026-02-18T00:00:00Z",
-    "logic_name": "rm",
-    "display_label": "rm",
-    "description": "rm analysis (nf-test)",
-    "program": "stub",
-    "program_version": "0.0"
-  },
-  "source": {
-    "source_provider": "prov",
-    "is_primary": true
-  },
-  "repeat_consensus": [
-    {
-      "repeat_consensus_key": "58bc82baa00a592e0b49f526b80a7c89",
-      "repeat_name": "Alu",
-      "repeat_class": "SINE",
-      "repeat_type": "Alu",
-      "repeat_consensus": "ACGT"
-    }
-  ],
-  "repeat_features": [
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 1,
-      "seq_region_end": 3,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 3,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    },
-    {
-      "seq_region": "chr1",
-      "seq_region_start": 4,
-      "seq_region_end": 5,
-      "seq_region_strand": 1,
-      "repeat_start": 1,
-      "repeat_end": 2,
-      "repeat_consensus": "58bc82baa00a592e0b49f526b80a7c89"
-    }
-  ]
-}
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test
index 153f75b..4557ff1 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test
@@ -32,53 +32,20 @@ nextflow_process {
 
             process {
                 """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/seq_region/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Stub outputs: ncRNA seq_region mode") {
-
-        when {
-            options "-stub"
+                def inJson = file("in.json")
+                inJson.text = '{"repeat_features": []}\\n'
 
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/seq_region/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Stub outputs: repeat AGP mode") {
+                def manifest = file("manifest.txt")
+                manifest.text = "in.json\\n"
 
-        when {
-            options "-stub"
+                def noFile = file("NO_FILE")
+                noFile.text = ""
 
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/repeat/agp/test.agp')]
+                input[0] = [
+                    [ id:'test' ],
+                    manifest,
+                    noFile
+                ]
                 """
             }
         }
@@ -91,56 +58,27 @@ nextflow_process {
         }
     }
 
-    test("Stub outputs: ncRNA AGP mode") {
+    test("Stub outputs: ncRNA seq_region mode") {
 
         when {
             options "-stub"
 
             process {
                 """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/ncrna/agp/test.agp')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: repeat seq_region combine + seq_region-driven liftover") {
-
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/seq_region/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
+                def inJson = file("in.json")
+                inJson.text = '{"ncrna_features": [], "ncrna_tool": "cmscan"}\\n'
 
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
+                def manifest = file("manifest.txt")
+                manifest.text = "in.json\\n"
 
-    test("Real run: ncRNA seq_region combine + seq_region-driven liftover") {
+                def noFile = file("NO_FILE")
+                noFile.text = ""
 
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/seq_region/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
+                input[0] = [
+                    [ id:'test' ],
+                    manifest,
+                    noFile
+                ]
                 """
             }
         }
@@ -153,56 +91,27 @@ nextflow_process {
         }
     }
 
-    test("Real run: repeat AGP-driven liftover") {
+    test("Stub outputs: repeat AGP mode") {
 
         when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/repeat/agp/test.agp')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: ncRNA AGP-driven liftover") {
+            options "-stub"
 
-        when {
             process {
                 """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/agp/manifest.txt'),
-                            file('${moduleDir}/tests/data/ncrna/agp/test.agp')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
+                def inJson = file("in.json")
+                inJson.text = '{"repeat_features": []}\\n'
 
-    test("Real run: repeat custom chunk regex") {
+                def manifest = file("manifest.txt")
+                manifest.text = "in.json\\n"
 
-        when {
-            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
+                def agp = file("test.agp")
+                agp.text = ""
 
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/custom_regex/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
+                input[0] = [
+                    [ id:'test' ],
+                    manifest,
+                    agp
+                ]
                 """
             }
         }
@@ -215,57 +124,27 @@ nextflow_process {
         }
     }
 
-    test("Real run: ncRNA custom chunk regex") {
+    test("Stub outputs: ncRNA AGP mode") {
 
         when {
-            // Matches the ncRNA custom_regex inputs: <base>_<start>
-            params.chunk_id_regex = '^(?P<base>.+)_(?P<start>\\d+)$'
-
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/custom_regex/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
-
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-
-    test("Real run: repeat manifest order is preserved") {
+            options "-stub"
 
-        when {
             process {
                 """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/repeat/order/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
-                """
-            }
-        }
+                def inJson = file("in.json")
+                inJson.text = '{"ncrna_features": [], "ncrna_tool": "cmscan"}\\n'
 
-        then {
-            assert process.trace.tasks().size() == 1
-            assert process.out.combined_json.size() == 1
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
+                def manifest = file("manifest.txt")
+                manifest.text = "in.json\\n"
 
-    test("Real run: ncRNA manifest order is preserved") {
+                def agp = file("test.agp")
+                agp.text = ""
 
-        when {
-            process {
-                """
-                input[0] = [[ id:'test' ],
-                            file('${moduleDir}/tests/data/ncrna/order/manifest.txt'),
-                            file('${workflow.projectDir}/modules/assets/NO_FILE')]
+                input[0] = [
+                    [ id:'test' ],
+                    manifest,
+                    agp
+                ]
                 """
             }
         }
@@ -277,4 +156,4 @@ nextflow_process {
             assert snapshot(process.out).match()
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index 9ff7391..0ad5cd4 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Real run: AGP-driven liftover": {
+    "Stub outputs: repeat AGP mode": {
         "content": [
             {
                 "0": [
@@ -7,182 +7,26 @@
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                        "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T17:54:02.625791",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: ncRNA custom chunk regex": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
-                    ]
+                "1": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ],
                 "combined_json": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:15:58.553743",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: header combine + header-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                        "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
+                "versions": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ]
             }
         ],
-        "timestamp": "2026-02-23T17:54:00.401674",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: repeat custom chunk regex": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T18:38:58.589502",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: repeat manifest order is preserved": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T18:39:03.129965",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: ncRNA header mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:21:33.771238",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: repeat seq_region combine + seq_region-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T23:31:17.929825",
+        "timestamp": "2026-03-11T12:27:17.171188",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -196,74 +40,26 @@
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
+                        "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,556a240063931bcbba8ee21d6efc373d"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T23:32:28.865106",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: ncRNA header combine + header-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
-                    ]
+                "1": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ],
                 "combined_json": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:15:49.744214",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: ncRNA seq_region combine + seq_region-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
+                        "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,4c10f64659bc581612383e3afece97fb"
-                    ]
+                "versions": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ]
             }
         ],
-        "timestamp": "2026-02-23T23:31:20.204864",
+        "timestamp": "2026-03-11T12:27:15.074952",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -277,263 +73,26 @@
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,435c4d8f4008e57685ff951bbe81df0e"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,435c4d8f4008e57685ff951bbe81df0e"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:21:38.112104",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: ncRNA manifest order is preserved": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,c5b36cf499f0d111684f91372469154f"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,c5b36cf499f0d111684f91372469154f"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:16:02.962026",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: repeat AGP mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                        "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:21:35.954494",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: AGP mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
+                "1": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ],
                 "combined_json": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T17:53:58.199351",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: header mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                        "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
+                "versions": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ]
             }
         ],
-        "timestamp": "2026-02-23T17:53:56.112251",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: ncRNA AGP-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,837dcba57ebd00c1b8adbce528b8f1b0"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,837dcba57ebd00c1b8adbce528b8f1b0"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:15:54.146861",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: repeat header combine + header-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T18:38:49.606314",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: custom chunk regex": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,f410544c71be74f7a8a7eab5e494b258"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T17:54:04.861554",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Stub outputs: repeat header mode": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T19:21:31.584701",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: manifest order is preserved": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
-                    ]
-                ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,1b68c1371265dad11839769a5e776b33"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T17:54:07.074875",
+        "timestamp": "2026-03-11T12:27:19.259793",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -547,47 +106,26 @@
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
+                        "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
                 ],
-                "combined_json": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,007a5710a0037aae8f907d13cde08f77"
-                    ]
-                ]
-            }
-        ],
-        "timestamp": "2026-02-23T23:32:26.754167",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.3"
-        }
-    },
-    "Real run: repeat AGP-driven liftover": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
-                    ]
+                "1": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ],
                 "combined_json": [
                     [
                         {
                             "id": "test"
                         },
-                        "test.features.json:md5,5fc5a0cd8050982334ada4bca1a55950"
+                        "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
+                ],
+                "versions": [
+                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
                 ]
             }
         ],
-        "timestamp": "2026-02-23T18:38:54.140158",
+        "timestamp": "2026-03-11T12:27:12.976715",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"

From 1405de94787e7e5b96902bebebf3b29550b8278d Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Mon, 16 Mar 2026 15:09:12 +0000
Subject: [PATCH 23/36] Move dynamic memory allocation to pipeline

---
 modules/ensembl/fasta/split/main.nf | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 4a33e00..2855857 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -21,8 +21,6 @@ process FASTA_SPLIT {
     conda "${moduleDir}/environment.yml"
     container "ensemblorg/ensembl-genomio:v1.6.1"
 
-    memory { fasta_split_mem(longest_seq_bp) }
-
     input:
         tuple val(meta), path(fasta), val(longest_seq_bp)
 
@@ -122,14 +120,3 @@ process FASTA_SPLIT {
         END_VERSIONS
         """     
 }
-
-
-def fasta_split_mem(longest_seq_bp) {
-    if( !longest_seq_bp || longest_seq_bp <= 0 ) return 8.GB
-
-    // Heuristic: ~2.5 bytes/base peak => ~1 GB per 400 Mbp of the *longest* sequence
-    // Add 2GB base memory to account for overhead
-    def mem_gb = 2 + Math.ceil(longest_seq_bp as double / 400_000_000d)
-    return mem_gb.GB
-}
-

From b00d0ac6b7aa451d0e0ddb484a99e20296b5fd3e Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 20 Mar 2026 16:14:22 +0000
Subject: [PATCH 24/36] Update output filename

---
 modules/ensembl/features/combine_json/main.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index f6dd127..1ebd9c1 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -25,7 +25,7 @@ process FEATURES_COMBINE_JSON {
         tuple val(meta), path(json_manifest), path(agp)
 
     output:
-        tuple val(meta), path("${meta.id}.features.json"), emit: combined_json
+        tuple val(meta), path("${meta.id}.${meta.analysis}.json"), emit: combined_json
         path "versions.yml", emit: versions
 
     script:
@@ -45,7 +45,7 @@ process FEATURES_COMBINE_JSON {
             args << "--agp-file '${agp}'"
         }
 
-        def out_json = "${meta.id}.features.json"
+        def out_json = "${meta.id}.${meta.analysis}.json"
 
         """
         features_combine_json \\
@@ -63,7 +63,7 @@ process FEATURES_COMBINE_JSON {
         """
         set -euo pipefail
 
-        out_json="${meta.id}.features.json"
+        out_json="${meta.id}.${meta.analysis}.json"
 
         test -s "${json_manifest}"
 

From 78e789d0cdc62bfcb75591f27f444965be4ac9fe Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 12 May 2026 13:36:47 +0100
Subject: [PATCH 25/36] Add meta files

---
 modules/ensembl/fasta/recombine/meta.yml      | 56 ++++++++++++++++
 modules/ensembl/fasta/split/meta.yml          | 64 +++++++++++++++++++
 .../ensembl/features/combine_json/meta.yml    | 56 ++++++++++++++++
 .../features/combine_json/tests/main.nf.test  | 10 +--
 .../combine_json/tests/main.nf.test.snap      | 26 +++++---
 5 files changed, 198 insertions(+), 14 deletions(-)
 create mode 100644 modules/ensembl/fasta/recombine/meta.yml
 create mode 100644 modules/ensembl/fasta/split/meta.yml
 create mode 100644 modules/ensembl/features/combine_json/meta.yml

diff --git a/modules/ensembl/fasta/recombine/meta.yml b/modules/ensembl/fasta/recombine/meta.yml
new file mode 100644
index 0000000..98bc4b6
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/meta.yml
@@ -0,0 +1,56 @@
+---
+name: "fasta_recombine"
+description: Recombine split FASTA sequences into a single FASTA file,
+  optionally using an AGP file.
+keywords:
+  - ensembl
+  - fasta
+  - genomics
+  - genomio
+  - recombine
+
+tools:
+  - "fasta_recombine":
+      description: "Recombine split FASTA sequences generated by ensembl-genomio."
+      homepage: "https://github.com/Ensembl/ensembl-genomio"
+      licence: ["Apache License version 2.0"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing meta information
+          e.g. `[ id:'accession1' ]`
+    - fasta_manifest:
+        type: file
+        description: Manifest file listing split FASTA files to recombine.
+        pattern: "*.txt"
+        ontologies: []
+    - agp:
+        type: file
+        description:
+          Optional AGP file describing how split sequence chunks should
+          be recombined. Use NO_FILE when not required.
+        pattern: "*.{agp,NO_FILE}"
+        ontologies: []
+output:
+  recombined_fasta:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing meta information
+            e.g. `[ id:'accession1' ]`
+      - "${meta.id}.fa":
+          type: file
+          description: Recombined FASTA file.
+          pattern: "*.fa"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions.
+        pattern: "versions.yml"
+authors:
+  - "ensembl-dev@ebi.ac.uk"
+maintainers:
+  - "ensembl-dev@ebi.ac.uk"
diff --git a/modules/ensembl/fasta/split/meta.yml b/modules/ensembl/fasta/split/meta.yml
new file mode 100644
index 0000000..59ca0a8
--- /dev/null
+++ b/modules/ensembl/fasta/split/meta.yml
@@ -0,0 +1,64 @@
+---
+name: "fasta_split"
+description: Split a FASTA file into smaller FASTA files and optionally write an
+  AGP file.
+keywords:
+  - ensembl
+  - fasta
+  - genomics
+  - genomio
+  - split
+
+tools:
+  - "fasta_split":
+      description: "Split FASTA files with ensembl-genomio."
+      homepage: "https://github.com/Ensembl/ensembl-genomio"
+      licence: ["Apache License version 2.0"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing meta information
+          e.g. `[ id:'accession1' ]`
+    - fasta:
+        type: file
+        description: FASTA file to split.
+        pattern: "*.{fa,fasta,fna}"
+        ontologies: []
+    - longest_seq_bp:
+        type: integer
+        description: Length in base pairs of the longest sequence in the input
+          FASTA.
+
+output:
+  fastas:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing meta information
+            e.g. `[ id:'accession1' ]`
+      - "splits/**/*.fa":
+          type: file
+          description: Split FASTA files.
+          pattern: "splits/**/*.fa"
+  agp:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing meta information
+            e.g. `[ id:'accession1' ]`
+      - "splits/*.agp":
+          type: file
+          description: Optional AGP file describing split sequence chunks.
+          pattern: "splits/*.agp"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions.
+        pattern: "versions.yml"
+authors:
+  - "ensembl-dev@ebi.ac.uk"
+maintainers:
+  - "ensembl-dev@ebi.ac.uk"
diff --git a/modules/ensembl/features/combine_json/meta.yml b/modules/ensembl/features/combine_json/meta.yml
new file mode 100644
index 0000000..750559d
--- /dev/null
+++ b/modules/ensembl/features/combine_json/meta.yml
@@ -0,0 +1,56 @@
+---
+name: "features_combine_json"
+description: Combine split feature JSON files into a single JSON file,
+  optionally using an AGP file.
+keywords:
+  - ensembl
+  - features
+  - genomics
+  - genomio
+  - json
+
+tools:
+  - "features_combine_json":
+      description: "Combine split feature JSON files generated by ensembl-genomio."
+      homepage: "https://github.com/Ensembl/ensembl-genomio"
+      licence: ["Apache License version 2.0"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing meta information
+          e.g. `[ id:'accession1', analysis:'repeat' ]`
+    - json_manifest:
+        type: file
+        description: Manifest file listing split JSON files to combine.
+        pattern: "*.txt"
+        ontologies: []
+    - agp:
+        type: file
+        description:
+          Optional AGP file describing how split sequence chunks should
+          be recombined. Use NO_FILE when not required.
+        pattern: "*.{agp,NO_FILE}"
+        ontologies: []
+output:
+  combined_json:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing meta information
+            e.g. `[ id:'accession1', analysis:'repeat' ]`
+      - "${meta.id}.${meta.analysis}.json":
+          type: file
+          description: Combined feature JSON file.
+          pattern: "*.json"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions.
+        pattern: "versions.yml"
+authors:
+  - "ensembl-dev@ebi.ac.uk"
+maintainers:
+  - "ensembl-dev@ebi.ac.uk"
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test
index 4557ff1..cb61d27 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test
@@ -42,7 +42,7 @@ nextflow_process {
                 noFile.text = ""
 
                 input[0] = [
-                    [ id:'test' ],
+                    [ id:'test', analysis:'features' ],
                     manifest,
                     noFile
                 ]
@@ -75,7 +75,7 @@ nextflow_process {
                 noFile.text = ""
 
                 input[0] = [
-                    [ id:'test' ],
+                    [ id:'test', analysis:'features' ],
                     manifest,
                     noFile
                 ]
@@ -108,7 +108,7 @@ nextflow_process {
                 agp.text = ""
 
                 input[0] = [
-                    [ id:'test' ],
+                    [ id:'test', analysis:'features' ],
                     manifest,
                     agp
                 ]
@@ -141,7 +141,7 @@ nextflow_process {
                 agp.text = ""
 
                 input[0] = [
-                    [ id:'test' ],
+                    [ id:'test', analysis:'features' ],
                     manifest,
                     agp
                 ]
@@ -156,4 +156,4 @@ nextflow_process {
             assert snapshot(process.out).match()
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index 0ad5cd4..238ac28 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -5,7 +5,8 @@
                 "0": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -16,7 +17,8 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -38,7 +40,8 @@
                 "0": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -49,7 +52,8 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -71,7 +75,8 @@
                 "0": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -82,7 +87,8 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -104,7 +110,8 @@
                 "0": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -115,7 +122,8 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test"
+                            "id": "test",
+                            "analysis": "features"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -131,4 +139,4 @@
             "nextflow": "25.10.3"
         }
     }
-}
\ No newline at end of file
+}

From 2883bc08cef4fb12b21abbea7420c3b25a23ca34 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 12 May 2026 13:59:04 +0100
Subject: [PATCH 26/36] Linting updates

---
 .../ensembl/fasta/recombine/environment.yml   |   3 +-
 modules/ensembl/fasta/recombine/main.nf       |  19 ++--
 modules/ensembl/fasta/recombine/meta.yml      |  24 ++++-
 .../fasta/recombine/tests/main.nf.test        |   6 +-
 .../fasta/recombine/tests/main.nf.test.snap   |  36 +++++--
 modules/ensembl/fasta/split/environment.yml   |   3 +-
 modules/ensembl/fasta/split/main.nf           |  18 ++--
 modules/ensembl/fasta/split/meta.yml          |  24 ++++-
 .../ensembl/fasta/split/tests/main.nf.test    |   8 +-
 .../fasta/split/tests/main.nf.test.snap       | 100 ++++++++++++------
 .../features/combine_json/environment.yml     |   3 +-
 modules/ensembl/features/combine_json/main.nf |  18 ++--
 .../ensembl/features/combine_json/meta.yml    |  24 ++++-
 .../combine_json/tests/main.nf.test.snap      |  66 +++++++++---
 14 files changed, 232 insertions(+), 120 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/environment.yml b/modules/ensembl/fasta/recombine/environment.yml
index 52b218c..94089f3 100644
--- a/modules/ensembl/fasta/recombine/environment.yml
+++ b/modules/ensembl/fasta/recombine/environment.yml
@@ -1,7 +1,6 @@
 ---
-name: "fasta_recombine"
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - ensembl-genomio=1.6.1
\ No newline at end of file
+  - ensembl-genomio=1.6.1
diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 057c98f..1e2e88c 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -19,14 +19,19 @@ process FASTA_RECOMBINE {
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
-    container "ensemblorg/ensembl-genomio:v1.6.1"
+    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/ensembl-genomio:1.6.1--pyhdfd78af_0'
+        : 'biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
 
     input:
         tuple val(meta), path(fasta_manifest), path(agp)
 
     output:
         tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
-        path "versions.yml", emit: versions
+        tuple val("${task.process}"), val('fasta_recombine'), eval('echo 1.6.1'), emit: versions_fasta_recombine, topic: versions
+
+    when:
+        task.ext.when == null || task.ext.when
 
     script:
         def args = []
@@ -52,11 +57,6 @@ process FASTA_RECOMBINE {
             --fasta-manifest ${fasta_manifest} \\
             --out-fasta ${out_fasta} \\
             ${args.join(' ')}
-
-        cat <<-END_VERSIONS > versions.yml
-        ${task.process}:
-        fasta_recombine: \$(fasta_recombine --version 2>/dev/null | head -n 1)
-        END_VERSIONS
         """
 
     stub:
@@ -65,10 +65,5 @@ process FASTA_RECOMBINE {
 
         out_fa="${meta.id}.fa"
         touch "\$out_fa"
-
-        cat <<-END_VERSIONS > versions.yml
-        ${task.process}:
-            fasta_recombine: stub
-        END_VERSIONS
         """   
 }
diff --git a/modules/ensembl/fasta/recombine/meta.yml b/modules/ensembl/fasta/recombine/meta.yml
index 98bc4b6..8583e3e 100644
--- a/modules/ensembl/fasta/recombine/meta.yml
+++ b/modules/ensembl/fasta/recombine/meta.yml
@@ -45,11 +45,27 @@ output:
           type: file
           description: Recombined FASTA file.
           pattern: "*.fa"
+  versions_fasta_recombine:
+    - - ${task.process}:
+          type: string
+          description: The name of the process.
+      - fasta_recombine:
+          type: string
+          description: The name of the tool.
+      - echo 1.6.1:
+          type: eval
+          description: The expression to obtain the version of the tool.
+topics:
   versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions.
-        pattern: "versions.yml"
+    - - ${task.process}:
+          type: string
+          description: The name of the process.
+      - fasta_recombine:
+          type: string
+          description: The name of the tool.
+      - echo 1.6.1:
+          type: eval
+          description: The expression to obtain the version of the tool.
 authors:
   - "ensembl-dev@ebi.ac.uk"
 maintainers:
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index 9a7a6c9..4448cf7 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -25,7 +25,7 @@ nextflow_process {
     tag "fasta"
     tag "fasta/recombine"
 
-    test("Stub outputs: header mode") {
+    test("stub outputs: header mode") {
 
         when {
             options "-stub"
@@ -55,7 +55,7 @@ nextflow_process {
         }
     }
 
-    test("Stub outputs: AGP mode") {
+    test("stub outputs: AGP mode") {
 
         when {
             options "-stub"
@@ -83,4 +83,4 @@ nextflow_process {
             assert snapshot(process.out).match()
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index 3c98f07..2ad0719 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Stub outputs: AGP mode": {
+    "stub outputs: AGP mode": {
         "content": [
             {
                 "0": [
@@ -11,7 +11,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
+                    [
+                        "FASTA_RECOMBINE",
+                        "fasta_recombine",
+                        "1.6.1"
+                    ]
                 ],
                 "recombined_fasta": [
                     [
@@ -21,18 +25,22 @@
                         "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
+                "versions_fasta_recombine": [
+                    [
+                        "FASTA_RECOMBINE",
+                        "fasta_recombine",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:20:11.373089",
+        "timestamp": "2026-05-12T13:50:13.249443",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
         }
     },
-    "Stub outputs: header mode": {
+    "stub outputs: header mode": {
         "content": [
             {
                 "0": [
@@ -44,7 +52,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
+                    [
+                        "FASTA_RECOMBINE",
+                        "fasta_recombine",
+                        "1.6.1"
+                    ]
                 ],
                 "recombined_fasta": [
                     [
@@ -54,12 +66,16 @@
                         "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,191cc20355b504364a619df6b4c639aa"
+                "versions_fasta_recombine": [
+                    [
+                        "FASTA_RECOMBINE",
+                        "fasta_recombine",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:20:09.308095",
+        "timestamp": "2026-05-12T13:50:11.167936",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/fasta/split/environment.yml b/modules/ensembl/fasta/split/environment.yml
index 208dc35..94089f3 100644
--- a/modules/ensembl/fasta/split/environment.yml
+++ b/modules/ensembl/fasta/split/environment.yml
@@ -1,7 +1,6 @@
 ---
-name: "fasta_split"
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - ensembl-genomio=1.6.1
\ No newline at end of file
+  - ensembl-genomio=1.6.1
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 2855857..2e3acc5 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -19,7 +19,9 @@ process FASTA_SPLIT {
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
-    container "ensemblorg/ensembl-genomio:v1.6.1"
+    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/ensembl-genomio:1.6.1--pyhdfd78af_0'
+        : 'biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
 
     input:
         tuple val(meta), path(fasta), val(longest_seq_bp)
@@ -27,7 +29,10 @@ process FASTA_SPLIT {
     output:
         tuple val(meta), path("splits/**/*.fa"), emit: fastas
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
-        path "versions.yml", emit: versions
+        tuple val("${task.process}"), val('fasta_split'), eval('echo 1.6.1'), emit: versions_fasta_split, topic: versions
+
+    when:
+        task.ext.when == null || task.ext.when
 
     script:
         def args = []
@@ -73,11 +78,6 @@ process FASTA_SPLIT {
             --fasta-file ${fasta} \\
             --out-dir splits \\
             ${args.join(' ')}
-
-        cat <<-END_VERSIONS > versions.yml
-        ${task.process}:
-        fasta_split: \$(fasta_split --version 2>/dev/null | head -n 1)
-        END_VERSIONS
         """
 
     stub:
@@ -114,9 +114,5 @@ process FASTA_SPLIT {
             touch "splits/${meta.id}.agp"
         fi
 
-        cat <<-END_VERSIONS > versions.yml
-        ${task.process}:
-            fasta_split: stub
-        END_VERSIONS
         """     
 }
diff --git a/modules/ensembl/fasta/split/meta.yml b/modules/ensembl/fasta/split/meta.yml
index 59ca0a8..96303dd 100644
--- a/modules/ensembl/fasta/split/meta.yml
+++ b/modules/ensembl/fasta/split/meta.yml
@@ -53,11 +53,27 @@ output:
           type: file
           description: Optional AGP file describing split sequence chunks.
           pattern: "splits/*.agp"
+  versions_fasta_split:
+    - - ${task.process}:
+          type: string
+          description: The name of the process.
+      - fasta_split:
+          type: string
+          description: The name of the tool.
+      - echo 1.6.1:
+          type: eval
+          description: The expression to obtain the version of the tool.
+topics:
   versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions.
-        pattern: "versions.yml"
+    - - ${task.process}:
+          type: string
+          description: The name of the process.
+      - fasta_split:
+          type: string
+          description: The name of the tool.
+      - echo 1.6.1:
+          type: eval
+          description: The expression to obtain the version of the tool.
 authors:
   - "ensembl-dev@ebi.ac.uk"
 maintainers:
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test b/modules/ensembl/fasta/split/tests/main.nf.test
index 5aa3acf..042ff9c 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test
+++ b/modules/ensembl/fasta/split/tests/main.nf.test
@@ -26,7 +26,7 @@ nextflow_process {
     tag "fasta/split"
 
 
-    test("Stub outputs: default layout, no AGP") {
+    test("stub outputs: default layout, no AGP") {
 
         when {
             options "-stub"
@@ -67,7 +67,7 @@ nextflow_process {
         }
     }
 
-    test("Stub outputs: AGP optional output appears when enabled") {
+    test("stub outputs: AGP optional output appears when enabled") {
 
         when {
             options "-stub"
@@ -109,7 +109,7 @@ nextflow_process {
         }
     }
 
-    test("Stub outputs: unique_file_names contract") {
+    test("stub outputs: unique_file_names contract") {
 
         when {
             options "-stub"
@@ -144,7 +144,7 @@ nextflow_process {
         }
     }
 
-    test("Stub outputs: nested directory layout contract") {
+    test("stub outputs: nested directory layout contract") {
 
         when {
             options "-stub"
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
index d736a2a..9914c59 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/split/tests/main.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Stub outputs: AGP optional output appears when enabled": {
+    "stub outputs: default layout, no AGP": {
         "content": [
             {
                 "0": [
@@ -14,23 +14,17 @@
                     ]
                 ],
                 "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
+                    
                 ],
                 "2": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
-                ],
-                "agp": [
                     [
-                        {
-                            "id": "test"
-                        },
-                        "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
                     ]
+                ],
+                "agp": [
+                    
                 ],
                 "fastas": [
                     [
@@ -43,18 +37,22 @@
                         ]
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                "versions_fasta_split": [
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:20:33.334793",
+        "timestamp": "2026-05-12T13:50:15.366293",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
         }
     },
-    "Stub outputs: nested directory layout contract": {
+    "stub outputs: AGP optional output appears when enabled": {
         "content": [
             {
                 "0": [
@@ -69,13 +67,27 @@
                     ]
                 ],
                 "1": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
                 ],
                 "2": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ],
                 "agp": [
-                    
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
                 ],
                 "fastas": [
                     [
@@ -88,18 +100,22 @@
                         ]
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                "versions_fasta_split": [
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:20:37.504172",
+        "timestamp": "2026-05-12T13:50:17.499614",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
         }
     },
-    "Stub outputs: default layout, no AGP": {
+    "stub outputs: nested directory layout contract": {
         "content": [
             {
                 "0": [
@@ -117,7 +133,11 @@
                     
                 ],
                 "2": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ],
                 "agp": [
                     
@@ -133,18 +153,22 @@
                         ]
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                "versions_fasta_split": [
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:20:31.268587",
+        "timestamp": "2026-05-12T13:50:21.755317",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
         }
     },
-    "Stub outputs: unique_file_names contract": {
+    "stub outputs: unique_file_names contract": {
         "content": [
             {
                 "0": [
@@ -162,7 +186,11 @@
                     
                 ],
                 "2": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ],
                 "agp": [
                     
@@ -178,12 +206,16 @@
                         ]
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,f9829a6851db178766a8ce7426f53a65"
+                "versions_fasta_split": [
+                    [
+                        "FASTA_SPLIT",
+                        "fasta_split",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:20:35.403767",
+        "timestamp": "2026-05-12T13:50:19.618244",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/features/combine_json/environment.yml b/modules/ensembl/features/combine_json/environment.yml
index 5f1cb32..94089f3 100644
--- a/modules/ensembl/features/combine_json/environment.yml
+++ b/modules/ensembl/features/combine_json/environment.yml
@@ -1,7 +1,6 @@
 ---
-name: "features_combine_json"
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - ensembl-genomio=1.6.1
\ No newline at end of file
+  - ensembl-genomio=1.6.1
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index 1ebd9c1..a44767e 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -19,14 +19,19 @@ process FEATURES_COMBINE_JSON {
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
-    container "ensemblorg/ensembl-genomio:v1.6.1"
+    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/ensembl-genomio:1.6.1--pyhdfd78af_0'
+        : 'biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
 
     input:
         tuple val(meta), path(json_manifest), path(agp)
 
     output:
         tuple val(meta), path("${meta.id}.${meta.analysis}.json"), emit: combined_json
-        path "versions.yml", emit: versions
+        tuple val("${task.process}"), val('features_combine_json'), eval('echo 1.6.1'), emit: versions_features_combine_json, topic: versions
+
+    when:
+        task.ext.when == null || task.ext.when
 
     script:
         def args = []
@@ -52,11 +57,6 @@ process FEATURES_COMBINE_JSON {
             --json-manifest '${json_manifest}' \\
             --out-json '${out_json}' \\
             ${args.join(' ')}
-
-        cat <<-END_VERSIONS > versions.yml
-        ${task.process}:
-        features_combine_json: \$(features_combine_json --version 2>/dev/null | head -n 1)
-        END_VERSIONS
         """
 
     stub:
@@ -124,10 +124,6 @@ EOF
 EOF
         fi
 
-        cat <<-END_VERSIONS > versions.yml
-        ${task.process}:
-        features_combine_json: stub
-        END_VERSIONS
         """
         
 }
diff --git a/modules/ensembl/features/combine_json/meta.yml b/modules/ensembl/features/combine_json/meta.yml
index 750559d..e524e75 100644
--- a/modules/ensembl/features/combine_json/meta.yml
+++ b/modules/ensembl/features/combine_json/meta.yml
@@ -45,11 +45,27 @@ output:
           type: file
           description: Combined feature JSON file.
           pattern: "*.json"
+  versions_features_combine_json:
+    - - ${task.process}:
+          type: string
+          description: The name of the process.
+      - features_combine_json:
+          type: string
+          description: The name of the tool.
+      - echo 1.6.1:
+          type: eval
+          description: The expression to obtain the version of the tool.
+topics:
   versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions.
-        pattern: "versions.yml"
+    - - ${task.process}:
+          type: string
+          description: The name of the process.
+      - features_combine_json:
+          type: string
+          description: The name of the tool.
+      - echo 1.6.1:
+          type: eval
+          description: The expression to obtain the version of the tool.
 authors:
   - "ensembl-dev@ebi.ac.uk"
 maintainers:
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index 238ac28..986d31b 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -12,7 +12,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ],
                 "combined_json": [
                     [
@@ -23,12 +27,16 @@
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                "versions_features_combine_json": [
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:27:17.171188",
+        "timestamp": "2026-05-12T13:52:12.162809",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -47,7 +55,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ],
                 "combined_json": [
                     [
@@ -58,12 +70,16 @@
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                "versions_features_combine_json": [
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:27:15.074952",
+        "timestamp": "2026-05-12T13:52:09.797407",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -82,7 +98,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ],
                 "combined_json": [
                     [
@@ -93,12 +113,16 @@
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                "versions_features_combine_json": [
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:27:19.259793",
+        "timestamp": "2026-05-12T13:52:14.52976",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -117,7 +141,11 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ],
                 "combined_json": [
                     [
@@ -128,15 +156,19 @@
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
                 ],
-                "versions": [
-                    "versions.yml:md5,8258695d6a28a46edbaf2a9bf2dde339"
+                "versions_features_combine_json": [
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.6.1"
+                    ]
                 ]
             }
         ],
-        "timestamp": "2026-03-11T12:27:12.976715",
+        "timestamp": "2026-05-12T13:52:07.471915",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
         }
     }
-}
+}
\ No newline at end of file

From 6c07eb1fb50f7ee4c8d0c001800ba59fb884eeef Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 14 May 2026 10:42:24 +0100
Subject: [PATCH 27/36] Code review update

---
 modules/assets/NO_FILE                        |  0
 modules/ensembl/fasta/recombine/main.nf       | 22 ++++++++++++++---
 modules/ensembl/fasta/recombine/meta.yml      |  4 ++--
 .../fasta/recombine/tests/main.nf.test        |  2 --
 modules/ensembl/fasta/split/main.nf           | 23 ++++++++++++++----
 modules/ensembl/fasta/split/meta.yml          |  4 ++--
 .../ensembl/fasta/split/tests/main.nf.test    |  2 --
 modules/ensembl/features/combine_json/main.nf | 23 ++++++++++++++----
 .../ensembl/features/combine_json/meta.yml    |  4 ++--
 requirements-dev.txt                          |  2 --
 tests/config/nextflow.config                  |  2 +-
 tests/conftest.py                             | 24 -------------------
 12 files changed, 64 insertions(+), 48 deletions(-)
 delete mode 100644 modules/assets/NO_FILE
 delete mode 100644 requirements-dev.txt
 delete mode 100644 tests/conftest.py

diff --git a/modules/assets/NO_FILE b/modules/assets/NO_FILE
deleted file mode 100644
index e69de29..0000000
diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 1e2e88c..4992c35 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -13,6 +13,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+params.ensembl_genomio_version_cmd = '''
+python - <<'PY'
+from importlib.metadata import distributions
+
+print(next(
+    (
+        dist.version
+        for dist in distributions()
+        if dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"
+    ),
+    "unknown",
+))
+PY
+'''.stripIndent()
+
 process FASTA_RECOMBINE {
 
     tag "${meta.id}"
@@ -28,7 +43,10 @@ process FASTA_RECOMBINE {
 
     output:
         tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
-        tuple val("${task.process}"), val('fasta_recombine'), eval('echo 1.6.1'), emit: versions_fasta_recombine, topic: versions
+        tuple val("${task.process}"),
+            val('fasta_recombine'),
+            eval(params.ensembl_genomio_version_cmd),
+            emit: versions_fasta_recombine, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
@@ -61,8 +79,6 @@ process FASTA_RECOMBINE {
 
     stub:
         """
-        set -euo pipefail
-
         out_fa="${meta.id}.fa"
         touch "\$out_fa"
         """   
diff --git a/modules/ensembl/fasta/recombine/meta.yml b/modules/ensembl/fasta/recombine/meta.yml
index 8583e3e..c28be5b 100644
--- a/modules/ensembl/fasta/recombine/meta.yml
+++ b/modules/ensembl/fasta/recombine/meta.yml
@@ -52,7 +52,7 @@ output:
       - fasta_recombine:
           type: string
           description: The name of the tool.
-      - echo 1.6.1:
+      - params.ensembl_genomio_version_cmd:
           type: eval
           description: The expression to obtain the version of the tool.
 topics:
@@ -63,7 +63,7 @@ topics:
       - fasta_recombine:
           type: string
           description: The name of the tool.
-      - echo 1.6.1:
+      - params.ensembl_genomio_version_cmd:
           type: eval
           description: The expression to obtain the version of the tool.
 authors:
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index 4448cf7..a0c650d 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -20,8 +20,6 @@ nextflow_process {
     script "../main.nf"
     process "FASTA_RECOMBINE"
 
-    tag "modules"
-    tag "modules_ensembl"
     tag "fasta"
     tag "fasta/recombine"
 
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 2e3acc5..7e22745 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -13,8 +13,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-process FASTA_SPLIT {
+params.ensembl_genomio_version_cmd = '''
+python - <<'PY'
+from importlib.metadata import distributions
+
+print(next(
+    (
+        dist.version
+        for dist in distributions()
+        if dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"
+    ),
+    "unknown",
+))
+PY
+'''.stripIndent()
 
+process FASTA_SPLIT {
     tag "${meta.id}"
     label 'process_medium'
 
@@ -29,7 +43,10 @@ process FASTA_SPLIT {
     output:
         tuple val(meta), path("splits/**/*.fa"), emit: fastas
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
-        tuple val("${task.process}"), val('fasta_split'), eval('echo 1.6.1'), emit: versions_fasta_split, topic: versions
+        tuple val("${task.process}"),
+            val('fasta_split'),
+            eval(params.ensembl_genomio_version_cmd),
+            emit: versions_fasta_split, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
@@ -82,8 +99,6 @@ process FASTA_SPLIT {
 
     stub:
         """
-        set -euo pipefail
-
         layout="default"
         if [[ "${params.unique_file_names ?: false}" == "true" ]]; then
             layout="unique"
diff --git a/modules/ensembl/fasta/split/meta.yml b/modules/ensembl/fasta/split/meta.yml
index 96303dd..90e45ca 100644
--- a/modules/ensembl/fasta/split/meta.yml
+++ b/modules/ensembl/fasta/split/meta.yml
@@ -60,7 +60,7 @@ output:
       - fasta_split:
           type: string
           description: The name of the tool.
-      - echo 1.6.1:
+      - params.ensembl_genomio_version_cmd:
           type: eval
           description: The expression to obtain the version of the tool.
 topics:
@@ -71,7 +71,7 @@ topics:
       - fasta_split:
           type: string
           description: The name of the tool.
-      - echo 1.6.1:
+      - params.ensembl_genomio_version_cmd:
           type: eval
           description: The expression to obtain the version of the tool.
 authors:
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test b/modules/ensembl/fasta/split/tests/main.nf.test
index 042ff9c..5906c14 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test
+++ b/modules/ensembl/fasta/split/tests/main.nf.test
@@ -20,8 +20,6 @@ nextflow_process {
     script "../main.nf"
     process "FASTA_SPLIT"
 
-    tag "modules"
-    tag "modules_ensembl"
     tag "fasta"
     tag "fasta/split"
 
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index a44767e..8e55063 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -13,8 +13,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-process FEATURES_COMBINE_JSON {
+params.ensembl_genomio_version_cmd = '''
+python - <<'PY'
+from importlib.metadata import distributions
+
+print(next(
+    (
+        dist.version
+        for dist in distributions()
+        if dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"
+    ),
+    "unknown",
+))
+PY
+'''.stripIndent()
 
+process FEATURES_COMBINE_JSON {
     tag "${meta.id}"
     label 'process_medium'
 
@@ -28,7 +42,10 @@ process FEATURES_COMBINE_JSON {
 
     output:
         tuple val(meta), path("${meta.id}.${meta.analysis}.json"), emit: combined_json
-        tuple val("${task.process}"), val('features_combine_json'), eval('echo 1.6.1'), emit: versions_features_combine_json, topic: versions
+        tuple val("${task.process}"),
+            val('features_combine_json'),
+            eval(params.ensembl_genomio_version_cmd),
+            emit: versions_features_combine_json, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
@@ -61,8 +78,6 @@ process FEATURES_COMBINE_JSON {
 
     stub:
         """
-        set -euo pipefail
-
         out_json="${meta.id}.${meta.analysis}.json"
 
         test -s "${json_manifest}"
diff --git a/modules/ensembl/features/combine_json/meta.yml b/modules/ensembl/features/combine_json/meta.yml
index e524e75..e14d694 100644
--- a/modules/ensembl/features/combine_json/meta.yml
+++ b/modules/ensembl/features/combine_json/meta.yml
@@ -52,7 +52,7 @@ output:
       - features_combine_json:
           type: string
           description: The name of the tool.
-      - echo 1.6.1:
+      - params.ensembl_genomio_version_cmd:
           type: eval
           description: The expression to obtain the version of the tool.
 topics:
@@ -63,7 +63,7 @@ topics:
       - features_combine_json:
           type: string
           description: The name of the tool.
-      - echo 1.6.1:
+      - params.ensembl_genomio_version_cmd:
           type: eval
           description: The expression to obtain the version of the tool.
 authors:
diff --git a/requirements-dev.txt b/requirements-dev.txt
deleted file mode 100644
index c0367d2..0000000
--- a/requirements-dev.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-biopython
-pytest
\ No newline at end of file
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
index a527e1f..e4c8606 100644
--- a/tests/config/nextflow.config
+++ b/tests/config/nextflow.config
@@ -16,5 +16,5 @@
 includeConfig 'test_data.config'
 
 singularity {
-    enabled = false
+    enabled = true
 }
diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index 766dbc3..0000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import importlib.util
-from pathlib import Path
-
-import pytest
-
-
-@pytest.fixture(scope="session")
-def split_fasta_module():
-    """
-    Load modules/ensembl/fasta/splitfasta/split_fasta.py as a Python module
-    regardless of whether 'modules/' is a Python package.
-    """
-    repo_root = Path(__file__).resolve().parents[1]
-    module_path = (
-        repo_root / "modules" / "ensembl" / "fasta" / "splitfasta" / "split_fasta.py"
-    )
-
-    spec = importlib.util.spec_from_file_location("split_fasta", module_path)
-    if spec is None or spec.loader is None:
-        raise RuntimeError(f"Could not load module spec from {module_path}")
-
-    mod = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(mod)
-    return mod

From 4e6e53f7d23b39b11cf9dd5d4335be06ef40815e Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 14 May 2026 10:49:00 +0100
Subject: [PATCH 28/36] Remove blank line

---
 modules/ensembl/fasta/recombine/main.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 4992c35..1cb042f 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -69,7 +69,6 @@ process FASTA_RECOMBINE {
         }
 
         def out_fasta = "${meta.id}.fa"
-
         """
         fasta_recombine \\
             --fasta-manifest ${fasta_manifest} \\

From a4d48549e1ead62b963cdf13d0c2720efefad99f Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 14 May 2026 13:56:32 +0100
Subject: [PATCH 29/36] Use single line version cmd for param

---
 modules/ensembl/fasta/recombine/main.nf       | 15 +--------------
 modules/ensembl/fasta/split/main.nf           | 15 +--------------
 modules/ensembl/features/combine_json/main.nf | 15 +--------------
 3 files changed, 3 insertions(+), 42 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 1cb042f..2aa07d7 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -13,20 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-params.ensembl_genomio_version_cmd = '''
-python - <<'PY'
-from importlib.metadata import distributions
-
-print(next(
-    (
-        dist.version
-        for dist in distributions()
-        if dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"
-    ),
-    "unknown",
-))
-PY
-'''.stripIndent()
+params.ensembl_genomio_version_cmd = "python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"
 
 process FASTA_RECOMBINE {
 
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 7e22745..b17f539 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -13,20 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-params.ensembl_genomio_version_cmd = '''
-python - <<'PY'
-from importlib.metadata import distributions
-
-print(next(
-    (
-        dist.version
-        for dist in distributions()
-        if dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"
-    ),
-    "unknown",
-))
-PY
-'''.stripIndent()
+params.ensembl_genomio_version_cmd = "python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"
 
 process FASTA_SPLIT {
     tag "${meta.id}"
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index 8e55063..9bed496 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -13,20 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-params.ensembl_genomio_version_cmd = '''
-python - <<'PY'
-from importlib.metadata import distributions
-
-print(next(
-    (
-        dist.version
-        for dist in distributions()
-        if dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"
-    ),
-    "unknown",
-))
-PY
-'''.stripIndent()
+params.ensembl_genomio_version_cmd = "python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"
 
 process FEATURES_COMBINE_JSON {
     tag "${meta.id}"

From 59e24c4c3ac26cf93a9b71235a5b228329c1b389 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 14 May 2026 14:18:52 +0100
Subject: [PATCH 30/36] Use command directly within eval

---
 modules/ensembl/fasta/recombine/main.nf       | 4 +---
 modules/ensembl/fasta/split/main.nf           | 4 +---
 modules/ensembl/features/combine_json/main.nf | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 2aa07d7..2d86da7 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -13,8 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-params.ensembl_genomio_version_cmd = "python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"
-
 process FASTA_RECOMBINE {
 
     tag "${meta.id}"
@@ -32,7 +30,7 @@ process FASTA_RECOMBINE {
         tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
         tuple val("${task.process}"),
             val('fasta_recombine'),
-            eval(params.ensembl_genomio_version_cmd),
+            eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"),
             emit: versions_fasta_recombine, topic: versions
 
     when:
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index b17f539..b287f6a 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -13,8 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-params.ensembl_genomio_version_cmd = "python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"
-
 process FASTA_SPLIT {
     tag "${meta.id}"
     label 'process_medium'
@@ -32,7 +30,7 @@ process FASTA_SPLIT {
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
         tuple val("${task.process}"),
             val('fasta_split'),
-            eval(params.ensembl_genomio_version_cmd),
+            eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"),
             emit: versions_fasta_split, topic: versions
 
     when:
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index 9bed496..bac3f91 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -13,8 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-params.ensembl_genomio_version_cmd = "python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"
-
 process FEATURES_COMBINE_JSON {
     tag "${meta.id}"
     label 'process_medium'
@@ -31,7 +29,7 @@ process FEATURES_COMBINE_JSON {
         tuple val(meta), path("${meta.id}.${meta.analysis}.json"), emit: combined_json
         tuple val("${task.process}"),
             val('features_combine_json'),
-            eval(params.ensembl_genomio_version_cmd),
+            eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"),
             emit: versions_features_combine_json, topic: versions
 
     when:

From 30961a3db2c7f09efa988594194a6dd7f0c821d8 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 14 May 2026 14:40:24 +0100
Subject: [PATCH 31/36] Update snapshots

---
 .../fasta/recombine/tests/main.nf.test.snap   | 12 +++++-----
 .../fasta/split/tests/main.nf.test.snap       | 24 +++++++++----------
 .../combine_json/tests/main.nf.test.snap      | 24 +++++++++----------
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index 2ad0719..3f33a6d 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -14,7 +14,7 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "recombined_fasta": [
@@ -29,12 +29,12 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:50:13.249443",
+        "timestamp": "2026-05-14T14:39:11.350698",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -55,7 +55,7 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "recombined_fasta": [
@@ -70,12 +70,12 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:50:11.167936",
+        "timestamp": "2026-05-14T14:39:09.216174",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
index 9914c59..ebe20c3 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/split/tests/main.nf.test.snap
@@ -20,7 +20,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "agp": [
@@ -41,12 +41,12 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:50:15.366293",
+        "timestamp": "2026-05-14T14:38:41.602246",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -78,7 +78,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "agp": [
@@ -104,12 +104,12 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:50:17.499614",
+        "timestamp": "2026-05-14T14:38:43.765608",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -136,7 +136,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "agp": [
@@ -157,12 +157,12 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:50:21.755317",
+        "timestamp": "2026-05-14T14:38:48.132705",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -189,7 +189,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "agp": [
@@ -210,12 +210,12 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:50:19.618244",
+        "timestamp": "2026-05-14T14:38:45.953655",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index 986d31b..5850f06 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -15,7 +15,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "combined_json": [
@@ -31,12 +31,12 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:52:12.162809",
+        "timestamp": "2026-05-14T14:39:29.784572",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -58,7 +58,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "combined_json": [
@@ -74,12 +74,12 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:52:09.797407",
+        "timestamp": "2026-05-14T14:39:27.607529",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -101,7 +101,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "combined_json": [
@@ -117,12 +117,12 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:52:14.52976",
+        "timestamp": "2026-05-14T14:39:31.963829",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"
@@ -144,7 +144,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ],
                 "combined_json": [
@@ -160,12 +160,12 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.1"
+                        "1.6.2"
                     ]
                 ]
             }
         ],
-        "timestamp": "2026-05-12T13:52:07.471915",
+        "timestamp": "2026-05-14T14:39:25.403423",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.3"

From b771306996e506b30787a1e59852b934a9c897bc Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Thu, 14 May 2026 16:38:48 +0100
Subject: [PATCH 32/36] Linting fixes

---
 modules/ensembl/fasta/recombine/main.nf       |  7 ++--
 modules/ensembl/fasta/recombine/meta.yml      | 27 ++++++++------
 .../fasta/recombine/tests/main.nf.test        |  2 ++
 modules/ensembl/fasta/split/main.nf           |  7 ++--
 modules/ensembl/fasta/split/meta.yml          | 31 +++++++++-------
 .../ensembl/fasta/split/tests/main.nf.test    |  2 ++
 modules/ensembl/features/combine_json/main.nf | 15 ++++----
 .../ensembl/features/combine_json/meta.yml    | 35 ++++++++++++-------
 .../features/combine_json/tests/main.nf.test  | 12 ++++---
 .../combine_json/tests/main.nf.test.snap      | 24 +++++--------
 10 files changed, 86 insertions(+), 76 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 2d86da7..0c8afa9 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -21,17 +21,14 @@ process FASTA_RECOMBINE {
     conda "${moduleDir}/environment.yml"
     container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
         ? 'https://depot.galaxyproject.org/singularity/ensembl-genomio:1.6.1--pyhdfd78af_0'
-        : 'biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
+        : 'quay.io/biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
 
     input:
         tuple val(meta), path(fasta_manifest), path(agp)
 
     output:
         tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
-        tuple val("${task.process}"),
-            val('fasta_recombine'),
-            eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"),
-            emit: versions_fasta_recombine, topic: versions
+        tuple val("${task.process}"), val('fasta_recombine'), eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"), emit: versions_fasta_recombine, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
diff --git a/modules/ensembl/fasta/recombine/meta.yml b/modules/ensembl/fasta/recombine/meta.yml
index c28be5b..2836455 100644
--- a/modules/ensembl/fasta/recombine/meta.yml
+++ b/modules/ensembl/fasta/recombine/meta.yml
@@ -1,4 +1,3 @@
----
 name: "fasta_recombine"
 description: Recombine split FASTA sequences into a single FASTA file,
   optionally using an AGP file.
@@ -8,14 +7,13 @@ keywords:
   - genomics
   - genomio
   - recombine
-
 tools:
   - "fasta_recombine":
       description: "Recombine split FASTA sequences generated by ensembl-genomio."
       homepage: "https://github.com/Ensembl/ensembl-genomio"
-      licence: ["Apache License version 2.0"]
+      licence:
+        - "Apache License version 2.0"
       identifier: ""
-
 input:
   - - meta:
         type: map
@@ -41,10 +39,11 @@ output:
           description: |
             Groovy Map containing meta information
             e.g. `[ id:'accession1' ]`
-      - "${meta.id}.fa":
+      - ${meta.id}.fa:
           type: file
           description: Recombined FASTA file.
           pattern: "*.fa"
+          ontologies: []
   versions_fasta_recombine:
     - - ${task.process}:
           type: string
@@ -52,9 +51,12 @@ output:
       - fasta_recombine:
           type: string
           description: The name of the tool.
-      - params.ensembl_genomio_version_cmd:
-          type: eval
-          description: The expression to obtain the version of the tool.
+      - ? python -c 'from importlib.metadata import distributions;
+          print(next((dist.version for dist in distributions() if
+          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
+          "unknown"))'
+        : type: eval
+          description: The expression to obtain the version of the tool
 topics:
   versions:
     - - ${task.process}:
@@ -63,9 +65,12 @@ topics:
       - fasta_recombine:
           type: string
           description: The name of the tool.
-      - params.ensembl_genomio_version_cmd:
-          type: eval
-          description: The expression to obtain the version of the tool.
+      - ? python -c 'from importlib.metadata import distributions;
+          print(next((dist.version for dist in distributions() if
+          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
+          "unknown"))'
+        : type: eval
+          description: The expression to obtain the version of the tool
 authors:
   - "ensembl-dev@ebi.ac.uk"
 maintainers:
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test
index a0c650d..4448cf7 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test
@@ -20,6 +20,8 @@ nextflow_process {
     script "../main.nf"
     process "FASTA_RECOMBINE"
 
+    tag "modules"
+    tag "modules_ensembl"
     tag "fasta"
     tag "fasta/recombine"
 
diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index b287f6a..9b62fc0 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -20,7 +20,7 @@ process FASTA_SPLIT {
     conda "${moduleDir}/environment.yml"
     container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
         ? 'https://depot.galaxyproject.org/singularity/ensembl-genomio:1.6.1--pyhdfd78af_0'
-        : 'biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
+        : 'quay.io/biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
 
     input:
         tuple val(meta), path(fasta), val(longest_seq_bp)
@@ -28,10 +28,7 @@ process FASTA_SPLIT {
     output:
         tuple val(meta), path("splits/**/*.fa"), emit: fastas
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
-        tuple val("${task.process}"),
-            val('fasta_split'),
-            eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"),
-            emit: versions_fasta_split, topic: versions
+        tuple val("${task.process}"), val('fasta_split'), eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"), emit: versions_fasta_split, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
diff --git a/modules/ensembl/fasta/split/meta.yml b/modules/ensembl/fasta/split/meta.yml
index 90e45ca..6617745 100644
--- a/modules/ensembl/fasta/split/meta.yml
+++ b/modules/ensembl/fasta/split/meta.yml
@@ -1,4 +1,3 @@
----
 name: "fasta_split"
 description: Split a FASTA file into smaller FASTA files and optionally write an
   AGP file.
@@ -8,14 +7,13 @@ keywords:
   - genomics
   - genomio
   - split
-
 tools:
   - "fasta_split":
       description: "Split FASTA files with ensembl-genomio."
       homepage: "https://github.com/Ensembl/ensembl-genomio"
-      licence: ["Apache License version 2.0"]
+      licence:
+        - "Apache License version 2.0"
       identifier: ""
-
 input:
   - - meta:
         type: map
@@ -31,7 +29,6 @@ input:
         type: integer
         description: Length in base pairs of the longest sequence in the input
           FASTA.
-
 output:
   fastas:
     - - meta:
@@ -39,20 +36,22 @@ output:
           description: |
             Groovy Map containing meta information
             e.g. `[ id:'accession1' ]`
-      - "splits/**/*.fa":
+      - splits/**/*.fa:
           type: file
           description: Split FASTA files.
           pattern: "splits/**/*.fa"
+          ontologies: []
   agp:
     - - meta:
           type: map
           description: |
             Groovy Map containing meta information
             e.g. `[ id:'accession1' ]`
-      - "splits/*.agp":
+      - splits/*.agp:
           type: file
           description: Optional AGP file describing split sequence chunks.
           pattern: "splits/*.agp"
+          ontologies: []
   versions_fasta_split:
     - - ${task.process}:
           type: string
@@ -60,9 +59,12 @@ output:
       - fasta_split:
           type: string
           description: The name of the tool.
-      - params.ensembl_genomio_version_cmd:
-          type: eval
-          description: The expression to obtain the version of the tool.
+      - ? python -c 'from importlib.metadata import distributions;
+          print(next((dist.version for dist in distributions() if
+          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
+          "unknown"))'
+        : type: eval
+          description: The expression to obtain the version of the tool
 topics:
   versions:
     - - ${task.process}:
@@ -71,9 +73,12 @@ topics:
       - fasta_split:
           type: string
           description: The name of the tool.
-      - params.ensembl_genomio_version_cmd:
-          type: eval
-          description: The expression to obtain the version of the tool.
+      - ? python -c 'from importlib.metadata import distributions;
+          print(next((dist.version for dist in distributions() if
+          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
+          "unknown"))'
+        : type: eval
+          description: The expression to obtain the version of the tool
 authors:
   - "ensembl-dev@ebi.ac.uk"
 maintainers:
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test b/modules/ensembl/fasta/split/tests/main.nf.test
index 5906c14..042ff9c 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test
+++ b/modules/ensembl/fasta/split/tests/main.nf.test
@@ -20,6 +20,8 @@ nextflow_process {
     script "../main.nf"
     process "FASTA_SPLIT"
 
+    tag "modules"
+    tag "modules_ensembl"
     tag "fasta"
     tag "fasta/split"
 
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index bac3f91..ebe2f7e 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -20,17 +20,14 @@ process FEATURES_COMBINE_JSON {
     conda "${moduleDir}/environment.yml"
     container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
         ? 'https://depot.galaxyproject.org/singularity/ensembl-genomio:1.6.1--pyhdfd78af_0'
-        : 'biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
+        : 'quay.io/biocontainers/ensembl-genomio:1.6.1--pyhdfd78af_0'}"
 
     input:
-        tuple val(meta), path(json_manifest), path(agp)
+        tuple val(meta), val(analysis), path(json_manifest), path(agp)
 
     output:
-        tuple val(meta), path("${meta.id}.${meta.analysis}.json"), emit: combined_json
-        tuple val("${task.process}"),
-            val('features_combine_json'),
-            eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"),
-            emit: versions_features_combine_json, topic: versions
+        tuple val(meta), path("${meta.id}.${analysis}.json"), emit: combined_json
+        tuple val("${task.process}"), val('features_combine_json'), eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"), emit: versions_features_combine_json, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
@@ -52,7 +49,7 @@ process FEATURES_COMBINE_JSON {
             args << "--agp-file '${agp}'"
         }
 
-        def out_json = "${meta.id}.${meta.analysis}.json"
+        def out_json = "${meta.id}.${analysis}.json"
 
         """
         features_combine_json \\
@@ -63,7 +60,7 @@ process FEATURES_COMBINE_JSON {
 
     stub:
         """
-        out_json="${meta.id}.${meta.analysis}.json"
+        out_json="${meta.id}.${analysis}.json"
 
         test -s "${json_manifest}"
 
diff --git a/modules/ensembl/features/combine_json/meta.yml b/modules/ensembl/features/combine_json/meta.yml
index e14d694..4558ebe 100644
--- a/modules/ensembl/features/combine_json/meta.yml
+++ b/modules/ensembl/features/combine_json/meta.yml
@@ -1,4 +1,3 @@
----
 name: "features_combine_json"
 description: Combine split feature JSON files into a single JSON file,
   optionally using an AGP file.
@@ -8,20 +7,22 @@ keywords:
   - genomics
   - genomio
   - json
-
 tools:
   - "features_combine_json":
       description: "Combine split feature JSON files generated by ensembl-genomio."
       homepage: "https://github.com/Ensembl/ensembl-genomio"
-      licence: ["Apache License version 2.0"]
+      licence:
+        - "Apache License version 2.0"
       identifier: ""
-
 input:
   - - meta:
         type: map
         description: |
           Groovy Map containing meta information
-          e.g. `[ id:'accession1', analysis:'repeat' ]`
+          e.g. `[ id:'accession1' ]`
+    - analysis:
+        type: string
+        description: Analysis name to include in the combined JSON filename.
     - json_manifest:
         type: file
         description: Manifest file listing split JSON files to combine.
@@ -40,11 +41,13 @@ output:
           type: map
           description: |
             Groovy Map containing meta information
-            e.g. `[ id:'accession1', analysis:'repeat' ]`
-      - "${meta.id}.${meta.analysis}.json":
+            e.g. `[ id:'accession1' ]`
+      - ${meta.id}.${analysis}.json:
           type: file
           description: Combined feature JSON file.
           pattern: "*.json"
+          ontologies:
+            - edam: http://edamontology.org/format_3464
   versions_features_combine_json:
     - - ${task.process}:
           type: string
@@ -52,9 +55,12 @@ output:
       - features_combine_json:
           type: string
           description: The name of the tool.
-      - params.ensembl_genomio_version_cmd:
-          type: eval
-          description: The expression to obtain the version of the tool.
+      - ? python -c 'from importlib.metadata import distributions;
+          print(next((dist.version for dist in distributions() if
+          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
+          "unknown"))'
+        : type: eval
+          description: The expression to obtain the version of the tool
 topics:
   versions:
     - - ${task.process}:
@@ -63,9 +69,12 @@ topics:
       - features_combine_json:
           type: string
           description: The name of the tool.
-      - params.ensembl_genomio_version_cmd:
-          type: eval
-          description: The expression to obtain the version of the tool.
+      - ? python -c 'from importlib.metadata import distributions;
+          print(next((dist.version for dist in distributions() if
+          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
+          "unknown"))'
+        : type: eval
+          description: The expression to obtain the version of the tool
 authors:
   - "ensembl-dev@ebi.ac.uk"
 maintainers:
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test
index cb61d27..eb6af6d 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test
@@ -42,7 +42,8 @@ nextflow_process {
                 noFile.text = ""
 
                 input[0] = [
-                    [ id:'test', analysis:'features' ],
+                    [ id:'test' ],
+                    'features',
                     manifest,
                     noFile
                 ]
@@ -75,7 +76,8 @@ nextflow_process {
                 noFile.text = ""
 
                 input[0] = [
-                    [ id:'test', analysis:'features' ],
+                    [ id:'test' ],
+                    'features',
                     manifest,
                     noFile
                 ]
@@ -108,7 +110,8 @@ nextflow_process {
                 agp.text = ""
 
                 input[0] = [
-                    [ id:'test', analysis:'features' ],
+                    [ id:'test' ],
+                    'features',
                     manifest,
                     agp
                 ]
@@ -141,7 +144,8 @@ nextflow_process {
                 agp.text = ""
 
                 input[0] = [
-                    [ id:'test', analysis:'features' ],
+                    [ id:'test' ],
+                    'features',
                     manifest,
                     agp
                 ]
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index 5850f06..1927537 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -5,8 +5,7 @@
                 "0": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -21,8 +20,7 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -48,8 +46,7 @@
                 "0": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -64,8 +61,7 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -91,8 +87,7 @@
                 "0": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -107,8 +102,7 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,67c630685f9c819ef28574144c284b4e"
                     ]
@@ -134,8 +128,7 @@
                 "0": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]
@@ -150,8 +143,7 @@
                 "combined_json": [
                     [
                         {
-                            "id": "test",
-                            "analysis": "features"
+                            "id": "test"
                         },
                         "test.features.json:md5,aefc84472e26178b64d01051be6d58b2"
                     ]

From b50cfba5019afab709012af4f4db8673971f483c Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 15 May 2026 15:31:55 +0100
Subject: [PATCH 33/36] Use package versions

---
 modules/ensembl/fasta/split/main.nf           | 2 +-
 modules/ensembl/features/combine_json/main.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf
index 9b62fc0..da643c7 100644
--- a/modules/ensembl/fasta/split/main.nf
+++ b/modules/ensembl/fasta/split/main.nf
@@ -28,7 +28,7 @@ process FASTA_SPLIT {
     output:
         tuple val(meta), path("splits/**/*.fa"), emit: fastas
         tuple val(meta), path("splits/*.agp"), emit: agp, optional: true
-        tuple val("${task.process}"), val('fasta_split'), eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"), emit: versions_fasta_split, topic: versions
+        tuple val("${task.process}"), val('fasta_split'), eval("fasta_split --version"), emit: versions_fasta_split, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
diff --git a/modules/ensembl/features/combine_json/main.nf b/modules/ensembl/features/combine_json/main.nf
index ebe2f7e..e366038 100644
--- a/modules/ensembl/features/combine_json/main.nf
+++ b/modules/ensembl/features/combine_json/main.nf
@@ -27,7 +27,7 @@ process FEATURES_COMBINE_JSON {
 
     output:
         tuple val(meta), path("${meta.id}.${analysis}.json"), emit: combined_json
-        tuple val("${task.process}"), val('features_combine_json'), eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"), emit: versions_features_combine_json, topic: versions
+        tuple val("${task.process}"), val('features_combine_json'), eval("features_combine_json --version"), emit: versions_features_combine_json, topic: versions
 
     when:
         task.ext.when == null || task.ext.when

From adb281acd7c44be67f8753149c7e20e90a8acc41 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 15 May 2026 15:39:37 +0100
Subject: [PATCH 34/36] Update meta.yml

---
 modules/ensembl/fasta/split/meta.yml           | 10 ++--------
 modules/ensembl/features/combine_json/meta.yml | 10 ++--------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/modules/ensembl/fasta/split/meta.yml b/modules/ensembl/fasta/split/meta.yml
index 6617745..a00fcba 100644
--- a/modules/ensembl/fasta/split/meta.yml
+++ b/modules/ensembl/fasta/split/meta.yml
@@ -59,10 +59,7 @@ output:
       - fasta_split:
           type: string
           description: The name of the tool.
-      - ? python -c 'from importlib.metadata import distributions;
-          print(next((dist.version for dist in distributions() if
-          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
-          "unknown"))'
+      - ? fasta_split --version
         : type: eval
           description: The expression to obtain the version of the tool
 topics:
@@ -73,10 +70,7 @@ topics:
       - fasta_split:
           type: string
           description: The name of the tool.
-      - ? python -c 'from importlib.metadata import distributions;
-          print(next((dist.version for dist in distributions() if
-          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
-          "unknown"))'
+      - ? fasta_split --version
         : type: eval
           description: The expression to obtain the version of the tool
 authors:
diff --git a/modules/ensembl/features/combine_json/meta.yml b/modules/ensembl/features/combine_json/meta.yml
index 4558ebe..049fa1a 100644
--- a/modules/ensembl/features/combine_json/meta.yml
+++ b/modules/ensembl/features/combine_json/meta.yml
@@ -55,10 +55,7 @@ output:
       - features_combine_json:
           type: string
           description: The name of the tool.
-      - ? python -c 'from importlib.metadata import distributions;
-          print(next((dist.version for dist in distributions() if
-          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
-          "unknown"))'
+      - ? features_combine_json --version
         : type: eval
           description: The expression to obtain the version of the tool
 topics:
@@ -69,10 +66,7 @@ topics:
       - features_combine_json:
           type: string
           description: The name of the tool.
-      - ? python -c 'from importlib.metadata import distributions;
-          print(next((dist.version for dist in distributions() if
-          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
-          "unknown"))'
+      - ? features_combine_json --version
         : type: eval
           description: The expression to obtain the version of the tool
 authors:

From a14b6c572c368a9187aa2ca27b61de1b5080746b Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Fri, 15 May 2026 16:24:34 +0100
Subject: [PATCH 35/36] Bump genomio version in snapshots

---
 .../fasta/recombine/tests/main.nf.test.snap      |  8 ++++----
 .../ensembl/fasta/split/tests/main.nf.test.snap  | 16 ++++++++--------
 .../combine_json/tests/main.nf.test.snap         | 16 ++++++++--------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
index 3f33a6d..0357999 100644
--- a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -14,7 +14,7 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "recombined_fasta": [
@@ -29,7 +29,7 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -55,7 +55,7 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "recombined_fasta": [
@@ -70,7 +70,7 @@
                     [
                         "FASTA_RECOMBINE",
                         "fasta_recombine",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
diff --git a/modules/ensembl/fasta/split/tests/main.nf.test.snap b/modules/ensembl/fasta/split/tests/main.nf.test.snap
index ebe20c3..07ec1d6 100644
--- a/modules/ensembl/fasta/split/tests/main.nf.test.snap
+++ b/modules/ensembl/fasta/split/tests/main.nf.test.snap
@@ -20,7 +20,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "agp": [
@@ -41,7 +41,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -78,7 +78,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "agp": [
@@ -104,7 +104,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -136,7 +136,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "agp": [
@@ -157,7 +157,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -189,7 +189,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "agp": [
@@ -210,7 +210,7 @@
                     [
                         "FASTA_SPLIT",
                         "fasta_split",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
index 1927537..397c8f0 100644
--- a/modules/ensembl/features/combine_json/tests/main.nf.test.snap
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -14,7 +14,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "combined_json": [
@@ -29,7 +29,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -55,7 +55,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "combined_json": [
@@ -70,7 +70,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -96,7 +96,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "combined_json": [
@@ -111,7 +111,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }
@@ -137,7 +137,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ],
                 "combined_json": [
@@ -152,7 +152,7 @@
                     [
                         "FEATURES_COMBINE_JSON",
                         "features_combine_json",
-                        "1.6.2"
+                        "1.6.3"
                     ]
                 ]
             }

From ef8a3ea388d054494cb52888f8496abe9e8d8434 Mon Sep 17 00:00:00 2001
From: Mark Quinton-Tulloch <markquintontulloch@gmail.com>
Date: Tue, 19 May 2026 15:36:53 +0100
Subject: [PATCH 36/36] Update versioning for fasta_recombine

---
 modules/ensembl/fasta/recombine/main.nf  |  2 +-
 modules/ensembl/fasta/recombine/meta.yml | 10 ++--------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf
index 0c8afa9..d14d71b 100644
--- a/modules/ensembl/fasta/recombine/main.nf
+++ b/modules/ensembl/fasta/recombine/main.nf
@@ -28,7 +28,7 @@ process FASTA_RECOMBINE {
 
     output:
         tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
-        tuple val("${task.process}"), val('fasta_recombine'), eval("python -c 'from importlib.metadata import distributions; print(next((dist.version for dist in distributions() if dist.metadata[\"Name\"].lower().replace(\"_\", \"-\") == \"ensembl-genomio\"), \"unknown\"))'"), emit: versions_fasta_recombine, topic: versions
+        tuple val("${task.process}"), val('fasta_recombine'), eval("fasta_recombine --version"), emit: versions_fasta_recombine, topic: versions
 
     when:
         task.ext.when == null || task.ext.when
diff --git a/modules/ensembl/fasta/recombine/meta.yml b/modules/ensembl/fasta/recombine/meta.yml
index 2836455..7c7aec4 100644
--- a/modules/ensembl/fasta/recombine/meta.yml
+++ b/modules/ensembl/fasta/recombine/meta.yml
@@ -51,10 +51,7 @@ output:
       - fasta_recombine:
           type: string
           description: The name of the tool.
-      - ? python -c 'from importlib.metadata import distributions;
-          print(next((dist.version for dist in distributions() if
-          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
-          "unknown"))'
+      - ? fasta_recombine --version
         : type: eval
           description: The expression to obtain the version of the tool
 topics:
@@ -65,10 +62,7 @@ topics:
       - fasta_recombine:
           type: string
           description: The name of the tool.
-      - ? python -c 'from importlib.metadata import distributions;
-          print(next((dist.version for dist in distributions() if
-          dist.metadata["Name"].lower().replace("_", "-") == "ensembl-genomio"),
-          "unknown"))'
+      - ? fasta_recombine --version
         : type: eval
           description: The expression to obtain the version of the tool
 authors: