From dee7e4c94eba61a2e4ba02c818fbcdf4be4d55bd Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 9 Jun 2026 23:21:55 +0200 Subject: [PATCH] Ship type information and enforce it; make gpr_to_dnf public Three related "make the package's contracts real" changes: - Add the PEP 561 py.typed marker so the package's extensive type hints are visible to downstream type checkers (geckopy included). The hatchling wheel ships raven_python/py.typed. - Add mypy to the dev extra, a lenient [tool.mypy] config (ignore_missing_imports for the un-stubbed cobra/optlang/scipy/ruamel), and a mypy CI job. Fix the 36 type errors this surfaced -- all type-only (Path vs str annotations, None-guards that match existing behaviour, optlang Variable typing, isinstance/cast narrowing). No runtime behaviour changes; the full test suite stays green. - Promote manipulation.expand._gpr_to_dnf to a public gpr_to_dnf (re-exported from raven_python.manipulation). geckopy's call sites switch to it in lockstep (separate PR), so no deprecated alias is kept. --- .github/workflows/ci.yml | 16 ++++++++++++++++ pyproject.toml | 7 +++++++ src/raven_python/comparison/compare.py | 7 +++++-- src/raven_python/gapfilling/fill.py | 3 ++- src/raven_python/init/build.py | 4 ++-- src/raven_python/init/ftinit.py | 7 ++++--- src/raven_python/init/genes.py | 8 ++++---- src/raven_python/init/init.py | 3 ++- src/raven_python/init/score.py | 3 +-- src/raven_python/io/excel.py | 2 +- src/raven_python/localization/predict.py | 7 ++++--- src/raven_python/manipulation/__init__.py | 3 ++- src/raven_python/manipulation/expand.py | 6 ++++-- src/raven_python/manipulation/remove.py | 2 +- src/raven_python/py.typed | 0 .../reconstruction/homology/blast.py | 8 ++++---- src/raven_python/reconstruction/kegg/download.py | 2 ++ src/raven_python/reconstruction/kegg/hmm.py | 6 +++--- src/raven_python/tasks/check.py | 12 ++++++++---- src/raven_python/utils/gpr.py | 7 +++---- tests/test_manipulation_expand.py | 4 ++-- 21 files changed, 77 insertions(+), 40 deletions(-) create mode 100644 src/raven_python/py.typed diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cdfbe0c..efa7b85 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,22 @@ jobs: - run: pip install ruff==0.15.15 - run: ruff check . + typecheck: + name: mypy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: pyproject.toml + - run: pip install --upgrade pip + # Installs runtime deps + the dev extra (mypy). Third-party stubs are + # absent for cobra/optlang/scipy; ignore_missing_imports handles that. + - run: pip install -e ".[dev]" + - run: mypy + test: name: pytest (py${{ matrix.python }}) runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index 376a961..16da355 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dev = [ "pytest>=7", "pytest-cov", "ruff==0.15.15", + "mypy>=1.8", ] excel = [ "openpyxl>=3.1", @@ -82,3 +83,9 @@ ignore = [ [tool.ruff.lint.per-file-ignores] "tests/*" = ["E402"] + +[tool.mypy] +python_version = "3.11" +files = ["src/raven_python"] +# cobra / optlang / scipy / ruamel ship no type stubs; don't error on their imports. +ignore_missing_imports = true diff --git a/src/raven_python/comparison/compare.py b/src/raven_python/comparison/compare.py index 36e8b73..f7eafde 100644 --- a/src/raven_python/comparison/compare.py +++ b/src/raven_python/comparison/compare.py @@ -13,6 +13,8 @@ from collections.abc import Iterable from dataclasses import dataclass, field +from pathlib import Path +from typing import cast import cobra import pandas as pd @@ -131,8 +133,9 @@ def compare_models( # raven_python.tasks.check_tasks accepts a path or an iterable of Task; preserve task # ids for the index. Capture the list once so all models test the same set. from raven_python.tasks.tasklist import parse_task_list - task_list = (parse_task_list(tasks) if isinstance(tasks, (str, bytes)) - or hasattr(tasks, "__fspath__") else list(tasks)) + task_list = (parse_task_list(cast("str | Path", tasks)) + if isinstance(tasks, (str, bytes)) or hasattr(tasks, "__fspath__") + else list(tasks)) task_ids = [t.id for t in task_list] task_df = pd.DataFrame(False, index=task_ids, columns=model_ids, dtype=bool) for mid, m in zip(model_ids, models_list, strict=True): diff --git a/src/raven_python/gapfilling/fill.py b/src/raven_python/gapfilling/fill.py index ad0f1ab..798f1aa 100644 --- a/src/raven_python/gapfilling/fill.py +++ b/src/raven_python/gapfilling/fill.py @@ -20,6 +20,7 @@ import cobra from cobra.flux_analysis import find_blocked_reactions, flux_variability_analysis +from optlang.interface import Variable # untyped (resolves to Any); used for container hints from raven_python.manipulation.transfer import add_reactions_from_model @@ -74,7 +75,7 @@ def _solve_min_templates( the problem is infeasible. """ prob = working.problem - indicators: dict[str, object] = {} + indicators: dict[str, Variable] = {} extra = [] for rid in template_ids: rxn = working.reactions.get_by_id(rid) diff --git a/src/raven_python/init/build.py b/src/raven_python/init/build.py index a0d0538..f6aba56 100644 --- a/src/raven_python/init/build.py +++ b/src/raven_python/init/build.py @@ -69,8 +69,8 @@ def get_init_model( model, gene_scores, isozyme_scoring=isozyme_scoring, complex_scoring=complex_scoring, no_gene_score=no_gene_score, ) - else: - scores = dict(rxn_scores) + else: # rxn_scores is not None here (exactly-one-of check above); `or {}` placates mypy + scores = dict(rxn_scores or {}) deleted_dead_end: list[str] = [] if remove_dead_ends: diff --git a/src/raven_python/init/ftinit.py b/src/raven_python/init/ftinit.py index 58e4ab9..e46be94 100644 --- a/src/raven_python/init/ftinit.py +++ b/src/raven_python/init/ftinit.py @@ -52,6 +52,7 @@ import cobra from cobra.exceptions import OptimizationError +from optlang.interface import Variable # untyped (resolves to Any); used for container hints from optlang.symbolics import Real, add, mul from raven_python.init.genes import remove_low_score_genes @@ -112,8 +113,8 @@ def run_ftinit( variables: list = [] constraints: list = [] - flux_terms: dict[str, list[tuple[object, float]]] = {} # rxn id -> [(var, sign)] - indicators: dict[str, tuple[object, float]] = {} # rxn id -> (indicator var, score) + flux_terms: dict[str, list[tuple[Variable, float]]] = {} # rxn id -> [(var, sign)] + indicators: dict[str, tuple[Variable, float]] = {} # rxn id -> (indicator var, score) free_or_essential: set[str] = set() # kept regardless of an indicator def add_constraint(expr, **kw): @@ -218,7 +219,7 @@ def add_constraint(expr, **kw): on = {rid for rid, (ind, _) in indicators.items() if (ind.primal or 0.0) >= 0.5} kept = free_or_essential | on deleted = [r.id for r in model.reactions if r.id not in kept] - fluxes = { + fluxes: dict[str, float] = { rid: sum(sign * (var.primal or 0.0) for var, sign in terms) for rid, terms in flux_terms.items() } diff --git a/src/raven_python/init/genes.py b/src/raven_python/init/genes.py index 4076c13..abc9a5d 100644 --- a/src/raven_python/init/genes.py +++ b/src/raven_python/init/genes.py @@ -26,15 +26,15 @@ def _prune(node, scores, iso, cplx) -> tuple[str | None, float | None]: if not isinstance(node, ast.BoolOp): return None, None - children = [_prune(v, scores, iso, cplx) for v in node.values] - children = [(s, sc) for s, sc in children if s is not None] + pruned = [_prune(v, scores, iso, cplx) for v in node.values] + children: list[tuple[str, float | None]] = [(s, sc) for s, sc in pruned if s is not None] if isinstance(node.op, ast.And): # complex: keep every subunit, prune nested ORs kept = children else: # OR / isozymes: drop negative-scoring alternatives, keep at least one kept = [(s, sc) for s, sc in children if sc is None or sc >= 0] - if not kept: # all negative → keep the least-negative - kept = [max(children, key=lambda c: c[1])] + if not kept: # all negative → keep the least-negative (every sc is non-None here) + kept = [max(children, key=lambda c: c[1] if c[1] is not None else float("-inf"))] parts = [s for s, _ in kept] score_vals = [sc for _, sc in kept if sc is not None] diff --git a/src/raven_python/init/init.py b/src/raven_python/init/init.py index 3cd5942..05b6d84 100644 --- a/src/raven_python/init/init.py +++ b/src/raven_python/init/init.py @@ -40,6 +40,7 @@ import cobra from cobra.exceptions import OptimizationError +from optlang.interface import Variable # untyped (resolves to Any); used for container hints from optlang.symbolics import Real, add, mul _EPS = 1.0 # flux an included reaction must carry (RAVEN's fake-met unit) @@ -137,7 +138,7 @@ def run_init( flux = {d.key: prob.Variable(f"v_{d.key}", lb=0.0, ub=d.ub) for d in directed} # Binary include-indicators for non-essential reactions; eps*x <= v <= ub*x. - keep: dict[str, object] = {} + keep: dict[str, Variable] = {} gates = [] for d in directed: if d.essential: diff --git a/src/raven_python/init/score.py b/src/raven_python/init/score.py index c380544..215912c 100644 --- a/src/raven_python/init/score.py +++ b/src/raven_python/init/score.py @@ -39,10 +39,9 @@ def gene_scores_from_expression( score is clamped to ``[min_score, max_score]``; non-positive level/reference (and missing reference) → ``min_score`` (RAVEN maps these NaNs to -5). """ - scalar = isinstance(reference, (int, float)) scores: dict[str, float] = {} for gene, level in expression.items(): - ref = reference if scalar else reference.get(gene) + ref = reference if isinstance(reference, (int, float)) else reference.get(gene) if not level or not ref or level <= 0 or ref <= 0: scores[gene] = min_score else: diff --git a/src/raven_python/io/excel.py b/src/raven_python/io/excel.py index 398b6da..bf768a0 100644 --- a/src/raven_python/io/excel.py +++ b/src/raven_python/io/excel.py @@ -17,7 +17,7 @@ def _miriam_string(annotation: dict, exclude: tuple[str, ...] = ()) -> str: """RAVEN MIRIAM column: ``namespace/id;namespace/id2;...`` (sorted).""" - parts = [] + parts: list[str] = [] for namespace in sorted(annotation): if namespace in exclude: continue diff --git a/src/raven_python/localization/predict.py b/src/raven_python/localization/predict.py index 3fc2369..f112927 100644 --- a/src/raven_python/localization/predict.py +++ b/src/raven_python/localization/predict.py @@ -31,6 +31,7 @@ import cobra import pandas as pd +from optlang.interface import Variable # untyped (resolves to Any); used for container hints from optlang.symbolics import Real, add, mul from raven_python.localization.scores import LocalizationScores @@ -165,12 +166,12 @@ def _met_cost(m_id: str) -> float: opt = prob.Model() # x[r, c] = 1 iff reaction r placed in c (only for r ∈ placeable) - x: dict[tuple[str, str], object] = { + x: dict[tuple[str, str], Variable] = { (r.id, c): prob.Variable(f"x_{r.id}_{c}", type="binary") for r in placeable for c in compartments } # y[g, c] = 1 iff gene g assigned to c - y: dict[tuple[str, str], object] = { + y: dict[tuple[str, str], Variable] = { (g, c): prob.Variable(f"y_{g}_{c}", type="binary") for g in genes_in_scope for c in compartments } @@ -182,7 +183,7 @@ def _met_cost(m_id: str) -> float: for c in compartments: if c != default_compartment: met_keys.add((m.id, c)) - t: dict[tuple[str, str], object] = { + t: dict[tuple[str, str], Variable] = { k: prob.Variable(f"t_{k[0]}_{k[1]}", type="binary") for k in met_keys } diff --git a/src/raven_python/manipulation/__init__.py b/src/raven_python/manipulation/__init__.py index a462949..587643e 100644 --- a/src/raven_python/manipulation/__init__.py +++ b/src/raven_python/manipulation/__init__.py @@ -3,7 +3,7 @@ isozyme expansion, compartment merge / copy, and model merging by name.""" from .add import add_reactions_from_equations from .change import change_gene_reaction_rules, change_reaction_equations -from .expand import expand_model +from .expand import expand_model, gpr_to_dnf from .irreversible import convert_to_irreversible from .merge import merge_models from .parameters import set_variance_bounds @@ -28,6 +28,7 @@ "convert_to_irreversible", "expand_model", "find_duplicate_reactions", + "gpr_to_dnf", "group_linear_reactions", "merge_models", "remove_dead_end_reactions", diff --git a/src/raven_python/manipulation/expand.py b/src/raven_python/manipulation/expand.py index 246f3b9..3fd6847 100644 --- a/src/raven_python/manipulation/expand.py +++ b/src/raven_python/manipulation/expand.py @@ -20,8 +20,10 @@ import cobra from cobra.core.gene import GPR +__all__ = ["expand_model", "gpr_to_dnf"] -def _gpr_to_dnf(gpr: GPR) -> list[list[str]]: + +def gpr_to_dnf(gpr: GPR) -> list[list[str]]: """Convert a GPR to disjunctive normal form (list of AND-clauses). An empty GPR yields an empty list. A single clause (no OR anywhere) @@ -88,7 +90,7 @@ def expand_model(model: cobra.Model) -> list[str]: for rxn in model.reactions: if not rxn.gene_reaction_rule: continue - clauses = _gpr_to_dnf(rxn.gpr) + clauses = gpr_to_dnf(rxn.gpr) if len(clauses) <= 1: continue expansions.append((rxn, clauses)) diff --git a/src/raven_python/manipulation/remove.py b/src/raven_python/manipulation/remove.py index 492de36..43f966f 100644 --- a/src/raven_python/manipulation/remove.py +++ b/src/raven_python/manipulation/remove.py @@ -99,7 +99,7 @@ def remove_genes( return [] # Reactions touched by these genes that currently have a GPR. - affected = set() + affected: set[str] = set() for gid in present: affected.update(r.id for r in model.genes.get_by_id(gid).reactions) had_gpr = {rid for rid in affected if model.reactions.get_by_id(rid).gene_reaction_rule} diff --git a/src/raven_python/py.typed b/src/raven_python/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/raven_python/reconstruction/homology/blast.py b/src/raven_python/reconstruction/homology/blast.py index 246ddab..fdffae1 100644 --- a/src/raven_python/reconstruction/homology/blast.py +++ b/src/raven_python/reconstruction/homology/blast.py @@ -72,8 +72,8 @@ def run_blast( outfmt = "10 " + " ".join(_OUTFMT_FIELDS) # 10 = CSV frames = [] - with tempfile.TemporaryDirectory() as tmp: - tmp = Path(tmp) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp = Path(tmp_dir) def blastp_dir(query, subject_fasta, from_id, to_id): db = tmp / f"db_{from_id}_{to_id}" @@ -113,8 +113,8 @@ def run_diamond( diamond = resolve_binary("diamond", binary=diamond) frames = [] - with tempfile.TemporaryDirectory() as tmp: - tmp = Path(tmp) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp = Path(tmp_dir) def diamond_dir(query, subject_fasta, from_id, to_id): db = tmp / f"db_{from_id}_{to_id}" diff --git a/src/raven_python/reconstruction/kegg/download.py b/src/raven_python/reconstruction/kegg/download.py index d990b42..f6102f8 100644 --- a/src/raven_python/reconstruction/kegg/download.py +++ b/src/raven_python/reconstruction/kegg/download.py @@ -195,6 +195,8 @@ def lift(rel: str, tmp: str) -> Path | None: f"KEGG archives did not yield required file(s): {missing}. " f"Check that the source .tar.gz archives are present in {dest}." ) + # Narrowing for the type checker: the `missing` check above raised if any were None. + assert reaction is not None and compound is not None and ko is not None shutil.move(str(reaction), str(dest / "reaction")) shutil.move(str(ko), str(dest / "ko")) diff --git a/src/raven_python/reconstruction/kegg/hmm.py b/src/raven_python/reconstruction/kegg/hmm.py index 0e210b6..34afd8d 100644 --- a/src/raven_python/reconstruction/kegg/hmm.py +++ b/src/raven_python/reconstruction/kegg/hmm.py @@ -329,8 +329,8 @@ def build_ko_hmm( logger.info("[%s] start: %d sequences", label, n) hmmbuild = resolve_binary("hmmbuild", binary=hmmbuild) - with tempfile.TemporaryDirectory() as tmp: - tmp = Path(tmp) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp = Path(tmp_dir) if n == 1: if verbose: logger.info("[%s] single sequence: skipping CD-HIT/MAFFT", label) @@ -408,7 +408,7 @@ def build_hmm_library( mafft: str | Path | None = None, hmmbuild: str | Path | None = None, hmmpress: str | Path | None = None, -) -> dict[str, Path | list[Path]]: +) -> dict[str, Path | list[Path] | None]: """Build a domain (``"prokaryotes"``/``"eukaryotes"``) HMM library. Restricts genes to the domain's organisms (from ``taxonomy``), builds a diff --git a/src/raven_python/tasks/check.py b/src/raven_python/tasks/check.py index 817bae5..0e66df2 100644 --- a/src/raven_python/tasks/check.py +++ b/src/raven_python/tasks/check.py @@ -17,6 +17,7 @@ from collections.abc import Iterable from dataclasses import dataclass from pathlib import Path +from typing import cast import cobra from cobra.exceptions import OptimizationError @@ -51,11 +52,14 @@ def _set_constraint_bounds(constraint, lb: float, ub: float) -> None: constraint.ub = ub -def _classify(token: str) -> tuple[str, str | None]: - """Return ``("all", None)``, ``("comp", COMP)``, or ``("met", token_upper)``.""" +def _classify(token: str) -> tuple[str, str]: + """Return ``("all", "")``, ``("comp", COMP)``, or ``("met", token_upper)``. + + The arg is empty only for ``"all"``, which never reads it. + """ upper = token.upper() if upper == _ALLMETS: - return "all", None + return "all", "" if upper.startswith(_ALLMETSIN + "[") and upper.endswith("]"): return "comp", upper[len(_ALLMETSIN) + 1: -1] return "met", upper # incl. malformed ALLMETSIN[... → treated as a (missing) metabolite @@ -210,7 +214,7 @@ def check_tasks( def _as_tasks(tasks: str | Iterable[Task]) -> list[Task]: if isinstance(tasks, (str, bytes)) or hasattr(tasks, "__fspath__"): - return parse_task_list(tasks) + return parse_task_list(cast("str | Path", tasks)) # guard above ⇒ path-like return list(tasks) diff --git a/src/raven_python/utils/gpr.py b/src/raven_python/utils/gpr.py index 28818da..64ecbdb 100644 --- a/src/raven_python/utils/gpr.py +++ b/src/raven_python/utils/gpr.py @@ -95,11 +95,10 @@ def is_dnf(gpr: GPR | str | None) -> bool: >>> is_dnf("(G1 or G2) and G3") False """ - if isinstance(gpr, str): - gpr = GPR.from_string(gpr) - if gpr is None: + parsed = GPR.from_string(gpr) if isinstance(gpr, str) else gpr + if parsed is None: return True - return _is_dnf_node(gpr.body) + return _is_dnf_node(parsed.body) @dataclass(frozen=True) diff --git a/tests/test_manipulation_expand.py b/tests/test_manipulation_expand.py index 08cd2f2..2a5492d 100644 --- a/tests/test_manipulation_expand.py +++ b/tests/test_manipulation_expand.py @@ -5,7 +5,7 @@ import cobra from raven_python.manipulation import expand_model -from raven_python.manipulation.expand import _gpr_to_dnf +from raven_python.manipulation.expand import gpr_to_dnf # --------------------------------------------------------------------------- # # DNF conversion (internal helper, worth testing directly) @@ -15,7 +15,7 @@ def _dnf_from_gpr_string(gpr_str: str) -> list[list[str]]: from cobra.core.gene import GPR gpr = GPR.from_string(gpr_str) - return _gpr_to_dnf(gpr) + return gpr_to_dnf(gpr) def test_dnf_empty_gpr():