diff --git a/src/raven_python/analysis/sampling.py b/src/raven_python/analysis/sampling.py index e187ffd..5d11054 100644 --- a/src/raven_python/analysis/sampling.py +++ b/src/raven_python/analysis/sampling.py @@ -38,6 +38,7 @@ import pandas as pd from cobra.exceptions import OptimizationError from cobra.flux_analysis import flux_variability_analysis, pfba +from optlang.symbolics import add logger = logging.getLogger(__name__) @@ -186,7 +187,9 @@ def random_sampling( weights = rng.random(n_objectives) * signs terms = [w * good_rxn_objs[j].flux_expression for j, w in zip(chosen, weights, strict=True)] - model.objective = model.problem.Objective(sum(terms), direction="max") + # add() (not sum()) builds the symbolic objective in one pass; sum() + # re-canonicalises the optlang expression on every term (O(n^2)). + model.objective = model.problem.Objective(add(terms), direction="max") sol = model.optimize() if sol.status == "optimal" and abs(sol.objective_value) > 1e-8: fluxes = (pfba(model) if min_flux else sol).fluxes.reindex(reaction_ids) diff --git a/src/raven_python/manipulation/add.py b/src/raven_python/manipulation/add.py index d772acd..4adb2e7 100644 --- a/src/raven_python/manipulation/add.py +++ b/src/raven_python/manipulation/add.py @@ -97,8 +97,24 @@ def _new_met_id(model: cobra.Model, prefix: str) -> str: return f"{prefix}{n}" +def _build_met_index(model: cobra.Model) -> dict[tuple[str, str | None], Metabolite]: + """Build a ``(name, compartment) -> metabolite`` index (first match wins, + mirroring the old linear scan). Lets name / name[comp] resolution be O(1) + instead of re-scanning ``model.metabolites`` per token; callers update it as + new mets are created so later tokens dedup against earlier ones.""" + index: dict[tuple[str, str | None], Metabolite] = {} + for met in model.metabolites: + index.setdefault((met.name, met.compartment), met) + return index + + def _try_existing( - model: cobra.Model, token: str, *, mets_by: str, compartment: str | None + model: cobra.Model, + token: str, + *, + mets_by: str, + compartment: str | None, + met_index: dict[tuple[str, str | None], Metabolite], ) -> Metabolite | None: """Look up ``token`` as an existing metabolite (no creation, no side effects). @@ -113,10 +129,7 @@ def _try_existing( target_comp = comp if comp is not None else compartment if target_comp is None: return None - for met in model.metabolites: - if met.name == name and met.compartment == target_comp: - return met - return None + return met_index.get((name, target_comp)) def _resolve_metabolite( @@ -127,6 +140,7 @@ def _resolve_metabolite( compartment: str | None, allow_new_mets: bool, new_met_prefix: str, + met_index: dict[tuple[str, str | None], Metabolite], ) -> Metabolite: """Resolve an equation token to an existing or newly created Metabolite.""" name, comp = parse_name_comp(token) @@ -146,6 +160,7 @@ def _resolve_metabolite( _warn_unknown_compartment(model, compartment, token) met = Metabolite(token, compartment=compartment) model.add_metabolites([met]) + met_index.setdefault((met.name, met.compartment), met) return met # name-based (mets_by="name") or explicit name[comp] @@ -158,13 +173,9 @@ def _resolve_metabolite( if comp is not None and target_comp not in model.compartments and not allow_new_mets: raise ValueError(f"Compartment {target_comp!r} is not in the model.") - matches = [ - met - for met in model.metabolites - if met.name == name and met.compartment == target_comp - ] - if matches: - return matches[0] + existing = met_index.get((name, target_comp)) + if existing is not None: + return existing if not allow_new_mets: raise ValueError( f"No metabolite named {name!r} in compartment {target_comp!r}; " @@ -173,6 +184,7 @@ def _resolve_metabolite( _warn_unknown_compartment(model, target_comp, name) met = Metabolite(_new_met_id(model, new_met_prefix), name=name, compartment=target_comp) model.add_metabolites([met]) + met_index.setdefault((met.name, met.compartment), met) return met @@ -202,6 +214,7 @@ def _stoichiometry( compartment: str | None, allow_new_mets: bool, new_met_prefix: str, + met_index: dict[tuple[str, str | None], Metabolite], ) -> tuple[dict[Metabolite, float], bool]: """Parse an equation into a {Metabolite: net coefficient} dict + reversibility.""" lhs, rhs, reversible = _split_equation(equation) @@ -217,7 +230,8 @@ def _stoichiometry( met = None if fallback is not None: met = _try_existing( - model, fallback, mets_by=mets_by, compartment=compartment + model, fallback, mets_by=mets_by, compartment=compartment, + met_index=met_index, ) if met is not None: coeff = 1.0 @@ -229,6 +243,7 @@ def _stoichiometry( compartment=compartment, allow_new_mets=allow_new_mets, new_met_prefix=new_met_prefix, + met_index=met_index, ) coeffs[met] = coeffs.get(met, 0.0) + sign * coeff # Drop metabolites that net to zero (present as both substrate and product). @@ -295,6 +310,7 @@ def add_reactions_from_equations( known_genes = {gene.id for gene in model.genes} added: list[Reaction] = [] + met_index = _build_met_index(model) for spec in reactions: if "id" not in spec: @@ -316,6 +332,7 @@ def add_reactions_from_equations( compartment=compartment, allow_new_mets=allow_new_mets, new_met_prefix=new_met_prefix, + met_index=met_index, ) rxn = Reaction(rxn_id, name=spec.get("name", "")) diff --git a/src/raven_python/manipulation/change.py b/src/raven_python/manipulation/change.py index 073fdfd..95808d2 100644 --- a/src/raven_python/manipulation/change.py +++ b/src/raven_python/manipulation/change.py @@ -18,7 +18,7 @@ import cobra from cobra import Reaction -from raven_python.manipulation.add import _stoichiometry +from raven_python.manipulation.add import _build_met_index, _stoichiometry __all__ = ["change_reaction_equations", "change_gene_reaction_rules"] @@ -60,6 +60,7 @@ def change_reaction_equations( raise ValueError(f"mets_by must be 'id' or 'name', got {mets_by!r}") changed: list[Reaction] = [] + met_index = _build_met_index(model) for rxn_id, equation in equations.items(): if rxn_id not in model.reactions: raise ValueError(f"Reaction {rxn_id!r} not found in the model.") @@ -72,6 +73,7 @@ def change_reaction_equations( compartment=compartment, allow_new_mets=allow_new_mets, new_met_prefix=new_met_prefix, + met_index=met_index, ) rxn.subtract_metabolites(dict(rxn.metabolites), combine=True) diff --git a/src/raven_python/reconstruction/homology/homology.py b/src/raven_python/reconstruction/homology/homology.py index bc6fa41..d5400a4 100644 --- a/src/raven_python/reconstruction/homology/homology.py +++ b/src/raven_python/reconstruction/homology/homology.py @@ -153,8 +153,14 @@ def _ortholog_map( if pairs.empty: return {} - # Keep only template genes that actually exist in their model. - pairs = pairs[pairs.apply(lambda r: r.template_gene in model_genes.get(r.model_id, ()), axis=1)] + # Keep only template genes that actually exist in their model. A list + # comprehension over the columns avoids DataFrame.apply(axis=1)'s per-row + # Series construction (model_genes values are sets, so membership is O(1)). + keep = [ + tg in model_genes.get(mid, ()) + for mid, tg in zip(pairs.model_id, pairs.template_gene, strict=True) + ] + pairs = pairs[keep] ortho: dict = {} for model_id, template_gene, new_gene in zip(pairs.model_id, pairs.template_gene, pairs.new_gene, strict=True): diff --git a/tests/test_manipulation_add.py b/tests/test_manipulation_add.py index 2a3a9d3..3ba2153 100644 --- a/tests/test_manipulation_add.py +++ b/tests/test_manipulation_add.py @@ -114,6 +114,25 @@ def test_name_mode_creates_new_met_with_auto_id(model): assert new[0].compartment == "c" +def test_name_mode_dedups_new_met_across_reactions(model): + # A new metabolite named on more than one reaction in the same call must be + # created once and shared — later tokens have to see mets created earlier in + # the call (the (name, comp) index is seeded once and updated on creation). + r1, r2 = add_reactions_from_equations( + model, + [ + {"id": "R1", "equation": "ATP --> AMP"}, + {"id": "R2", "equation": "AMP --> ADP"}, + ], + mets_by="name", + compartment="c", + ) + amp = [m for m in model.metabolites if m.name == "AMP"] + assert len(amp) == 1 # created once, not duplicated + assert amp[0] in r1.metabolites + assert amp[0] in r2.metabolites + + def test_name_mode_requires_compartment(model): with pytest.raises(ValueError, match="needs a compartment"): add_reactions_from_equations(