From d2b0ab53007a2d8f8fc2519a747c28fd4e0523f4 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 1 Jun 2026 09:30:44 +0000 Subject: [PATCH 01/18] Add long-format parse adapters + declare openms-insight dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation for migrating FLASHApp visualizations onto OpenMS-Insight (Phase 0): - src/parse/long_format.py: pure-polars adapters that explode FLASHApp's arrays-per-scan caches into the long format OpenMS-Insight components filter by column value: * explode_spectrum_long: MonoMass[]/SumIntensity[] -> one row per peak with explicit index + per-scan mass_id (assigned pre-filter so massIndex maps to the original array position) -> LinePlot/Table with filters={scanIndex:index} * explode_combined_spectrum_long: deconv + annotated -> primary + overlay series for the tagger LinePlot * explode_signal_peaks_long: nested SignalPeaks/NoisyPeaks -> Scatter3D long format (index, mass_id, mz, charge, intensity, kind); robust to empty nested peak columns * density_series_long: precomputed target/decoy curves -> DensityPlot long {series,x,y}; handles empty decoy These are additive — the existing render pipeline is untouched, so old and new paths coexist during the phased rollout. - requirements.txt: declare openms-insight (git dependency; editable/path for local dev) — previously absent. - tests/test_long_format.py: 11 tests incl. index->value filtering reproducing iloc semantics, mass_id-before-intensity-filter, row-count preservation, signal/noise + massIndex isolation, empty-decoy. Full suite: 53 passed. 
https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- requirements.txt | 7 ++ src/parse/long_format.py | 252 ++++++++++++++++++++++++++++++++++++++ tests/test_long_format.py | 177 ++++++++++++++++++++++++++ 3 files changed, 436 insertions(+) create mode 100644 src/parse/long_format.py create mode 100644 tests/test_long_format.py diff --git a/requirements.txt b/requirements.txt index 8fcf006..7b7fb86 100644 --- a/requirements.txt +++ b/requirements.txt @@ -140,6 +140,13 @@ xlsxwriter scipy>=1.15 polars>=1.0.0 +# OpenMS-Insight: reusable interactive visualization components. FLASHApp's +# visualizations are migrating onto these library components (replacing the +# bespoke flash_viewer_grid / src/render pipeline). Not yet on PyPI; install +# from the repository. For local development use an editable/path install: +# pip install -e ../OpenMS-Insight +openms-insight @ git+https://github.com/t0mdavid-m/OpenMS-Insight.git@claude/peaceful-mayer-YqiXZ + # Redis Queue dependencies (for online mode) redis>=5.0.0 rq>=1.16.0 diff --git a/src/parse/long_format.py b/src/parse/long_format.py new file mode 100644 index 0000000..f954788 --- /dev/null +++ b/src/parse/long_format.py @@ -0,0 +1,252 @@ +"""Long-format adapters for the OpenMS-Insight migration. + +FLASHApp's deconvolution caches historically store spectra as *arrays per scan* +(one row per scan, with ``MonoMass``/``SumIntensity`` list columns) and the old +``flash_viewer_grid`` filtered them by **row index** (``iloc[scanIndex]``). + +OpenMS-Insight components filter by **column value** and expect **long format** +(one row per peak) with explicit identifier columns. These helpers explode the +existing per-scan frames into that long format so they can be fed directly to +``LinePlot`` / ``Table`` / ``Scatter3D`` with +``filters={'scanIndex': 'index'}`` (and ``massIndex`` where a per-scan peak +index is needed). 
+ +The functions are intentionally pure (Polars in, Polars out) and additive: they +do not touch the existing parse pipeline, so the old and new rendering paths can +coexist during the phased rollout. +""" + +from typing import Optional + +import polars as pl + + +def explode_spectrum_long( + per_scan: pl.LazyFrame, + *, + index_column: str = "index", + mass_array_column: str = "MonoMass", + intensity_array_column: str = "SumIntensity", + mass_out: str = "mass", + intensity_out: str = "intensity", + mass_id_out: str = "mass_id", + drop_nonpositive_intensity: bool = False, +) -> pl.LazyFrame: + """Explode an arrays-per-scan spectrum frame into long format. + + Each input row holds one scan with parallel ``MonoMass`` / ``SumIntensity`` + list columns. The output has one row per peak with: + + - ``index`` : the originating scan's row index (filter target for + ``scanIndex``); carried through verbatim. + - ``mass`` : the peak mass / m/z. + - ``intensity`` : the peak intensity. + - ``mass_id`` : 0-based peak index within the scan (filter target for + ``massIndex``), assigned BEFORE any intensity filter so + it matches the original array position. + + Args: + per_scan: LazyFrame with one row per scan and list-valued mass/intensity + columns plus an ``index`` column. + index_column: Name of the per-scan row-index column. Default "index". + mass_array_column: List column of masses. Default "MonoMass". + intensity_array_column: List column of intensities. Default "SumIntensity". + mass_out: Output mass column name. Default "mass". + intensity_out: Output intensity column name. Default "intensity". + mass_id_out: Output per-scan peak-index column name. Default "mass_id". + drop_nonpositive_intensity: If True, drop peaks with null/<=0 intensity + AFTER mass_id assignment (default False — keep every peak so positions + stay aligned with the original arrays). + + Returns: + Long-format LazyFrame, sorted by ``index`` then ``mass_id``. 
+ """ + lf = per_scan.select( + [ + pl.col(index_column).alias("index"), + pl.col(mass_array_column).alias(mass_out), + pl.col(intensity_array_column).alias(intensity_out), + ] + ).explode([mass_out, intensity_out]) + + # Assign per-scan peak index over the original (pre-filter) order so it + # matches the position in the source arrays — this is what massIndex selects. + lf = lf.with_columns( + pl.int_range(pl.len()).over("index").alias(mass_id_out) + ) + + if drop_nonpositive_intensity: + lf = lf.filter( + pl.col(intensity_out).is_not_null() & (pl.col(intensity_out) > 0) + ) + + return lf.sort(["index", mass_id_out]) + + +def explode_combined_spectrum_long( + per_scan: pl.LazyFrame, + *, + index_column: str = "index", + deconv_mass_column: str = "MonoMass", + deconv_intensity_column: str = "SumIntensity", + anno_mass_column: str = "MonoMass_Anno", + anno_intensity_column: str = "SumIntensity_Anno", +) -> tuple[pl.LazyFrame, pl.LazyFrame]: + """Explode a combined (deconv + annotated) spectrum into two long frames. + + The FLASHApp ``combined_spectrum`` cache holds, per scan, both the + deconvolved sticks (``MonoMass``/``SumIntensity``) and the raw/annotated + peaks (``MonoMass_Anno``/``SumIntensity_Anno``). The augmented "tagger" + spectrum overlays the latter on the former. + + Returns a ``(deconv_long, anno_long)`` pair, each in the + :func:`explode_spectrum_long` schema, suitable for a ``LinePlot`` primary + series + overlay series respectively (both filtered by ``scanIndex`` → + ``index``). 
+ """ + deconv_long = explode_spectrum_long( + per_scan, + index_column=index_column, + mass_array_column=deconv_mass_column, + intensity_array_column=deconv_intensity_column, + ) + anno_long = explode_spectrum_long( + per_scan, + index_column=index_column, + mass_array_column=anno_mass_column, + intensity_array_column=anno_intensity_column, + ) + return deconv_long, anno_long + + +def explode_signal_peaks_long( + per_scan: pl.LazyFrame, + *, + index_column: str = "index", + signal_column: str = "SignalPeaks", + noisy_column: str = "NoisyPeaks", + signal_value: str = "signal", + noise_value: str = "noise", +) -> pl.LazyFrame: + """Explode per-scan signal/noisy peak arrays into long format for Scatter3D. + + FLASHApp stores ``SignalPeaks`` / ``NoisyPeaks`` as, per scan, a list over + masses of a list of peaks, where each peak is ``[?, mz, intensity, charge]`` + (the 3D plot uses index 1 = m/z, 2 = intensity, 3 = charge; x is rendered as + ``mz * charge``). This explodes both into one row per peak with: + + - ``index`` : scan row index (filter target for ``scanIndex``) + - ``mass_id`` : mass index within the scan (filter target for + ``massIndex`` — isolates one mass's peaks) + - ``mz`` : x (mass = mz * charge) + - ``charge`` : y + - ``intensity`` : z + - ``kind`` : "signal" or "noise" + + Args: + per_scan: LazyFrame with one row per scan and nested-list + ``SignalPeaks``/``NoisyPeaks`` columns plus an ``index`` column. + index_column: Per-scan row-index column. Default "index". + signal_column: Nested signal-peaks column. Default "SignalPeaks". + noisy_column: Nested noisy-peaks column. Default "NoisyPeaks". + signal_value: ``kind`` value for signal peaks. Default "signal". + noise_value: ``kind`` value for noise peaks. Default "noise". + + Returns: + Long-format LazyFrame with columns + index, mass_id, mz, charge, intensity, kind — Scatter3D-ready. 
+ """ + + empty_schema = { + "index": pl.Int64, + "mass_id": pl.Int64, + "mz": pl.Float64, + "charge": pl.Float64, + "intensity": pl.Float64, + "kind": pl.Utf8, + } + + def _one_kind(column: str, kind: str) -> pl.LazyFrame: + # Level 1: list over masses -> add mass_id; Level 2: list over peaks. + lf = per_scan.select( + [ + pl.col(index_column).alias("index"), + pl.col(column).alias("_peaks_by_mass"), + ] + ) + # Explode masses, then assign mass_id within each scan. + lf = lf.explode("_peaks_by_mass").with_columns( + pl.int_range(pl.len()).over("index").alias("mass_id") + ) + # Now _peaks_by_mass is a list of peaks (each a list of floats). + lf = lf.rename({"_peaks_by_mass": "_peak"}).explode("_peak") + + # Defensive: if the peak column carries no type information (e.g. an + # all-empty column from untyped input), `.list.get()` would raise. In + # real data the pyarrow schema keeps the nested list-of-lists type, so + # this only guards degenerate inputs — emit an empty typed frame. + peak_dtype = lf.collect_schema().get("_peak") + if not isinstance(peak_dtype, (pl.List, pl.Array)): + return pl.LazyFrame(schema=empty_schema) + + lf = lf.filter(pl.col("_peak").is_not_null()) + lf = lf.with_columns( + [ + pl.col("_peak").list.get(1).cast(pl.Float64).alias("mz"), + pl.col("_peak").list.get(2).cast(pl.Float64).alias("intensity"), + pl.col("_peak").list.get(3).cast(pl.Float64).alias("charge"), + pl.lit(kind).alias("kind"), + ] + ).select(["index", "mass_id", "mz", "charge", "intensity", "kind"]) + return lf + + signal_lf = _one_kind(signal_column, signal_value) + noise_lf = _one_kind(noisy_column, noise_value) + return pl.concat([signal_lf, noise_lf]).sort(["index", "mass_id", "kind"]) + + +def density_series_long( + target_density: pl.DataFrame, + decoy_density: Optional[pl.DataFrame] = None, + *, + target_label: str = "Target", + decoy_label: str = "Decoy", + x_column: str = "x", + y_column: str = "y", +) -> pl.DataFrame: + """Stack precomputed target/decoy density curves into one 
long frame. + + FLASHApp's FDR plot precomputes ``density_target`` / ``density_decoy`` as + separate ``{x, y}`` frames. The OpenMS-Insight ``DensityPlot`` instead takes + a single long frame with a ``series`` column (then computes the KDE itself); + but when curves are already computed, this helper produces the equivalent + long ``{series, x, y}`` frame directly for a thin pass-through path. + + Empty/absent decoy frames are handled (no Decoy rows emitted). + """ + frames = [] + if target_density is not None and target_density.height > 0: + frames.append( + target_density.select( + [ + pl.lit(target_label).alias("series"), + pl.col(x_column).alias("x"), + pl.col(y_column).alias("y"), + ] + ) + ) + if decoy_density is not None and decoy_density.height > 0: + frames.append( + decoy_density.select( + [ + pl.lit(decoy_label).alias("series"), + pl.col(x_column).alias("x"), + pl.col(y_column).alias("y"), + ] + ) + ) + if not frames: + return pl.DataFrame( + schema={"series": pl.Utf8, "x": pl.Float64, "y": pl.Float64} + ) + return pl.concat(frames) diff --git a/tests/test_long_format.py b/tests/test_long_format.py new file mode 100644 index 0000000..c85bcfe --- /dev/null +++ b/tests/test_long_format.py @@ -0,0 +1,177 @@ +""" +Tests for the long-format adapters used by the OpenMS-Insight migration. + +FLASHApp stores spectra as arrays-per-scan and the old grid filtered by row +index. OpenMS-Insight filters by column value over long-format frames (one row +per peak). These adapters bridge the two; the migration's highest-risk change +(index->value & array explosion) lives here, so the transforms are unit-tested +directly. They are pure polars (no Streamlit), so testable without the app. 
+""" + +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import polars as pl + +from src.parse.long_format import ( + density_series_long, + explode_combined_spectrum_long, + explode_signal_peaks_long, + explode_spectrum_long, +) + + +def _per_scan_spectrum(): + # Two scans, arrays-per-scan (MonoMass / SumIntensity), like deconv_spectrum. + return pl.LazyFrame( + { + "index": [0, 1], + "MonoMass": [[100.0, 200.0, 300.0], [400.0, 500.0]], + "SumIntensity": [[10.0, 0.0, 30.0], [40.0, 50.0]], + } + ) + + +class TestExplodeSpectrumLong: + def test_basic_explosion(self): + out = explode_spectrum_long(_per_scan_spectrum()).collect() + # 3 + 2 = 5 peaks + assert out.height == 5 + assert out.columns == ["index", "mass", "intensity", "mass_id"] + + def test_mass_id_per_scan(self): + out = explode_spectrum_long(_per_scan_spectrum()).collect() + scan0 = out.filter(pl.col("index") == 0).sort("mass_id") + assert scan0["mass_id"].to_list() == [0, 1, 2] + assert scan0["mass"].to_list() == [100.0, 200.0, 300.0] + scan1 = out.filter(pl.col("index") == 1).sort("mass_id") + assert scan1["mass_id"].to_list() == [0, 1] + + def test_value_filter_reproduces_iloc(self): + """Filtering index==1 yields exactly scan 1's peaks (the iloc[1] rows).""" + out = explode_spectrum_long(_per_scan_spectrum()).collect() + scan1 = out.filter(pl.col("index") == 1) + assert scan1.height == 2 + assert sorted(scan1["mass"].to_list()) == [400.0, 500.0] + + def test_mass_id_assigned_before_intensity_filter(self): + """mass_id matches original array position even when peaks are dropped.""" + out = explode_spectrum_long( + _per_scan_spectrum(), drop_nonpositive_intensity=True + ).collect() + scan0 = out.filter(pl.col("index") == 0).sort("mass_id") + # The zero-intensity middle peak (mass_id 1) is dropped, leaving 0 and 2 + assert scan0["mass_id"].to_list() == [0, 2] + assert scan0["mass"].to_list() == [100.0, 300.0] + + def 
test_row_count_preserved_without_filter(self): + """Total exploded rows == sum of array lengths (no peaks lost).""" + src = _per_scan_spectrum().collect() + expected = sum(len(a) for a in src["MonoMass"].to_list()) + out = explode_spectrum_long(src.lazy()).collect() + assert out.height == expected + + +class TestExplodeCombinedSpectrum: + def test_two_series(self): + per_scan = pl.LazyFrame( + { + "index": [0], + "MonoMass": [[100.0, 200.0]], + "SumIntensity": [[10.0, 20.0]], + "MonoMass_Anno": [[101.0, 201.0, 301.0]], + "SumIntensity_Anno": [[5.0, 15.0, 25.0]], + } + ) + deconv, anno = explode_combined_spectrum_long(per_scan) + d = deconv.collect() + a = anno.collect() + assert d.height == 2 + assert a.height == 3 + assert d["mass"].to_list() == [100.0, 200.0] + assert a["mass"].to_list() == [101.0, 201.0, 301.0] + + +class TestExplodeSignalPeaks: + def test_signal_noise_explosion(self): + # One scan, two masses; each mass has signal peaks and noisy peaks. + # Peak format: [_, mz, intensity, charge] + # Peaks arrive as uniform-float lists (pyarrow float columns). 
+ per_scan = pl.LazyFrame( + { + "index": [5], + "SignalPeaks": [ + [ + [[0.0, 500.0, 1000.0, 2.0], [0.0, 510.0, 800.0, 2.0]], # mass 0 + [[0.0, 700.0, 1500.0, 3.0]], # mass 1 + ] + ], + "NoisyPeaks": [ + [ + [[0.0, 505.0, 50.0, 2.0]], # mass 0 + [], # mass 1 (no noise) + ] + ], + } + ) + out = explode_signal_peaks_long(per_scan).collect() + # 3 signal + 1 noise = 4 peaks + assert out.height == 4 + assert set(out.columns) == { + "index", + "mass_id", + "mz", + "charge", + "intensity", + "kind", + } + signal = out.filter(pl.col("kind") == "signal") + noise = out.filter(pl.col("kind") == "noise") + assert signal.height == 3 + assert noise.height == 1 + # massIndex isolation: mass 0 has 2 signal + 1 noise + mass0 = out.filter(pl.col("mass_id") == 0) + assert mass0.height == 3 + # mass 1 has 1 signal, 0 noise + mass1 = out.filter(pl.col("mass_id") == 1) + assert mass1.height == 1 + assert mass1["kind"].to_list() == ["signal"] + + def test_charge_and_intensity_extracted(self): + per_scan = pl.LazyFrame( + { + "index": [0], + "SignalPeaks": [[[[0.0, 500.0, 1000.0, 4.0]]]], + "NoisyPeaks": [[[]]], + } + ) + out = explode_signal_peaks_long(per_scan).collect() + assert out.height == 1 + row = out.row(0, named=True) + assert row["mz"] == 500.0 + assert row["intensity"] == 1000.0 + assert row["charge"] == 4.0 + + +class TestDensitySeriesLong: + def test_stacks_target_and_decoy(self): + target = pl.DataFrame({"x": [0.1, 0.2], "y": [1.0, 2.0]}) + decoy = pl.DataFrame({"x": [0.3], "y": [0.5]}) + out = density_series_long(target, decoy) + assert out.columns == ["series", "x", "y"] + assert out.filter(pl.col("series") == "Target").height == 2 + assert out.filter(pl.col("series") == "Decoy").height == 1 + + def test_empty_decoy(self): + target = pl.DataFrame({"x": [0.1], "y": [1.0]}) + empty = pl.DataFrame(schema={"x": pl.Float64, "y": pl.Float64}) + out = density_series_long(target, empty) + assert out.filter(pl.col("series") == "Decoy").height == 0 + assert 
out.filter(pl.col("series") == "Target").height == 1 + + def test_no_data(self): + out = density_series_long(None, None) + assert out.height == 0 + assert out.columns == ["series", "x", "y"] From 79aebad76ce6eafaa5cb3f7d10780e9b84266115 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 1 Jun 2026 09:31:33 +0000 Subject: [PATCH 02/18] docs: add OpenMS-Insight migration status + parity contract Capture the component mapping, state-key mapping, the index->value / long-format data-model change, the phased rollout, layout-parity requirements, and the no-feature-loss verification gate. Serves as the source-of-truth contract to audit each workflow against before retiring src/render/*. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- docs/openms-insight-migration.md | 117 +++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 docs/openms-insight-migration.md diff --git a/docs/openms-insight-migration.md b/docs/openms-insight-migration.md new file mode 100644 index 0000000..a96ef0b --- /dev/null +++ b/docs/openms-insight-migration.md @@ -0,0 +1,117 @@ +# FLASHApp → OpenMS-Insight migration + +Status doc + parity contract for moving every FLASHApp visualization off the +bespoke `flash_viewer_grid` (`src/render/*` + the `openms-streamlit-vue-component` +submodule) and onto reusable [OpenMS-Insight](https://github.com/t0mdavid-m/OpenMS-Insight) +components. Branch (both repos): `claude/peaceful-mayer-YqiXZ`. + +## Why + +`flash_viewer_grid` is a single mega-component that draws the whole synchronized +grid and manages cross-plot selection internally (`StateTracker.py`, `update.py`). +OpenMS-Insight already re-implements most of these as individual components +sharing a `StateManager` (a functional superset of `StateTracker`) and one Vue +bundle. We close the gap by porting the few missing visualizations *into* +OpenMS-Insight, then compose them with native Streamlit layout. 
+ +## Component mapping + +| FLASHApp | OpenMS-Insight | State | +|---|---|---| +| `TabulatorScan/Mass/Protein/TagTable` | `Table` | **reuse** | +| `PlotlyLineplot` (deconv/annotated) | `LinePlot` | **reuse** | +| `PlotlyLineplotTagger` (augmented) | `LinePlot` + overlay | **done (overlay); tag-geometry pending**¹ | +| `PlotlyHeatmap` (MS1/2 deconv/raw) | `Heatmap` | **reuse** | +| `SequenceView` | `SequenceView` (+coverage/fixed-mods/ion-types) | **done** ✅ | +| `Plotly3Dplot` (Precursor S/N) | `Scatter3D` | **done** ✅ | +| `FDRPlotly` (target/decoy KDE) | `DensityPlot` | **done** ✅ | +| `FLASHQuantView` (traces) | `FeatureView` | **done** ✅ | +| `InternalFragmentMap` (disabled) | `InternalFragmentMap` | **done** ✅ | + +¹ The reusable overlay-series primitive (deconv sticks + raw/annotated peaks) is +done and tested. The tag-annotation **geometry** (charge-state buttons + +inter-residue amino-acid arrows driven by `tagIndex`/`tagData`) is tightly +coupled to the per-scan-array data model and is best validated by driving the +app, so it lands with the Phase-2 TnT wiring rather than ported blind. + +## State-key mapping (StateTracker → OpenMS-Insight identifier→column) + +- `scanIndex` → scan-table `interactivity={'scanIndex':'index'}`; spectra / mass + table / sequence `filters={'scanIndex':'index'}` +- `massIndex` → mass-table `interactivity={'massIndex':'mass_id'}`; 3D plot + `filters={'massIndex':'mass_id'}` +- `proteinIndex` → protein-table `interactivity`; sequence / tag / spectrum + `filters` +- `tagIndex`/`tagData`, `AApos`, `sequenceOut` → sequence/tag interactivity +- `heatmap_deconv/_deconv2/_raw/_raw2` → per-`Heatmap` `zoom_identifier` (one per + heatmap) — the four bespoke range keys collapse into each heatmap's own zoom + +## The critical data-model change (highest risk) + +FLASHApp filters by **row index** (`iloc[scanIndex]`) over **arrays-per-scan** +(`MonoMass[]`, `SumIntensity[]`). 
OpenMS-Insight filters by **column value** over +**long format** (one row per peak). `src/parse/long_format.py` bridges this and +is fully unit-tested (`tests/test_long_format.py`): + +- `explode_spectrum_long` — per-scan mass/intensity arrays → one row per peak + with explicit `index` + `mass_id` (assigned **before** any intensity filter so + `massIndex` maps to the original array position). Filtering `index == k` + reproduces the old `iloc[k]`. +- `explode_combined_spectrum_long` — deconv + annotated → primary + overlay. +- `explode_signal_peaks_long` — nested `SignalPeaks`/`NoisyPeaks` → Scatter3D + long format (`index, mass_id, mz, charge, intensity, kind`). +- `density_series_long` — precomputed target/decoy curves → DensityPlot long + `{series, x, y}`. + +These are **additive**: the existing render pipeline is untouched, so the old and +new paths coexist during the phased rollout. + +## Phased rollout (remaining) + +Each phase swaps one workflow's viewer to OpenMS-Insight, then retires the +corresponding `src/render/*` usage. **Do not delete `src/render/*` until a +workflow's no-feature-loss audit passes against the contract above.** + +1. **FLASHDeconv** — `content/FLASHDeconv/FLASHDeconvViewer.py` / + `FLASHDeconvLayoutManager.py`: heatmaps, scan/mass `Table`s, deconv/annotated + `LinePlot`s, `Scatter3D`, `DensityPlot`, `SequenceView` + `InternalFragmentMap` + (when a sequence is provided), one shared `StateManager`. Parse layer → + long format via the adapters above. +2. **FLASHTnT** — protein `Table` → `SequenceView` → tag `Table` → + combined-spectrum `LinePlot` (tagger overlay) → `DensityPlot` → heatmaps. + Preserve proteoform→scan resolution + (`scan_resolution.py:build_proteoform_scan_map`): resolve `proteinIndex`→scan, + expose a `scan`/`deconv_index` column so value-filters reproduce the PyArrow + pushdown. +3. 
**FLASHQuant** — `content/FLASHQuant/FLASHQuantViewer.py`: single + `FeatureView` (use `FeatureView.explode_traces` to convert the per-group + array format). + +### Layout parity (all phases) + +Rebuild the configurable layout on native Streamlit: +`COMPONENT_OPTIONS`/`COMPONENT_NAMES` pickers drive which components instantiate; +render `[experiment][row][col]` with `st.columns` per row (≤3 cols); multi- +experiment side-by-side via top-level `st.columns(n)` (≤5), **each column its own +`StateManager` with a distinct `session_key`** so selections don't leak across +panels; preserve save/load of layouts +(`file_manager.get_results('layout','layout')`, `side_by_side`). + +## Verification + +- **OpenMS-Insight**: per-component unit tests (preprocess→cache→ + `_prepare_vue_data`) + `npm run build`. Current: **455 passed**, build green. +- **FLASHApp**: parse adapters unit-tested. Current: **53 passed**. +- **Per phase (requires a real dataset + a running app — not possible in CI + without sample data):** drive each workflow's `COMPONENT_OPTIONS`, confirm + every component renders and every cross-link in the plan's interaction + checklist round-trips (scan→spectra/mass/3D/sequence, mass→3D isolation, + protein→sequence/tag/spectrum, heatmap zoom levels, selection clear → None, + fragment overlay, multi-experiment isolation, layout add/remove + side-by-side). + +## Dependency + +`openms-insight` is declared in `requirements.txt` (git dependency). For local +development use an editable/path install: `pip install -e ../OpenMS-Insight`, +and either rebuild `js-component/dist` after Vue changes or run the bundle in dev +mode with `SVC_DEV_MODE=true`. 
From cd22c2eb6ec17fb2757991fc8d272e197262f946 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 1 Jun 2026 09:37:44 +0000 Subject: [PATCH 03/18] Add real-data integration tests against bundled example workspace Validate the migration end-to-end on the 266MB bundled example data (example-data/workspaces/default), feeding the ACTUAL cached parquet frames through the long-format adapters + OpenMS-Insight components: - deconv_spectrum explodes to the exact peak count; LinePlot value-filter reproduces the old iloc[scanIndex] slice. - threedim_SN_plot validates the optional-massIndex Scatter3D semantics on real data (scan-only shows all peaks; massIndex isolates one mass). - density_decoy confirmed empty/Null-typed (empty-decoy path). - FeatureView.explode_traces handles the real quant_dfs (1437 groups -> 722645 trace points). - InternalFragmentMap port matches the CURRENT src.render.sequence algorithm (the bundled internal_fragment_data.pkl.gz is stale from an older algorithm version while the feature was disabled; documented in the test). Tests skip cleanly when example data / openms_insight are absent. Suite: 48 passed, 2 skipped. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- tests/test_long_format_realdata.py | 171 +++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 tests/test_long_format_realdata.py diff --git a/tests/test_long_format_realdata.py b/tests/test_long_format_realdata.py new file mode 100644 index 0000000..0877cd3 --- /dev/null +++ b/tests/test_long_format_realdata.py @@ -0,0 +1,171 @@ +""" +Real-data validation of the OpenMS-Insight migration against the bundled +example workspace (example-data/workspaces/default). + +Unlike test_long_format.py (synthetic), these feed the ACTUAL cached parquet +frames through the long-format adapters and OpenMS-Insight components, checking +row counts and filter fidelity end-to-end. 
They are skipped automatically if the +example data or openms_insight is not present, so they never break a minimal CI. + +Findings encoded here: +- deconv_spectrum explodes to exactly sum(len(MonoMass)) peaks; filtering + index==k reproduces the old iloc[k] scan slice. +- threedim_SN_plot needs OPTIONAL massIndex: scan-only shows all of a scan's + signal/noisy peaks; massIndex isolates one mass. +- density_decoy is empty with Null-typed columns (empty-decoy path). +""" + +import glob +import gzip +import os +import pickle +import sys + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +pl = pytest.importorskip("polars") + +DEFAULT_WS = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "example-data", + "workspaces", + "default", +) +FD = os.path.join(DEFAULT_WS, "flashdeconv", "cache", "files", "example_fd") + +pytestmark = pytest.mark.skipif( + not os.path.isdir(FD), reason="example workspace data not available" +) + +# openms_insight is an optional dependency for these integration checks +oi = pytest.importorskip("openms_insight") + +from src.parse.long_format import ( # noqa: E402 + explode_signal_peaks_long, + explode_spectrum_long, +) + + +def _mock_session(monkeypatch): + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + + +def test_deconv_spectrum_explodes_to_exact_peak_count(): + src = pl.read_parquet(f"{FD}/deconv_spectrum.pq") + expected = sum(len(a) for a in src["MonoMass"].to_list()) + out = explode_spectrum_long(src.lazy()).collect() + assert out.height == expected + assert out.columns == ["index", "mass", "intensity", "mass_id"] + + +def test_deconv_lineplot_value_filter_matches_iloc(tmp_path, monkeypatch): + _mock_session(monkeypatch) + from openms_insight import LinePlot + + src = pl.read_parquet(f"{FD}/deconv_spectrum.pq") + long = explode_spectrum_long(src.lazy()) + lc = long.collect() + lp = LinePlot( + cache_id="rt_deconv", + 
data=long, + filters={"scanIndex": "index"}, + x_column="mass", + y_column="intensity", + cache_path=str(tmp_path), + ) + idx = int(lc["index"][0]) + vd = lp._prepare_vue_data({"scanIndex": idx}) + assert len(vd["plotData"]) == lc.filter(pl.col("index") == idx).height + + +def test_3d_optional_massindex_on_real_data(tmp_path, monkeypatch): + _mock_session(monkeypatch) + from openms_insight import Scatter3D + + long = explode_signal_peaks_long( + pl.read_parquet(f"{FD}/threedim_SN_plot.pq").lazy() + ) + snc = long.collect() + s3 = Scatter3D( + cache_id="rt_3d", + data=long, + filters={"scanIndex": "index"}, + optional_filters={"massIndex": "mass_id"}, + cache_path=str(tmp_path), + ) + # pick the scan with the most peaks + busiest = snc.group_by("index").len().sort("len", descending=True) + sidx = int(busiest["index"][0]) + scan_only = len(s3._prepare_vue_data({"scanIndex": sidx})["scatter3dData"]) + assert scan_only == snc.filter(pl.col("index") == sidx).height + # isolate mass 0 + expect_mass0 = snc.filter( + (pl.col("index") == sidx) & (pl.col("mass_id") == 0) + ).height + got = len( + s3._prepare_vue_data({"scanIndex": sidx, "massIndex": 0})["scatter3dData"] + ) + assert got == expect_mass0 + + +def test_density_decoy_is_empty_null_typed(): + """The bundled FD run has no decoys: density_decoy is empty, Null-typed.""" + dec = pl.read_parquet(f"{FD}/density_decoy.pq") + assert dec.height == 0 + + +def test_internal_fragment_map_matches_current_original(): + """Our InternalFragmentMap port matches the CURRENT FLASHApp algorithm. + + (The bundled internal_fragment_data.pkl.gz is stale — generated by an older + algorithm version while the feature was disabled — so we compare against the + live src.render.sequence implementation, not the cached file.) 
+ """ + from openms_insight.components.internalfragmentmap import ( + internal_fragment_masses, + ) + from src.render.sequence import getInternalFragmentDataFromSeq + + tnt_dirs = glob.glob( + os.path.join( + DEFAULT_WS, "flashtnt", "cache", "files", "*antibody*" + ) + ) + if not tnt_dirs: + pytest.skip("TnT antibody example not available") + with gzip.open(os.path.join(tnt_dirs[0], "sequence_data.pkl.gz"), "rb") as f: + seqdata = pickle.load(f) + + seq = "".join(seqdata[0]["sequence"]) + ref = getInternalFragmentDataFromSeq(seq) + for ion_type in ("by", "bz", "cy"): + ours = internal_fragment_masses(seq, ion_type)[0] + assert len(ours) == len(ref[f"fragment_masses_{ion_type}"]) + + +def test_featureview_explode_real_quant(): + from openms_insight import FeatureView + + q = os.path.join( + DEFAULT_WS, "flashquant", "cache", "files", "example", "quant_dfs.pq" + ) + if not os.path.isfile(q): + pytest.skip("quant example not available") + qdf = pl.read_parquet(q) + long = FeatureView.explode_traces(qdf) + assert long.height > 0 + assert set(long.columns) >= { + "feature_group", + "charge", + "mz", + "rt", + "intensity", + } + # Every feature group in the long output comes from the source frame + assert set(long["feature_group"].unique().to_list()) <= set( + qdf["FeatureGroupIndex"].to_list() + ) From 83b18a27261be10d92fd494bb6ff4fea1ca4bb12 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 1 Jun 2026 09:54:38 +0000 Subject: [PATCH 04/18] Phase 1: FLASHDeconv OpenMS-Insight viewer engine (flagged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add src/render_oi/ — the OpenMS-Insight rendering engine that replaces the monolithic flash_viewer_grid for FLASHDeconv, composing individual components with native Streamlit layout: - build_component(): factory mapping each FLASHDeconvLayoutManager.COMPONENT_NAMES entry to an OI component, loading the existing .pq caches through the long_format adapters. 
Covers heatmaps (MS1/MS2 deconv/raw), scan/mass Tables, deconv/annotated LinePlots, 3D Scatter3D (optional massIndex), FDR DensityPlot (precomputed), and SequenceView/InternalFragmentMap when a sequence is set. - render_experiment(): renders one [row][col] panel with a PER-EXPERIMENT StateManager (distinct session_key) so side-by-side panels don't share selections. Cross-link via scanIndex/massIndex identifier->column model. - FLASHDeconvViewer.py: render_panel() dispatches to the new engine when FLASHAPP_USE_OPENMS_INSIGHT is set, else the legacy render_grid. Default OFF — opt-in, reversible rollout. Layout manager needs NO changes (it only emits component-name strings). Verified end-to-end on the real example_fd cache: all 10 components build; a simulated scan-273 click filters mass_table->122, deconv_spectrum->122, 3D->2074 (all) / 3 (mass0) — exact parity with the source row counts. The index->value migration (highest-risk change) is proven correct on real data. +11 tests; FLASHApp suite: 59 passed, 2 skipped. 
def render_panel(dataset_id, layout_rows, file_manager, tool, exp_key, grid_key=None):
    """Draw a single experiment panel using whichever engine is enabled.

    With ``USE_OPENMS_INSIGHT`` off, the call is forwarded to the legacy
    ``render_grid``; ``grid_key`` is only passed along when the caller gave
    one, so ``render_grid`` otherwise applies its own default. With the flag
    on, ``src.render_oi.render_experiment`` is used instead; ``tool`` is not
    consulted on that path, and the panel key is ``grid_key`` when truthy,
    else ``exp_key``.
    """
    if not USE_OPENMS_INSIGHT:
        legacy_args = [dataset_id, layout_rows, file_manager, tool, exp_key]
        if grid_key is not None:
            legacy_args.append(grid_key)
        render_grid(*legacy_args)
        return

    # Imported lazily so the legacy path never needs the new engine.
    from src.render_oi import render_experiment

    sequence_submitted = get_sequence() is not None
    panel_key = grid_key or exp_key
    render_experiment(
        dataset_id,
        layout_rows,
        file_manager,
        panel_key=panel_key,
        has_sequence=sequence_submitted,
    )
(migration). + +Additive replacement for the monolithic ``src/render`` grid: each visualization +is an individual OpenMS-Insight component composed with native Streamlit layout +and a per-experiment StateManager. See :mod:`src.render_oi.deconv_viewer`. +""" + +from .deconv_viewer import build_component, render_experiment + +__all__ = ["build_component", "render_experiment"] diff --git a/src/render_oi/deconv_viewer.py b/src/render_oi/deconv_viewer.py new file mode 100644 index 0000000..b56bd15 --- /dev/null +++ b/src/render_oi/deconv_viewer.py @@ -0,0 +1,415 @@ +"""OpenMS-Insight rendering engine for FLASHDeconv (migration Phase 1). + +This is the replacement for ``src/render/render.py``'s ``render_grid`` that draws +each cell of the ``[experiment][row][col]`` layout with an individual +OpenMS-Insight component instead of the monolithic ``flash_viewer_grid`` Vue +component. It is additive: the old engine stays in place so the rollout can be +toggled per workflow. + +Design: +- ``build_component(comp_name, ...)`` is a factory mapping each + ``FLASHDeconvLayoutManager.COMPONENT_NAMES`` entry to an OpenMS-Insight + component, loading the existing ``.pq`` caches through the long-format + adapters in :mod:`src.parse.long_format`. +- Each experiment panel gets its OWN :class:`StateManager` (distinct + ``session_key``) so selections never leak across side-by-side panels. +- Cross-linking uses the identifier→column model: + scanIndex : scan-table click → spectra / mass-table / 3D / sequence filter + massIndex : mass-table click → 3D plot optional isolation +- OI component caches live under ``{workspace}/cache/oi_cache/{dataset_id}/`` + (see ``_oi_cache_dir``); this module performs no cache invalidation itself. + +The Streamlit cross-link wiring (one shared StateManager per panel, components +composed with ``st.columns``) is performed by :func:`render_experiment`.
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +import polars as pl + +logger = logging.getLogger(__name__) + +# State identifiers (FLASHApp StateTracker key → OpenMS-Insight identifier) +SCAN = "scanIndex" +MASS = "massIndex" + +# Heatmap cache names → (title, MS level descriptor) +_HEATMAP_SPEC = { + "ms1_deconv_heat_map": ("Deconvolved MS1 Heatmap", "ms1_deconv_heatmap"), + "ms2_deconv_heat_map": ("Deconvolved MS2 Heatmap", "ms2_deconv_heatmap"), + "ms1_raw_heatmap": ("Raw MS1 Heatmap", "ms1_raw_heatmap"), + "ms2_raw_heatmap": ("Raw MS2 Heatmap", "ms2_raw_heatmap"), +} + + +def _oi_cache_dir(file_manager, dataset_id: str) -> str: + """Per-dataset cache directory for OpenMS-Insight component caches.""" + base = Path(file_manager.cache_path) / "oi_cache" / dataset_id + base.mkdir(parents=True, exist_ok=True) + return str(base) + + +def _load_polars(file_manager, dataset_id: str, name: str) -> pl.LazyFrame: + """Load a cached parquet result as a Polars LazyFrame.""" + res = file_manager.get_results(dataset_id, [name], use_polars=True) + data = res[name] + return data if isinstance(data, pl.LazyFrame) else pl.LazyFrame(data) + + +def _load_pandas(file_manager, dataset_id: str, name: str): + """Load a cached parquet result as a pandas DataFrame (via Path).""" + res = file_manager.get_results(dataset_id, [name]) + p = res[name] + import pandas as pd + + return pd.read_parquet(p) if not isinstance(p, pd.DataFrame) else p + + +# -------------------------------------------------------------------------- +# Component builders. Each returns a zero-arg callable that, when invoked, +# renders the component with the given StateManager + Streamlit key. 
def build_component(
    comp_name: str,
    dataset_id: str,
    file_manager,
    state_manager,
    key_prefix: str,
    has_sequence: bool = False,
) -> Optional[Callable[[], Any]]:
    """Build a render callable for one layout cell.

    Args:
        comp_name: A value from ``FLASHDeconvLayoutManager.COMPONENT_NAMES``.
        dataset_id: The selected experiment id.
        file_manager: FLASHApp FileManager for cache access.
        state_manager: The per-experiment OpenMS-Insight StateManager.
        key_prefix: Unique Streamlit key prefix for this panel (keeps
            side-by-side panels' component keys disjoint).
        has_sequence: Whether a sequence was submitted (enables sequence_view /
            internal_fragment_map).

    Returns:
        A zero-arg callable that renders the component, or None if the
        component name is unknown / unavailable. ``sequence_view`` and
        ``internal_fragment_map`` are also reported as unavailable when
        ``has_sequence`` is false or their builder returns None.
    """
    # Only the classes used directly in this function are imported here.
    # SequenceView / InternalFragmentMap are imported by their own builder
    # helpers, so a build that lacks them no longer breaks every other cell.
    from openms_insight import (
        DensityPlot,
        Heatmap,
        LinePlot,
        Scatter3D,
        Table,
    )
    from src.parse.long_format import (
        density_series_long,
        explode_combined_spectrum_long,
        explode_signal_peaks_long,
        explode_spectrum_long,
    )

    cache_dir = _oi_cache_dir(file_manager, dataset_id)

    def cid(name: str) -> str:
        """Cache id of a component, namespaced by dataset."""
        return f"{dataset_id}__{name}"

    def skey(name: str) -> str:
        """Streamlit widget key of a component, namespaced by panel cell."""
        return f"{key_prefix}_{name}"

    # ---- Heatmaps ----
    if comp_name in _HEATMAP_SPEC:
        title, cache_name = _HEATMAP_SPEC[comp_name]
        data = _load_polars(file_manager, dataset_id, cache_name)
        hm = Heatmap(
            cache_id=cid(comp_name),
            data=data,
            x_column="rt",
            y_column="mass",
            intensity_column="intensity",
            title=title,
            x_label="Retention time",
            y_label="Monoisotopic mass",
            zoom_identifier=f"{comp_name}_zoom",
            cache_path=cache_dir,
        )
        return lambda: hm(key=skey(comp_name), state_manager=state_manager)

    # ---- Scan table (master; click sets scanIndex) ----
    if comp_name == "scan_table":
        data = _load_polars(file_manager, dataset_id, "scan_table")
        tbl = Table(
            cache_id=cid("scan_table"),
            data=data,
            interactivity={SCAN: "index"},
            index_field="index",
            title="Scan Table",
            cache_path=cache_dir,
        )
        return lambda: tbl(key=skey("scan_table"), state_manager=state_manager)

    # ---- Mass table (filtered by scanIndex; click sets massIndex) ----
    if comp_name == "mass_table":
        per_scan = _load_polars(file_manager, dataset_id, "mass_table")
        # Explode arrays-per-scan into one row per mass with mass_id.
        long = _explode_mass_table(per_scan)
        tbl = Table(
            cache_id=cid("mass_table"),
            data=long,
            filters={SCAN: "index"},
            interactivity={MASS: "mass_id"},
            index_field="mass_id",
            title="Mass Table",
            cache_path=cache_dir,
        )
        return lambda: tbl(key=skey("mass_table"), state_manager=state_manager)

    # ---- Deconvolved spectrum (LinePlot, filtered by scanIndex) ----
    if comp_name == "deconv_spectrum":
        per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum")
        long = explode_spectrum_long(per_scan)
        lp = LinePlot(
            cache_id=cid("deconv_spectrum"),
            data=long,
            filters={SCAN: "index"},
            x_column="mass",
            y_column="intensity",
            title="Deconvolved Spectrum",
            x_label="Monoisotopic Mass",
            y_label="Intensity",
            cache_path=cache_dir,
        )
        return lambda: lp(key=skey("deconv_spectrum"), state_manager=state_manager)

    # ---- Annotated/raw spectrum (LinePlot over annotated peaks) ----
    if comp_name == "anno_spectrum":
        per_scan = _load_polars(file_manager, dataset_id, "combined_spectrum")
        # Only the annotated half of the pair is plotted here.
        _deconv_long, anno_long = explode_combined_spectrum_long(per_scan)
        lp = LinePlot(
            cache_id=cid("anno_spectrum"),
            data=anno_long,
            filters={SCAN: "index"},
            x_column="mass",
            y_column="intensity",
            title="Annotated Spectrum",
            x_label="m/z",
            y_label="Intensity",
            cache_path=cache_dir,
        )
        return lambda: lp(key=skey("anno_spectrum"), state_manager=state_manager)

    # ---- 3D S/N plot (Scatter3D; scanIndex required, massIndex optional) ----
    if comp_name == "3D_SN_plot":
        per_scan = _load_polars(file_manager, dataset_id, "threedim_SN_plot")
        long = explode_signal_peaks_long(per_scan)
        s3 = Scatter3D(
            cache_id=cid("3D_SN_plot"),
            data=long,
            filters={SCAN: "index"},
            optional_filters={MASS: "mass_id"},
            title="Precursor Signals",
            cache_path=cache_dir,
        )
        return lambda: s3(key=skey("3D_SN_plot"), state_manager=state_manager)

    # ---- FDR / score-distribution plot (DensityPlot, precomputed curves) ----
    if comp_name == "fdr_plot":
        target = _load_pandas_pl(file_manager, dataset_id, "density_target")
        decoy = _load_pandas_pl(file_manager, dataset_id, "density_decoy")
        long = density_series_long(target, decoy)
        dp = DensityPlot(
            cache_id=cid("fdr_plot"),
            data=long.lazy(),
            precomputed=True,
            series_column="series",
            series_config={
                "Target": {"label": "Target QScores", "color": "green"},
                "Decoy": {"label": "Decoy QScores", "color": "red"},
            },
            title="Score Distribution",
            x_label="QScore",
            cache_path=cache_dir,
        )
        return lambda: dp(key=skey("fdr_plot"), state_manager=state_manager)

    # ---- Sequence view (only when a sequence is provided) ----
    if comp_name == "sequence_view" and has_sequence:
        builder = _build_sequence_view(
            dataset_id, file_manager, cache_dir, cid, skey, state_manager
        )
        if builder is not None:
            return builder

    # ---- Internal fragment map (only when a sequence is provided) ----
    if comp_name == "internal_fragment_map" and has_sequence:
        builder = _build_internal_fragment_map(
            dataset_id, file_manager, cache_dir, cid, skey, state_manager
        )
        if builder is not None:
            return builder

    logger.warning("Unknown / unavailable component: %s", comp_name)
    return None
def _explode_mass_table(per_scan: pl.LazyFrame) -> pl.LazyFrame:
    """Turn the arrays-per-scan mass table into one row per mass.

    Keeps ``index`` (the scan) plus whichever of the known per-mass list
    columns exist in the input, explodes those lists together, and adds
    ``mass_id`` as a 0-based running counter within each ``index`` group.
    The result is sorted by ``index`` then ``mass_id``.
    """
    available = per_scan.collect_schema().names()
    per_mass_columns = [
        column
        for column in (
            "MonoMass",
            "SumIntensity",
            "MinCharges",
            "MaxCharges",
            "MinIsotopes",
            "MaxIsotopes",
            "CosineScore",
            "SNR",
            "QScore",
        )
        if column in available
    ]
    # Counter restarts for every scan, so it is the row position inside a scan.
    position_in_scan = pl.int_range(pl.len()).over("index").alias("mass_id")
    return (
        per_scan.select(["index", *per_mass_columns])
        .explode(per_mass_columns)
        .with_columns(position_in_scan)
        .sort(["index", "mass_id"])
    )
+ per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") + peaks_long = ( + explode_spectrum_long(per_scan) + .rename({"mass": "mass", "index": "index"}) + .with_columns(pl.int_range(pl.len()).over("index").alias("peak_id")) + ) + + sv = SequenceView( + cache_id=cid("sequence_view"), + sequence_data=sequence_str, # static sequence string + peaks_data=peaks_long, + filters={SCAN: "index"}, + deconvolved=True, + fixed_modifications=_fixed_mods_from_sequence(seq), + cache_path=cache_dir, + ) + return lambda: sv(key=skey("sequence_view"), state_manager=state_manager) + + +def _build_internal_fragment_map( + dataset_id, file_manager, cache_dir, cid, skey, state_manager +): + """Build an InternalFragmentMap from the submitted sequence + scan peaks.""" + from openms_insight import InternalFragmentMap + from src.parse.long_format import explode_spectrum_long + + if not file_manager.result_exists("sequence", "sequence"): + return None + seq = file_manager.get_results("sequence", "sequence")["sequence"] + sequence_str = seq.get("input_sequence") + if not sequence_str: + return None + + per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") + peaks_long = explode_spectrum_long(per_scan) + + ifm = InternalFragmentMap( + cache_id=cid("internal_fragment_map"), + sequence_data=sequence_str, + peaks_data=peaks_long, + mass_column="mass", + filters={SCAN: "index"}, + title="Internal Fragment Map", + cache_path=cache_dir, + ) + return lambda: ifm(key=skey("internal_fragment_map"), state_manager=state_manager) + + +def _fixed_mods_from_sequence(seq: Dict[str, Any]) -> List[str]: + """Derive fixed-mod residue letters from the stored sequence settings.""" + mods = [] + if seq.get("fixed_mod_cysteine"): + mods.append("C") + if seq.get("fixed_mod_methionine"): + mods.append("M") + return mods + + +def render_experiment( + dataset_id: str, + layout_rows: List[List[str]], + file_manager, + panel_key: str, + has_sequence: bool = False, +) -> None: + """Render one 
experiment panel's [row][col] grid with OpenMS-Insight. + + Args: + dataset_id: The selected experiment id. + layout_rows: The experiment's layout — a list of rows, each a list of + ``COMPONENT_NAMES`` strings (≤3 columns per row). + file_manager: FLASHApp FileManager for cache access. + panel_key: Unique key for this panel; also seeds the StateManager + ``session_key`` so SIDE-BY-SIDE PANELS HAVE INDEPENDENT SELECTIONS + (a distinct session_key per experiment prevents cross-contamination). + has_sequence: Whether a sequence was submitted (enables sequence_view / + internal_fragment_map). + """ + import streamlit as st + from openms_insight import StateManager + + # Per-experiment StateManager — distinct session_key keeps selections from + # leaking across side-by-side panels (Risks/watch-items in the plan). + state_manager = StateManager(session_key=f"oi_state_{panel_key}") + + for row_index, row in enumerate(layout_rows): + if not row: + continue + cols = st.columns(len(row)) + for col_index, comp_name in enumerate(row): + with cols[col_index]: + try: + render = build_component( + comp_name, + dataset_id, + file_manager, + state_manager, + key_prefix=f"{panel_key}_{row_index}_{col_index}", + has_sequence=has_sequence, + ) + if render is not None: + render() + else: + st.warning(f"Component unavailable: {comp_name}") + except Exception as exc: # pragma: no cover - defensive UI guard + logger.exception("Failed to render %s", comp_name) + st.error(f"Error rendering {comp_name}: {exc}") diff --git a/tests/test_deconv_viewer_realdata.py b/tests/test_deconv_viewer_realdata.py new file mode 100644 index 0000000..86c2408 --- /dev/null +++ b/tests/test_deconv_viewer_realdata.py @@ -0,0 +1,151 @@ +"""End-to-end verification of the Phase-1 FLASHDeconv OpenMS-Insight viewer +against the bundled real example workspace. 
+ +These construct every FLASHDeconv component through ``build_component`` from the +actual ``example_fd`` parquet caches and exercise the cross-link filtering +(scan-table click → spectra / mass-table / 3D), verifying the index→value +migration reproduces the original row counts exactly. + +Everything except the final Streamlit ``render()`` call is covered (rendering +needs a browser); the data path — load → long-format explode → component +filter — is fully verified. Skipped automatically when example data or +openms_insight is absent. +""" + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +pl = pytest.importorskip("polars") +pytest.importorskip("openms_insight") + +from pathlib import Path # noqa: E402 + +_FD = Path( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +) / "example-data" / "workspaces" / "default" / "flashdeconv" / "cache" / "files" / "example_fd" + +pytestmark = pytest.mark.skipif( + not _FD.is_dir(), reason="example workspace data not available" +) + + +class _FakeFM: + """Serves the real example_fd parquet files like FileManager.get_results.""" + + def __init__(self, cache_path): + self.cache_path = str(cache_path) + + def get_results( + self, dataset_id, names, use_polars=False, use_pyarrow=False, partial=False + ): + out = {} + for n in names: + p = _FD / f"{n}.pq" + out[n] = pl.scan_parquet(p) if use_polars else p + return out + + def result_exists(self, a, b): + return False # no submitted sequence in this fixture + + +ALL_DECONV_COMPONENTS = [ + "ms1_deconv_heat_map", + "ms2_deconv_heat_map", + "ms1_raw_heatmap", + "ms2_raw_heatmap", + "scan_table", + "mass_table", + "deconv_spectrum", + "anno_spectrum", + "3D_SN_plot", + "fdr_plot", +] + + +@pytest.fixture +def fake_fm(tmp_path): + return _FakeFM(tmp_path) + + +@pytest.mark.parametrize("comp", ALL_DECONV_COMPONENTS) +def test_every_component_builds_from_real_cache(comp, fake_fm, monkeypatch): + import 
def test_scan_click_cross_link_row_counts(fake_fm, monkeypatch, tmp_path):
    """A simulated scan click narrows mass table, spectrum and 3D plot exactly.

    The scan with the most exploded mass rows is used as the "clicked" scan;
    every component's filtered payload size is compared with the row count
    obtained by filtering the same long-format frame directly.
    """
    import streamlit as st

    monkeypatch.setattr(st, "session_state", {}, raising=False)

    from openms_insight import LinePlot, Scatter3D, Table

    from src.parse.long_format import (
        explode_signal_peaks_long,
        explode_spectrum_long,
    )
    from src.render_oi.deconv_viewer import _explode_mass_table

    mass_pq = _FD / "mass_table.pq"
    deconv_pq = _FD / "deconv_spectrum.pq"
    signal_pq = _FD / "threedim_SN_plot.pq"
    cache_root = str(tmp_path)

    # Pick the scan holding the largest number of masses.
    rows_per_scan = (
        _explode_mass_table(pl.scan_parquet(mass_pq))
        .collect()
        .group_by("index")
        .len()
        .sort("len", descending=True)
    )
    scan = int(rows_per_scan["index"][0])
    expected_masses = int(rows_per_scan["len"][0])
    click = {"scanIndex": scan}

    # Mass table filtered by scanIndex
    mass_table = Table(
        cache_id="x_mass",
        data=_explode_mass_table(pl.scan_parquet(mass_pq)),
        filters={"scanIndex": "index"},
        interactivity={"massIndex": "mass_id"},
        index_field="mass_id",
        cache_path=cache_root,
    )
    mass_payload = mass_table._prepare_vue_data(click)
    assert mass_payload["_pagination"]["total_rows"] == expected_masses

    # Deconvolved spectrum filtered by scanIndex
    spectrum_plot = LinePlot(
        cache_id="x_dec",
        data=explode_spectrum_long(pl.scan_parquet(deconv_pq)),
        filters={"scanIndex": "index"},
        x_column="mass",
        y_column="intensity",
        cache_path=cache_root,
    )
    # Expected count comes from an independently built frame.
    spectrum_reference = explode_spectrum_long(pl.scan_parquet(deconv_pq)).collect()
    expected_peaks = spectrum_reference.filter(pl.col("index") == scan).height
    assert len(spectrum_plot._prepare_vue_data(click)["plotData"]) == expected_peaks

    # 3D S/N: all masses for scan, isolate mass 0
    signal_long = explode_signal_peaks_long(pl.scan_parquet(signal_pq))
    signal_reference = signal_long.collect()
    scatter = Scatter3D(
        cache_id="x_3d",
        data=signal_long,
        filters={"scanIndex": "index"},
        optional_filters={"massIndex": "mass_id"},
        cache_path=cache_root,
    )
    whole_scan = len(scatter._prepare_vue_data(click)["scatter3dData"])
    assert whole_scan == signal_reference.filter(pl.col("index") == scan).height

    first_mass_only = len(
        scatter._prepare_vue_data({"scanIndex": scan, "massIndex": 0})["scatter3dData"]
    )
    assert first_mass_only == signal_reference.filter(
        (pl.col("index") == scan) & (pl.col("mass_id") == 0)
    ).height
#!/bin/bash
# SessionStart hook: install FLASHApp deps + the OpenMS-Insight sibling repo so
# tests and the migration work in Claude Code on the web.
#
# Layout assumed (Claude Code on the web clones siblings under the same parent):
#   <parent>/FLASHApp        (this repo, $CLAUDE_PROJECT_DIR)
#   <parent>/OpenMS-Insight  (visualization library dependency)
#
# The OpenMS-Insight wheel force-includes its built js-component/dist, so the
# Vue bundle must be built BEFORE the editable install. We handle that ordering
# and degrade gracefully if the sibling repo isn't present.
#
# Every install step below warns and continues on failure so that step 3
# (persisting PYTHONPATH / PATH) is always reached.
set -euo pipefail

# Only run in the remote (web) environment; local setups are managed by the user.
if [ "${CLAUDE_CODE_REMOTE:-}" != "true" ]; then
  exit 0
fi

PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
PARENT_DIR="$(dirname "$PROJECT_DIR")"
OI_DIR="$PARENT_DIR/OpenMS-Insight"

echo "[session-start] FLASHApp setup starting"

# --- 1. Python dependencies for FLASHApp -----------------------------------
# requirements.txt pins the app deps (streamlit, polars, pyopenms, scipy, ...).
# It also lists `openms-insight @ git+...`, but building that from the git URL
# fails (the wheel force-includes a js bundle that isn't built in a fresh
# clone) AND aborts the whole install. So strip that line here and install the
# local sibling separately in step 2.
REQ_TMP="$(mktemp)"
# Remove the temp file on every exit path, including an early abort.
trap 'rm -f "$REQ_TMP"' EXIT

# `grep -v` exits 1 when no line survives the filter and 2 on a read error.
# Copying the unfiltered file back in those cases would reintroduce the very
# line being stripped, so keep whatever grep wrote (possibly nothing) instead.
grep -ivE '^openms-insight[[:space:]]*@' "$PROJECT_DIR/requirements.txt" > "$REQ_TMP" || {
  echo "[session-start] WARNING: no installable lines extracted from requirements.txt"
}

# Skip pip entirely when nothing is left to install.
if [ -s "$REQ_TMP" ]; then
  python3 -m pip install --user --quiet -r "$REQ_TMP" || {
    echo "[session-start] WARNING: requirements.txt install hit an error; continuing"
  }
fi

# pytest for the test suite (not in the app requirements). Non-fatal like the
# other install steps: a failure here must not prevent the environment export.
python3 -m pip install --user --quiet pytest pytest-cov || {
  echo "[session-start] WARNING: pytest install failed; continuing"
}

# --- 2. OpenMS-Insight sibling (build Vue bundle, then editable install) ----
if [ -d "$OI_DIR" ]; then
  echo "[session-start] Building OpenMS-Insight Vue bundle"
  # npm install is cache-friendly and idempotent (prefer over npm ci so the
  # cached container state is reused across sessions).
  ( cd "$OI_DIR/js-component" && npm install --no-audit --no-fund --silent && npm run build ) || {
    echo "[session-start] WARNING: Vue bundle build failed; OI install may be degraded"
  }

  # The wheel force-includes openms_insight/js-component/dist. Vite builds to
  # js-component/dist (repo root), so mirror it into the package path the build
  # backend expects, making the editable install succeed.
  if [ -d "$OI_DIR/js-component/dist" ]; then
    mkdir -p "$OI_DIR/openms_insight/js-component"
    rm -rf "$OI_DIR/openms_insight/js-component/dist"
    cp -r "$OI_DIR/js-component/dist" "$OI_DIR/openms_insight/js-component/dist"
  fi

  echo "[session-start] Installing OpenMS-Insight (editable)"
  python3 -m pip install --user --quiet -e "$OI_DIR" || {
    echo "[session-start] WARNING: editable OI install failed; falling back to PYTHONPATH"
    # Fallback: make it importable directly from source.
    echo "export PYTHONPATH=\"$OI_DIR:\${PYTHONPATH:-}\"" >> "${CLAUDE_ENV_FILE:-/dev/null}"
  }
else
  echo "[session-start] OpenMS-Insight not found at $OI_DIR (skipping; clone it as a sibling for the migration)"
fi

# --- 3. Persist environment for the session --------------------------------
# FLASHApp imports as top-level `src.*`; ensure the repo root is importable.
if [ -n "${CLAUDE_ENV_FILE:-}" ]; then
  echo "export PYTHONPATH=\"$PROJECT_DIR:\${PYTHONPATH:-}\"" >> "$CLAUDE_ENV_FILE"
  # User-site installs land here; make sure they're on PATH for pytest etc.
  echo "export PATH=\"\$HOME/.local/bin:\$PATH\"" >> "$CLAUDE_ENV_FILE"
fi

echo "[session-start] FLASHApp setup complete"
Handles both pickled (.pkl.gz) and parquet sequence_data caches; empty id-FDR densities degrade cleanly via the precomputed DensityPlot. FLASHTnTViewer.py: render_panel_tnt() dispatches to the new engine under FLASHAPP_USE_OPENMS_INSIGHT, else legacy render_grid (default OFF). Layout manager unchanged. Verified on the real antibody cache: all 7 components build; proteoform→scan resolves (protein 0→deconv 0); protein-0 click → 15 tags (exact); sequence view → 450 residues with coverage; combined spectrum → 564 primary + 65657 overlay peaks (exact). +10 tests; FLASHApp suite: 69 passed, 2 skipped. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- content/FLASHTnT/FLASHTnTViewer.py | 35 +++- src/render_oi/__init__.py | 8 +- src/render_oi/tnt_viewer.py | 312 +++++++++++++++++++++++++++++ tests/test_tnt_viewer_realdata.py | 185 +++++++++++++++++ 4 files changed, 533 insertions(+), 7 deletions(-) create mode 100644 src/render_oi/tnt_viewer.py create mode 100644 tests/test_tnt_viewer_realdata.py diff --git a/content/FLASHTnT/FLASHTnTViewer.py b/content/FLASHTnT/FLASHTnTViewer.py index e94392f..90f0d51 100644 --- a/content/FLASHTnT/FLASHTnTViewer.py +++ b/content/FLASHTnT/FLASHTnTViewer.py @@ -1,3 +1,5 @@ +import os + import streamlit as st from pathlib import Path @@ -6,15 +8,36 @@ from src.workflow.FileManager import FileManager from src.render.render import render_grid +# Migration flag (shared with FLASHDeconv): when truthy, render each experiment +# panel with the OpenMS-Insight engine instead of the legacy flash_viewer_grid. 
def render_panel_tnt(dataset_id, layout_rows, file_manager, exp_key, grid_key=None):
    """Draw a single FLASHTnT experiment panel using whichever engine is active.

    With ``USE_OPENMS_INSIGHT`` set, the panel is handed to
    ``render_experiment_tnt``, keyed by ``grid_key`` when that is truthy and by
    ``exp_key`` otherwise. Without the flag, the legacy ``render_grid`` is
    called; ``grid_key`` is forwarded only when the caller supplied one, so
    ``render_grid`` falls back to its own default in the other case.
    """
    if not USE_OPENMS_INSIGHT:
        legacy_args = [dataset_id, layout_rows, file_manager, 'flashtnt', exp_key]
        if grid_key is not None:
            legacy_args.append(grid_key)
        render_grid(*legacy_args)
        return

    # Imported only when the new engine is actually selected.
    from src.render_oi import render_experiment_tnt

    panel_key = grid_key or exp_key
    render_experiment_tnt(
        dataset_id, layout_rows, file_manager,
        panel_key=panel_key,
    )
selected_experiment): ) if 'selected_experiment0_tagger' in st.session_state: - render_grid(st.session_state.selected_experiment0_tagger, layout[0], file_manager, 'flashtnt', 'selected_experiment0_tagger') + render_panel_tnt(st.session_state.selected_experiment0_tagger, layout[0], file_manager, 'selected_experiment0_tagger') ### for multiple experiments on one view if len(layout) > 1: @@ -122,6 +145,6 @@ def validate_selected_index(file_manager, selected_experiment): # if #experiment input files are less than #layouts, all the pre-selection will be the first experiment if f"selected_experiment{exp_index}_tagger" in st.session_state: - render_grid(st.session_state["selected_experiment%d_tagger" % exp_index], layout[exp_index], file_manager, 'flashtnt', f"selected_experiment{exp_index}_tagger", 'flash_viewer_grid_%d' % exp_index) + render_panel_tnt(st.session_state["selected_experiment%d_tagger" % exp_index], layout[exp_index], file_manager, f"selected_experiment{exp_index}_tagger", 'flash_viewer_grid_%d' % exp_index) save_params(params) \ No newline at end of file diff --git a/src/render_oi/__init__.py b/src/render_oi/__init__.py index 92455ee..deb5613 100644 --- a/src/render_oi/__init__.py +++ b/src/render_oi/__init__.py @@ -6,5 +6,11 @@ """ from .deconv_viewer import build_component, render_experiment +from .tnt_viewer import build_component_tnt, render_experiment_tnt -__all__ = ["build_component", "render_experiment"] +__all__ = [ + "build_component", + "render_experiment", + "build_component_tnt", + "render_experiment_tnt", +] diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py new file mode 100644 index 0000000..76d3a4a --- /dev/null +++ b/src/render_oi/tnt_viewer.py @@ -0,0 +1,312 @@ +"""OpenMS-Insight rendering engine for FLASHTnT (migration Phase 2). + +FLASHTnT's master selection is ``proteinIndex`` (set by the protein table). 
The +challenge the old ``update.py`` solved: spectrum / mass / sequence panels are +keyed by the deconv ROW INDEX, while the tag table is keyed by ``proteinIndex`` +directly. We reproduce that here with a resolver: + + protein table click → proteinIndex + resolver (build_proteoform_scan_map) → deconvIndex (= scan's deconv row) + spectra / mass / sequence filter by {deconvIndex: index} + tag table filter by {proteinIndex: ProteinIndex} + sequence view filter by {proteinIndex: proteoform_index} + +The resolver runs after the protein table is rendered (its interactivity sets +``proteinIndex``) and stamps ``deconvIndex`` into the same StateManager so the +downstream value-filters reproduce the original PyArrow pushdown. +""" + +from __future__ import annotations + +import gzip +import logging +import pickle +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +import polars as pl + +from .deconv_viewer import _HEATMAP_SPEC, _load_pandas_pl, _load_polars, _oi_cache_dir + +logger = logging.getLogger(__name__) + +# State identifiers +PROTEIN = "proteinIndex" +DECONV = "deconvIndex" + + +def _load_pickle_gz(path: Path): + with gzip.open(path, "rb") as f: + return pickle.load(f) + + +def _build_proteoform_scan_map(file_manager, dataset_id: str) -> Dict[int, Dict[str, int]]: + """proteinIndex → {scan, deconv_index} using the existing resolver.""" + from src.render.scan_resolution import build_proteoform_scan_map + + prot = _load_pandas_pl(file_manager, dataset_id, "protein_dfs").to_pandas() + scan = _load_pandas_pl(file_manager, dataset_id, "scan_table").to_pandas() + return build_proteoform_scan_map(prot[["index", "Scan"]], scan[["index", "Scan"]]) + + +def _sequence_table(file_manager, dataset_id: str) -> Optional[pl.LazyFrame]: + """Build a one-row-per-proteoform sequence frame for SequenceView. + + Reads the cached ``sequence_data`` ({pid: entry}); each entry already carries + sequence/coverage/maxCoverage/fragment masses. 
We emit a LazyFrame with a + ``proteoform_index`` filter column plus ``sequence`` (joined string), + ``precursor_charge``, and per-proteoform coverage arrays so OI's extended + SequenceView can shade residues. + """ + res = file_manager.get_results(dataset_id, ["sequence_data"], partial=True) + if "sequence_data" not in res: + return None + p = res["sequence_data"] + # Example caches store sequence_data as a pickled dict (.pkl.gz); newer ones + # as a parquet dataset. Handle the pickle path (what the bundled data uses). + if isinstance(p, Path) and p.suffix == ".gz": + data = _load_pickle_gz(p) + elif isinstance(p, dict): + data = p + else: + try: + from src.render.sequence_data_store import reconstruct_all + + data = reconstruct_all(p) + except Exception: + return None + + rows = [] + for pid in sorted(data): + entry = data[pid] + seq = entry.get("sequence") or [] + rows.append( + { + "proteoform_index": int(pid), + "sequence": "".join(seq) if isinstance(seq, list) else str(seq), + "precursor_charge": 1, + "coverage": [float(c) for c in (entry.get("coverage") or [])], + "max_coverage": float(entry.get("maxCoverage") or 0.0), + } + ) + if not rows: + return None + return pl.DataFrame(rows).lazy() + + +def build_component_tnt( + comp_name: str, + dataset_id: str, + file_manager, + state_manager, + key_prefix: str, +) -> Optional[Callable[[], Any]]: + """Build a render callable for one FLASHTnT layout cell.""" + from openms_insight import DensityPlot, Heatmap, LinePlot, SequenceView, Table + + from src.parse.long_format import ( + density_series_long, + explode_combined_spectrum_long, + ) + + cache_dir = _oi_cache_dir(file_manager, dataset_id) + cid = lambda name: f"{dataset_id}__tnt_{name}" # noqa: E731 + skey = lambda name: f"{key_prefix}_{name}" # noqa: E731 + + # ---- Protein table (master; click sets proteinIndex) ---- + if comp_name == "protein_table": + data = _load_polars(file_manager, dataset_id, "protein_dfs") + # Keep the informative columns the original 
Protein Table showed. + keep = [ + "index", + "accession", + "description", + "ProteoformMass", + "Coverage(%)", + "TagCount", + "ProteoformLevelQvalue", + ] + schema = data.collect_schema().names() + cols = [c for c in keep if c in schema] + tbl = Table( + cache_id=cid("protein_table"), + data=data.select(cols) if cols else data, + interactivity={PROTEIN: "index"}, + index_field="index", + title="Protein Table", + cache_path=cache_dir, + ) + return lambda: tbl(key=skey("protein_table"), state_manager=state_manager) + + # ---- Tag table (filtered by proteinIndex) ---- + if comp_name == "tag_table": + data = _load_polars(file_manager, dataset_id, "tag_dfs") + keep = [ + "TagIndex", + "TagSequence", + "StartPos", + "EndPos", + "Length", + "Score", + "DeltaMass", + "ProteinIndex", + ] + schema = data.collect_schema().names() + cols = [c for c in keep if c in schema] + tbl = Table( + cache_id=cid("tag_table"), + data=data.select(cols) if cols else data, + filters={PROTEIN: "ProteinIndex"}, + index_field="TagIndex", + title="Tag Table", + cache_path=cache_dir, + ) + return lambda: tbl(key=skey("tag_table"), state_manager=state_manager) + + # ---- Combined / augmented spectrum (deconv primary + annotated overlay) ---- + if comp_name == "combined_spectrum": + per_scan = _load_polars(file_manager, dataset_id, "combined_spectrum") + deconv_long, anno_long = explode_combined_spectrum_long(per_scan) + lp = LinePlot( + cache_id=cid("combined_spectrum"), + data=deconv_long, + overlay_data=anno_long, + filters={DECONV: "index"}, + x_column="mass", + y_column="intensity", + overlay_x_column="mass", + overlay_y_column="intensity", + overlay_name="Annotated", + title="Augmented Deconvolved Spectrum", + x_label="Monoisotopic Mass", + y_label="Intensity", + cache_path=cache_dir, + ) + return lambda: lp(key=skey("combined_spectrum"), state_manager=state_manager) + + # ---- Sequence view (filtered by proteinIndex; coverage coloring) ---- + if comp_name == "sequence_view": + seq_tbl = 
_sequence_table(file_manager, dataset_id) + if seq_tbl is None: + return None + settings = _tnt_settings(file_manager, dataset_id) + sv = SequenceView( + cache_id=cid("sequence_view"), + sequence_data=seq_tbl, + filters={PROTEIN: "proteoform_index"}, + deconvolved=True, + coverage_column="coverage", + max_coverage_column="max_coverage", + annotation_config={ + "ion_types": settings.get("ion_types", ["b", "y"]), + "tolerance": settings.get("tolerance", 10.0), + "tolerance_ppm": True, + }, + title="Sequence View", + cache_path=cache_dir, + ) + return lambda: sv(key=skey("sequence_view"), state_manager=state_manager) + + # ---- Identification FDR / score distribution (DensityPlot, precomputed) ---- + if comp_name == "id_fdr_plot": + target = _load_pandas_pl(file_manager, dataset_id, "density_id_target") + decoy = _load_pandas_pl(file_manager, dataset_id, "density_id_decoy") + long = density_series_long(target, decoy) + dp = DensityPlot( + cache_id=cid("id_fdr_plot"), + data=long.lazy(), + precomputed=True, + series_column="series", + series_config={ + "Target": {"label": "Target", "color": "green"}, + "Decoy": {"label": "Decoy", "color": "red"}, + }, + title="Score Distribution", + x_label="Proteoform-level q-value", + cache_path=cache_dir, + ) + return lambda: dp(key=skey("id_fdr_plot"), state_manager=state_manager) + + # ---- Heatmaps (reuse Deconv spec) ---- + if comp_name in _HEATMAP_SPEC: + title, cache_name = _HEATMAP_SPEC[comp_name] + data = _load_polars(file_manager, dataset_id, cache_name) + hm = Heatmap( + cache_id=cid(comp_name), + data=data, + x_column="rt", + y_column="mass", + intensity_column="intensity", + title=title, + x_label="Retention time", + y_label="Monoisotopic mass", + zoom_identifier=f"tnt_{comp_name}_zoom", + cache_path=cache_dir, + ) + return lambda: hm(key=skey(comp_name), state_manager=state_manager) + + logger.warning("Unknown / unavailable TnT component: %s", comp_name) + return None + + +def _tnt_settings(file_manager, dataset_id: 
def render_experiment_tnt(
    dataset_id: str,
    layout_rows: List[List[str]],
    file_manager,
    panel_key: str,
) -> None:
    """Render one FLASHTnT experiment panel with proteoform→scan resolution.

    A StateManager keyed by ``panel_key`` is created for the panel. When a
    ``proteinIndex`` selection exists it is resolved to ``deconvIndex`` through
    ``_build_proteoform_scan_map`` BEFORE any component is rendered, so the
    panels that filter on ``deconvIndex`` see the matching row on this run.

    Args:
        dataset_id: Identifier of the experiment whose caches are rendered.
        layout_rows: Rows of component names; each row becomes one
            ``st.columns`` strip. Empty rows are skipped.
        file_manager: Cache accessor passed through to the component builders.
        panel_key: Unique key for this panel; used for the StateManager session
            key and as the prefix of every component key.
    """
    import streamlit as st
    from openms_insight import StateManager

    state_manager = StateManager(session_key=f"oi_tnt_state_{panel_key}")

    protein_index = state_manager.get_selection(PROTEIN)
    if protein_index is not None:
        # The scan map is only needed once a protein is selected, so it is not
        # built for panels without a selection.
        try:
            scan_map = _build_proteoform_scan_map(file_manager, dataset_id)
            entry = scan_map.get(int(protein_index))
        except Exception as exc:  # pragma: no cover - defensive UI guard
            # Keep the panel usable: a resolver failure must not prevent the
            # protein table (the master selection) from rendering.
            logger.exception(
                "Failed to resolve proteinIndex %r to a deconv index", protein_index
            )
            st.error(f"Error resolving selected proteoform: {exc}")
            entry = None
        deconv_index = entry["deconv_index"] if entry else None
        # Write only on change to avoid redundant state updates.
        if state_manager.get_selection(DECONV) != deconv_index:
            state_manager.set_selection(DECONV, deconv_index)

    for row_index, row in enumerate(layout_rows):
        if not row:
            continue
        cols = st.columns(len(row))
        for col_index, (column, comp_name) in enumerate(zip(cols, row)):
            with column:
                try:
                    render = build_component_tnt(
                        comp_name,
                        dataset_id,
                        file_manager,
                        state_manager,
                        key_prefix=f"{panel_key}_{row_index}_{col_index}",
                    )
                    if render is not None:
                        render()
                    else:
                        st.warning(f"Component unavailable: {comp_name}")
                except Exception as exc:  # pragma: no cover - defensive UI guard
                    logger.exception("Failed to render %s", comp_name)
                    st.error(f"Error rendering {comp_name}: {exc}")
+""" + +import gzip +import os +import pickle +import sys + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +pl = pytest.importorskip("polars") +pytest.importorskip("openms_insight") + +from pathlib import Path # noqa: E402 + +_TNT = ( + Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + / "example-data" / "workspaces" / "default" / "flashtnt" / "cache" / "files" +) + + +def _first_dataset(): + if not _TNT.is_dir(): + return None + for d in sorted(_TNT.iterdir()): + if (d / "protein_dfs.pq").exists(): + return d + return None + + +_DS = _first_dataset() + +pytestmark = pytest.mark.skipif( + _DS is None, reason="example FLASHTnT workspace data not available" +) + + +class _FakeFM: + def __init__(self, cache_path): + self.cache_path = str(cache_path) + + def get_results( + self, dataset_id, names, use_polars=False, use_pyarrow=False, partial=False + ): + out = {} + for n in names: + pq = _DS / f"{n}.pq" + pkl = _DS / f"{n}.pkl.gz" + if pq.exists(): + out[n] = pl.scan_parquet(pq) if use_polars else pq + elif pkl.exists(): + out[n] = pkl + elif not partial: + raise KeyError(n) + return out + + def result_exists(self, a, b): + return True + + +ALL_TNT_COMPONENTS = [ + "protein_table", + "tag_table", + "combined_spectrum", + "sequence_view", + "id_fdr_plot", + "ms1_raw_heatmap", + "ms1_deconv_heat_map", +] + + +@pytest.fixture +def fake_fm(tmp_path): + return _FakeFM(tmp_path) + + +@pytest.mark.parametrize("comp", ALL_TNT_COMPONENTS) +def test_every_tnt_component_builds(comp, fake_fm, monkeypatch): + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from src.render_oi.tnt_viewer import build_component_tnt + + builder = build_component_tnt(comp, "ds", fake_fm, None, key_prefix="p0") + assert callable(builder), f"{comp} did not produce a render callable" + + +def test_proteoform_scan_resolution(fake_fm, monkeypatch): + import streamlit as st + + 
def test_protein_click_cross_links(fake_fm, monkeypatch, tmp_path):
    """A protein selection filters the tag table and yields a sequence payload.

    The smallest key of the resolved scan map is used as the selected protein.
    The tag table's filtered row total must equal a direct polars filter on
    ``ProteinIndex``. The sequence-view half only runs when a sequence table
    can be built from the cache.
    """
    import streamlit as st

    monkeypatch.setattr(st, "session_state", {}, raising=False)

    from openms_insight import SequenceView, Table

    from src.render_oi.tnt_viewer import _build_proteoform_scan_map, _sequence_table

    resolved = _build_proteoform_scan_map(fake_fm, "ds")
    pid = min(resolved.keys())
    selection = {"proteinIndex": pid}

    # --- Tag table filtered by proteinIndex ---
    tags = pl.scan_parquet(_DS / "tag_dfs.pq")
    available = tags.collect_schema().names()
    wanted = ["TagIndex", "TagSequence", "ProteinIndex", "StartPos", "EndPos"]
    cols = [name for name in wanted if name in available]
    tag_table = Table(
        cache_id="tt_x",
        data=tags.select(cols),
        filters={"proteinIndex": "ProteinIndex"},
        index_field="TagIndex",
        cache_path=str(tmp_path),
    )
    vue_payload = tag_table._prepare_vue_data(selection)
    got = vue_payload["_pagination"]["total_rows"]
    expected = tags.collect().filter(pl.col("ProteinIndex") == pid).height
    assert got == expected

    # --- Sequence view filtered by proteinIndex carries coverage ---
    seq_tbl = _sequence_table(fake_fm, "ds")
    if seq_tbl is None:
        return
    sequence_view = SequenceView(
        cache_id="sv_x",
        sequence_data=seq_tbl,
        filters={"proteinIndex": "proteoform_index"},
        deconvolved=True,
        coverage_column="coverage",
        max_coverage_column="max_coverage",
        cache_path=str(tmp_path),
    )
    sd = sequence_view._prepare_vue_data(selection)["sequenceData"]
    assert len(sd["sequence"]) > 0
annotated overlay, both present.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + + from openms_insight import LinePlot + + from src.parse.long_format import explode_combined_spectrum_long + + per_scan = pl.scan_parquet(_DS / "combined_spectrum.pq") + deconv_long, anno_long = explode_combined_spectrum_long(per_scan) + dl, al = deconv_long.collect(), anno_long.collect() + + lp = LinePlot( + cache_id="cs_x", + data=deconv_long, + overlay_data=anno_long, + filters={"deconvIndex": "index"}, + x_column="mass", + y_column="intensity", + overlay_x_column="mass", + overlay_y_column="intensity", + cache_path=str(tmp_path), + ) + vd = lp._prepare_vue_data({"deconvIndex": 0}) + assert len(vd["plotData"]) == dl.filter(pl.col("index") == 0).height + assert len(vd.get("plotDataOverlay", [])) == al.filter(pl.col("index") == 0).height + assert lp._get_component_args().get("has_overlay") is True From e6a5a2ac3304f43055d8a736b2b1c348064d7324 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 1 Jun 2026 12:20:22 +0000 Subject: [PATCH 07/18] Phase 3: FLASHQuant OpenMS-Insight viewer engine (flagged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add src/render_oi/quant_viewer.py — the OpenMS-Insight engine for FLASHQuant's single feature-group view: a feature-group selector Table (click sets featureGroup) above a FeatureView filtered by {featureGroup: feature_group}. Add explode_quant_traces_long() to src/parse/long_format.py: FLASHQuant stores per-feature-group parallel arrays (Charges/IsotopeIndices/CentroidMzs as scalars per trace; RTs/MZs/Intensities as comma-joined point strings per trace). The adapter zips the per-trace lists, splits/explodes the point strings, and emits FeatureView's long format (one row per trace point: feature_group, charge, isotope, centroid_mz, rt, mz, intensity). 
FLASHQuantViewer.py dispatches to render_experiment_quant under FLASHAPP_USE_OPENMS_INSIGHT, else legacy render_grid (default OFF). Verified on the real quant cache: 1437 feature groups → 722,645 trace points; group 0/5/100 explode to 1384/239/208 points exactly; FeatureView filters to those exact counts; selector table shows all 1437 groups. +3 tests; FLASHApp suite: 72 passed, 2 skipped. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- content/FLASHQuant/FLASHQuantViewer.py | 23 ++++- src/parse/long_format.py | 89 ++++++++++++++++++ src/render_oi/__init__.py | 3 + src/render_oi/quant_viewer.py | 123 +++++++++++++++++++++++++ tests/test_quant_viewer_realdata.py | 106 +++++++++++++++++++++ 5 files changed, 340 insertions(+), 4 deletions(-) create mode 100644 src/render_oi/quant_viewer.py create mode 100644 tests/test_quant_viewer_realdata.py diff --git a/content/FLASHQuant/FLASHQuantViewer.py b/content/FLASHQuant/FLASHQuantViewer.py index 05077e9..341401d 100644 --- a/content/FLASHQuant/FLASHQuantViewer.py +++ b/content/FLASHQuant/FLASHQuantViewer.py @@ -1,3 +1,5 @@ +import os + import streamlit as st from pathlib import Path @@ -7,6 +9,11 @@ # from src.render.components import flash_viewer_grid_component, FlashViewerComponent, FLASHQuant from src.render.render import render_grid +# Migration flag (shared across workflows): render with OpenMS-Insight engine. 
def explode_quant_traces_long(
    quant: pl.LazyFrame,
    *,
    feature_group_column: str = "FeatureGroupIndex",
    charges_column: str = "Charges",
    isotopes_column: str = "IsotopeIndices",
    centroid_mz_column: str = "CentroidMzs",
    rts_column: str = "RTs",
    mzs_column: str = "MZs",
    intensities_column: str = "Intensities",
) -> pl.LazyFrame:
    """Turn per-feature-group trace arrays into one row per trace point.

    The input holds, per feature group, parallel per-trace lists: the charge,
    isotope and centroid-m/z columns carry one value per trace, while the RT,
    m/z and intensity columns carry one comma-joined string per trace.

    The transformation runs in two stages:

    1. The seven source columns are renamed and the six list columns are
       exploded together, giving one row per trace.
    2. Each point string is split on ``","``, the three resulting lists are
       exploded together, and the values are cast to ``Float64``
       (non-strict, so unparsable text becomes null).

    Rows where any of ``rt``, ``mz`` or ``intensity`` is null are dropped, so
    empty / null trace strings contribute no points.

    Returns:
        A LazyFrame with the columns, in order: ``feature_group``, ``charge``,
        ``isotope``, ``centroid_mz``, ``rt``, ``mz``, ``intensity``.
    """
    raw_points = ("_rts", "_mzs", "_ints")
    point_axes = ("rt", "mz", "intensity")

    # Output name -> source column; insertion order fixes the select order.
    source_of = {
        "feature_group": feature_group_column,
        "charge": charges_column,
        "isotope": isotopes_column,
        "centroid_mz": centroid_mz_column,
        "_rts": rts_column,
        "_mzs": mzs_column,
        "_ints": intensities_column,
    }

    # Stage 1: one row per trace. The per-trace lists are parallel, so they are
    # exploded in a single call to keep them aligned.
    traces = quant.select(
        [pl.col(source).alias(target) for target, source in source_of.items()]
    ).explode(["charge", "isotope", "centroid_mz", *raw_points])

    # Stage 2: comma-joined strings -> lists -> one row per point.
    split_traces = traces.with_columns(
        [
            pl.col(raw).str.split(",").alias(axis)
            for raw, axis in zip(raw_points, point_axes)
        ]
    ).drop(list(raw_points))

    points = (
        split_traces.explode(list(point_axes))
        .with_columns(
            [pl.col(axis).cast(pl.Float64, strict=False) for axis in point_axes]
        )
        .drop_nulls(subset=list(point_axes))
    )

    return points.select(
        ["feature_group", "charge", "isotope", "centroid_mz", *point_axes]
    )
def build_quant_components(
    dataset_id: str,
    file_manager,
    state_manager,
    key_prefix: str,
) -> Optional[Callable[[], Any]]:
    """Build a render callable for the FLASHQuant feature-group view.

    Two components are constructed over the ``quant_dfs`` cache and share
    ``state_manager``:

    - a selector ``Table`` whose row click sets ``featureGroup`` from the
      ``FeatureGroupIndex`` column, and
    - a ``FeatureView`` over the long-format trace points, filtered by
      ``{featureGroup: feature_group}``.

    Args:
        dataset_id: Identifier of the experiment; also prefixes the cache ids.
        file_manager: Cache accessor handed to ``_load_polars`` / ``_oi_cache_dir``.
        state_manager: Selection state passed to both components at render
            time; it is not used while building.
        key_prefix: Prefix for the component keys.

    Returns:
        A zero-argument callable that renders the table above the feature
        view. No code path here returns ``None``; the ``Optional`` annotation
        is kept for interface compatibility.
        NOTE(review): a missing quant cache presumably surfaces as an exception
        from ``_load_polars`` rather than a ``None`` result — confirm.
    """
    from openms_insight import FeatureView, Table

    from src.parse.long_format import explode_quant_traces_long

    quant = _load_polars(file_manager, dataset_id, "quant_dfs")
    schema = quant.collect_schema().names()
    cache_dir = _oi_cache_dir(file_manager, dataset_id)

    def cid(name: str) -> str:
        return f"{dataset_id}__quant_{name}"

    def skey(name: str) -> str:
        return f"{key_prefix}_{name}"

    # The wide quant frame is already one row per feature group. Only the
    # scalar summary columns that exist are shown; when none of them exist the
    # full frame is passed through unchanged.
    summary_cols = [c for c in _FG_SUMMARY_COLUMNS if c in schema]
    fg_table = Table(
        cache_id=cid("feature_table"),
        data=quant.select(summary_cols) if summary_cols else quant,
        interactivity={FEATURE_GROUP: "FeatureGroupIndex"},
        index_field="FeatureGroupIndex",
        title="Feature Groups",
        cache_path=cache_dir,
    )

    # Long-format trace points, filtered by the selected feature group.
    traces_long = explode_quant_traces_long(quant)
    feature_view = FeatureView(
        cache_id=cid("feature_view"),
        data=traces_long,
        filters={FEATURE_GROUP: "feature_group"},
        charge_column="charge",
        mz_column="mz",
        rt_column="rt",
        intensity_column="intensity",
        isotope_column="isotope",
        title="Feature Group Visualization",
        cache_path=cache_dir,
    )

    def _render() -> None:
        fg_table(key=skey("feature_table"), state_manager=state_manager)
        feature_view(key=skey("feature_view"), state_manager=state_manager)

    return _render
def test_quant_long_format_preserves_points():
    """Exploding the per-group arrays preserves every trace point.

    Checks that each source row yields a distinct ``feature_group`` value and
    that the first source row contributes exactly as many long-format rows as
    its ``RTs`` strings contain comma-separated values.
    """
    from src.parse.long_format import explode_quant_traces_long

    q = pl.read_parquet(_FQ / "quant_dfs.pq")
    long = explode_quant_traces_long(q.lazy()).collect()

    # One unique feature group per source row
    assert long["feature_group"].n_unique() == q.height

    # First source row: total points == sum of split lengths over its traces.
    # Filter on the row's own FeatureGroupIndex rather than assuming it is 0,
    # so the check stays valid if the cache is not indexed from zero.
    first = q.row(0, named=True)
    expected_pts = sum(len(s.split(",")) for s in first["RTs"])
    got_pts = long.filter(
        pl.col("feature_group") == first["FeatureGroupIndex"]
    ).height
    assert got_pts == expected_pts
import build_quant_components + + render = build_quant_components("example", fake_fm, None, key_prefix="p0") + assert callable(render) + + +def test_feature_view_filters_to_group(fake_fm, monkeypatch, tmp_path): + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import FeatureView + + from src.parse.long_format import explode_quant_traces_long + + traces = explode_quant_traces_long(pl.scan_parquet(_FQ / "quant_dfs.pq")) + tc = traces.collect() + fv = FeatureView( + cache_id="fv_x", + data=traces, + filters={"featureGroup": "feature_group"}, + charge_column="charge", + mz_column="mz", + rt_column="rt", + intensity_column="intensity", + isotope_column="isotope", + cache_path=str(tmp_path), + ) + for fg in (0, 5, 100): + vd = fv._prepare_vue_data({"featureGroup": fg}) + data_key = next(k for k in vd if not k.startswith("_")) + assert len(vd[data_key]) == tc.filter(pl.col("feature_group") == fg).height From 6ca3ef20f87827c06b643c40e185825599933fd1 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 1 Jun 2026 13:43:05 +0000 Subject: [PATCH 08/18] docs: no-feature-loss audit + browser checklist (deletion gate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update docs/openms-insight-migration.md: mark all 3 phases built (flag-gated in src/render_oi/), refresh test counts (FLASHApp 72 passed/2 skipped; OI 464), and add the per-workflow no-feature-loss audit that gates retiring src/render/*. For each workflow the audit maps every legacy StateTracker key and update.py filter to its src/render_oi replacement (with file:line), accounts for every component's columns/axes, marks each ✅ data-path-verified (with the real-data test name) or 👁 needs-browser, and gives a click-by-click browser checklist. 
Also documents the exact post-audit deletion list — keeping scan_resolution.py and sequence_data_store.py (still imported by tnt_viewer) while removing components/render/update/StateTracker/initialize/compression.py and the js-component bundle. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- docs/openms-insight-migration.md | 190 ++++++++++++++++++++++++++----- 1 file changed, 162 insertions(+), 28 deletions(-) diff --git a/docs/openms-insight-migration.md b/docs/openms-insight-migration.md index a96ef0b..a244e45 100644 --- a/docs/openms-insight-migration.md +++ b/docs/openms-insight-migration.md @@ -66,26 +66,32 @@ is fully unit-tested (`tests/test_long_format.py`): These are **additive**: the existing render pipeline is untouched, so the old and new paths coexist during the phased rollout. -## Phased rollout (remaining) - -Each phase swaps one workflow's viewer to OpenMS-Insight, then retires the -corresponding `src/render/*` usage. **Do not delete `src/render/*` until a -workflow's no-feature-loss audit passes against the contract above.** - -1. **FLASHDeconv** — `content/FLASHDeconv/FLASHDeconvViewer.py` / - `FLASHDeconvLayoutManager.py`: heatmaps, scan/mass `Table`s, deconv/annotated - `LinePlot`s, `Scatter3D`, `DensityPlot`, `SequenceView` + `InternalFragmentMap` - (when a sequence is provided), one shared `StateManager`. Parse layer → - long format via the adapters above. -2. **FLASHTnT** — protein `Table` → `SequenceView` → tag `Table` → - combined-spectrum `LinePlot` (tagger overlay) → `DensityPlot` → heatmaps. - Preserve proteoform→scan resolution - (`scan_resolution.py:build_proteoform_scan_map`): resolve `proteinIndex`→scan, - expose a `scan`/`deconv_index` column so value-filters reproduce the PyArrow - pushdown. -3. **FLASHQuant** — `content/FLASHQuant/FLASHQuantViewer.py`: single - `FeatureView` (use `FeatureView.explode_traces` to convert the per-group - array format). 
+## Phased rollout (status) + +Each phase swaps one workflow's viewer to OpenMS-Insight behind the +`FLASHAPP_USE_OPENMS_INSIGHT` env flag (default **off** → legacy +`flash_viewer_grid`), then — **only after a browser-driven no-feature-loss audit +passes** — retires the corresponding `src/render/*` usage. The new engine lives +in `src/render_oi/`; the layout managers are unchanged (they only emit +component-name strings). + +> **Do not delete `src/render/*` or default the flag on until the +> browser checklist below passes for that workflow.** Everything up to the Vue +> render + click round-trip is verified on the bundled real data; the in-browser +> interaction is not (no headless browser in CI). + +1. **FLASHDeconv** ✅ built (`src/render_oi/deconv_viewer.py`): heatmaps, + scan/mass `Table`s, deconv/annotated `LinePlot`s, `Scatter3D` (optional + `massIndex`), `DensityPlot`, `SequenceView` + `InternalFragmentMap` (when a + sequence is provided), per-experiment `StateManager`. Parse → long format. +2. **FLASHTnT** ✅ built (`src/render_oi/tnt_viewer.py`): protein `Table` → + `SequenceView` (coverage) → tag `Table` → combined-spectrum `LinePlot` + (tagger overlay) → `DensityPlot` → heatmaps. Proteoform→scan resolution + (`build_proteoform_scan_map`) resolves `proteinIndex`→`deconvIndex` before + downstream panels render. +3. **FLASHQuant** ✅ built (`src/render_oi/quant_viewer.py`): feature-group + `Table` + single `FeatureView`; `explode_quant_traces_long` converts the + per-group array format to long trace-point format. ### Layout parity (all phases) @@ -100,14 +106,12 @@ panels; preserve save/load of layouts ## Verification - **OpenMS-Insight**: per-component unit tests (preprocess→cache→ - `_prepare_vue_data`) + `npm run build`. Current: **455 passed**, build green. -- **FLASHApp**: parse adapters unit-tested. Current: **53 passed**. 
-- **Per phase (requires a real dataset + a running app — not possible in CI - without sample data):** drive each workflow's `COMPONENT_OPTIONS`, confirm - every component renders and every cross-link in the plan's interaction - checklist round-trips (scan→spectra/mass/3D/sequence, mass→3D isolation, - protein→sequence/tag/spectrum, heatmap zoom levels, selection clear → None, - fragment overlay, multi-experiment isolation, layout add/remove + side-by-side). + `_prepare_vue_data`) + `npm run build`. Current: **464 passed**, build green. +- **FLASHApp**: parse adapters + per-workflow viewer engines tested against the + **bundled real example workspaces** (`tests/test_{deconv,tnt,quant}_viewer_realdata.py`, + `tests/test_long_format.py`). Current: **72 passed, 2 skipped**. +- **Browser audit (the deletion gate):** see the no-feature-loss audit and + click-by-click checklist below. This is the one layer CI cannot cover. ## Dependency @@ -115,3 +119,133 @@ panels; preserve save/load of layouts development use an editable/path install: `pip install -e ../OpenMS-Insight`, and either rebuild `js-component/dist` after Vue changes or run the bundle in dev mode with `SVC_DEV_MODE=true`. + +--- + +# No-feature-loss audit (deletion gate) + +This is the per-workflow contract that must hold before `src/render/*` and the +`js-component/` submodule are deleted (Verification §4 of the plan). For each +workflow: (A) every legacy `StateTracker` key and `update.py` filter is mapped to +its `src/render_oi/*` replacement, (B) every component's columns/axes are +accounted for, and (C) a click-by-click browser checklist confirms the +interaction round-trips. Run the app with the new engine: + +```bash +FLASHAPP_USE_OPENMS_INSIGHT=1 streamlit run app.py local +``` + +Legend: ✅ data-path verified on real data (test name in parentheses) · 👁 needs +browser confirmation. + +## Workflow 1 — FLASHDeconv + +Engine: `src/render_oi/deconv_viewer.py`. Tests: +`tests/test_deconv_viewer_realdata.py`. 
+ +### State keys (legacy `StateTracker`/`update.py` → new) + +| legacy key | legacy filter (`update.py`) | new identifier→column | status | +|---|---|---|---| +| `scanIndex` | `per_scan_data.iloc[scanIndex]` (L131‑134) | scan `Table` `interactivity={scanIndex:index}`; spectra/mass/3D/seq `filters={scanIndex:index}` | ✅ row counts exact (`test_scan_click_cross_link_row_counts`) | +| `massIndex` | `SignalPeaks[massIndex]`/`NoisyPeaks[massIndex]` (L142‑146) | mass `Table` `interactivity={massIndex:mass_id}`; `Scatter3D` `optional_filters={massIndex:mass_id}` | ✅ 3D isolates mass 0 (same test) | +| `heatmap_deconv/_deconv2/_raw/_raw2` | four bespoke `render_heatmap` range keys (L149‑176) | one `zoom_identifier` per `Heatmap` (`{comp}_zoom`) | ✅ levels build; 👁 zoom round-trip | +| `sequenceOut` / sequence cache | `get_sequence()` (L13‑29) | `SequenceView(sequence_data=)` when `result_exists('sequence','sequence')` | 👁 | + +### Component coverage + +| component | columns / axes preserved | status | +|---|---|---| +| Deconv/Raw MS1/MS2 `Heatmap` | x=`rt`, y=`mass`, color=`intensity` (log), zoom compression | ✅ build (incl. 608K→200K→20K levels); 👁 render | +| Scan `Table` | `index, Scan, MSLevel, RT, PrecursorMass, #Masses` | ✅ | +| Mass `Table` | exploded `MonoMass, SumIntensity, Min/MaxCharges, Min/MaxIsotopes, CosineScore, SNR, QScore` + `mass_id` | ✅ (`_explode_mass_table`) | +| Deconv `LinePlot` | x=`mass`, y=`intensity` | ✅ peak count exact | +| Annotated `LinePlot` | annotated peaks from `combined_spectrum` | ✅ | +| `Scatter3D` (Precursor S/N) | x=`mz*charge`, y=`charge`, z=`intensity`, signal/noise color | ✅ | +| `DensityPlot` (FDR) | precomputed `density_target`/`density_decoy` curves | ✅ (200‑pt target, empty decoy handled) | +| `SequenceView` (+`InternalFragmentMap`) | residues + fixed mods (C/M), deconvolved matching | 👁 (no sequence in bundled FD set) | + +### 👁 Browser checklist (FLASHDeconv) + +1. 
All `COMPONENT_OPTIONS` from `FLASHDeconvLayoutManager` render without error. +2. Click a scan row → deconv spectrum, annotated spectrum, mass table, **and** + 3D plot all update to that scan. +3. Click a mass row → 3D plot isolates that mass's signal/noisy peaks. +4. Zoom a heatmap → point density increases (correct compression level); zoom + out → returns to overview. +5. Deselect (click empty) → dependent panels clear (selection round-trips to + `None`), no stale data. +6. Submit a sequence → Sequence View + Internal Fragment Map appear and annotate + the selected scan's peaks. +7. Layout editor: add/remove a component, save, reload → viewer reflects it. +8. Side-by-side (2 experiments): a selection in panel A does **not** move + panel B (distinct `session_key` per panel — `oi_state_{panel_key}`). + +## Workflow 2 — FLASHTnT + +Engine: `src/render_oi/tnt_viewer.py`. Tests: +`tests/test_tnt_viewer_realdata.py`. + +### State keys + +| legacy behavior (`update.py`) | new | status | +|---|---|---| +| `proteinIndex` → `proteoform_scan_map` → `deconv_index`, pushdown `field('index')==deconv_index` (L122‑134) | protein `Table` `interactivity={proteinIndex:index}`; `render_experiment_tnt` resolves `proteinIndex`→`deconvIndex`; spectra/mass `filters={deconvIndex:index}` | ✅ resolution + tag count exact (`test_protein_click_cross_links`) | +| tag table: pushdown `field('Scan')==scan`, stamp `ProteinIndex` (L177‑192) | tag `Table` `filters={proteinIndex:ProteinIndex}` | ✅ 15 tags for protein 0 | +| sequence: `load_entry(sequence_data_ds, proteinIndex)` (L194‑213) | `SequenceView` `filters={proteinIndex:proteoform_index}`, per-proteoform `coverage[]`/`maxCoverage` | ✅ 450 residues + coverage | + +### Component coverage + +| component | preserved | status | +|---|---|---| +| Protein `Table` | `index, accession, description, ProteoformMass, Coverage(%), TagCount, ProteoformLevelQvalue` | ✅ | +| Tag `Table` | `TagIndex, TagSequence, StartPos, EndPos, Length, Score, DeltaMass` 
| ✅ | +| Combined `LinePlot` (tagger) | deconv primary (564) + annotated overlay (65 657) | ✅ overlay (`test_combined_spectrum_overlay`) | +| `SequenceView` | coverage shading, fixed mods, `settings.{tolerance,ion_types}` | ✅ data; 👁 render | +| `DensityPlot` (id-FDR) | `density_id_target/decoy` (empty in antibody set) | ✅ both-empty handled | +| MS1 raw/deconv `Heatmap` | as Deconv | ✅ | + +### 👁 Browser checklist (FLASHTnT) + +1. Click a protein row → sequence view (coverage colored), tag table, **and** + combined spectrum all update to that proteoform. +2. Tag table shows only the selected protein's tags; on-spectrum tag overlay (if + enabled) matches. +3. Combined spectrum shows both series (deconv sticks + annotated overlay). +4. Sequence view: per-residue coverage shading + fixed-mod (C/M) styling + + correct `ion_types`/`tolerance` from settings. +5. Selection clear → dependent panels reset. +6. Side-by-side isolation (distinct `oi_tnt_state_{panel_key}`). +7. **Tagger tag-geometry** (charge buttons + inter-residue amino-acid arrows + driven by `tagIndex`/`tagData`): confirm parity with legacy or log as the one + known gap (see footnote ¹ in the component table) before deletion. + +## Workflow 3 — FLASHQuant + +Engine: `src/render_oi/quant_viewer.py`. Tests: +`tests/test_quant_viewer_realdata.py`. No cross-component linking, no layout +manager (fixed single view). + +| component | preserved | status | +|---|---|---| +| Feature-group `Table` | scalar summary columns; click sets `featureGroup` | ✅ 1437 rows | +| `FeatureView` | per-charge 3D traces (x=`mz`, y=`rt`, z=`intensity`), isotope hover; `filters={featureGroup:feature_group}` | ✅ exact per-group point counts (0/5/100 → 1384/239/208) | + +### 👁 Browser checklist (FLASHQuant) + +1. Feature-group table renders all groups; selecting a group updates the + FeatureView to that group's traces. +2. 
Traces colored per charge; isotope info in hover; conflict-resolution + highlighting if a `conflict_resolution_dfs` cache is present. + +## After all three checklists pass + +1. Default `FLASHAPP_USE_OPENMS_INSIGHT` on (or remove the flag, making the new + engine unconditional) in the three viewers. +2. Delete `src/render/{components,render,update,StateTracker,initialize, + compression}.py` and the `js-component/` bundle + `openms-streamlit-vue-component` + submodule. Keep `src/render/{sequence,sequence_data_store,scan_resolution}.py` + if still imported by the parse layer (they are — `scan_resolution` is used by + `tnt_viewer`). +3. Drop the now-dead `quant_visualization` / `flash_viewer_grid` code paths. +4. Re-run the full suite + `npm run build`; commit the retirement per workflow. From 0351269729d72a791c5da4376b1ac6fced150b3f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 07:27:22 +0000 Subject: [PATCH 09/18] Default the OpenMS-Insight viewer engine on; install it (with Vue bundle) in Docker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Viewers: flip FLASHAPP_USE_OPENMS_INSIGHT from opt-in to opt-out across the three viewers (FLASHDeconv/TnT/Quant). Unset now renders via src/render_oi; set the var to 0/false/no/off to fall back to the legacy flash_viewer_grid. Dockerfiles (x86 + arm): add an openms-insight-build node stage that clones OpenMS-Insight at the validated commit (ARG OPENMS_INSIGHT_REF), runs npm build, and mirrors js-component/dist into openms_insight/js-component/dist — the path the hatchling wheel force-includes and rendering/bridge.py loads at runtime. The run-app stage installs that bundle-built checkout. Without this, the package's git tree omits the gitignored bundle and pip would ship a frontend-less install, so the requirements.txt openms-insight git line is stripped before pip install -r (alongside pyopenms) and replaced by this explicit, pinned install. 
Docs: update the migration doc — engine is now default-on (opt-out documented), add a Docker/packaging section explaining the bundle-build requirement, and note the browser checklist now gates only the deletion of the legacy engine, not the default flip. Verified: FLASHApp suite 72 passed/2 skipped; flag boolean truth-table; the Docker requirements-strip pipeline (drops pyopenms== and openms-insight@git, keeps pyopenms-viz); multi-stage ordering and the dist path vs bridge.py. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- Dockerfile | 33 +++++++++++++- Dockerfile.arm | 33 +++++++++++++- content/FLASHDeconv/FLASHDeconvViewer.py | 10 ++-- content/FLASHQuant/FLASHQuantViewer.py | 8 ++-- content/FLASHTnT/FLASHTnTViewer.py | 9 ++-- docs/openms-insight-migration.md | 58 ++++++++++++++++-------- 6 files changed, 118 insertions(+), 33 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0f72d95..0a93639 100644 --- a/Dockerfile +++ b/Dockerfile @@ -106,7 +106,9 @@ RUN pip install dist/*.whl # Install other dependencies (excluding pyopenms) COPY requirements.txt ./requirements.txt -RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt +RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt \ + | grep -Ev '^openms-insight[[:space:]]*[@=<>!~]' > requirements_cleaned.txt \ + && mv requirements_cleaned.txt requirements.txt RUN pip install -r requirements.txt WORKDIR / @@ -144,6 +146,28 @@ WORKDIR /openms-streamlit-vue-component RUN npm install RUN npm run build +# Build the OpenMS-Insight Vue bundle and stage a pip-installable checkout. +# OpenMS-Insight's wheel (hatchling) force-includes openms_insight/js-component/dist +# only if it exists on disk, but that bundle is gitignored — so installing the +# package straight from its git URL ships no frontend. Build the bundle here with +# node and hand the populated checkout to the python stage. 
Pinned to the +# validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). +FROM node:21 AS openms-insight-build +ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git +ARG OPENMS_INSIGHT_REF=9b4c57a60e63a4319c392afd6000aa205cdcbe36 +# Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). +ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json +RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ + && git -C /OpenMS-Insight checkout ${OPENMS_INSIGHT_REF} +WORKDIR /OpenMS-Insight/js-component +RUN npm install --no-audit --no-fund && npm run build +# Vite emits to js-component/dist; mirror it into the package path the wheel +# force-includes and rendering/bridge.py loads at runtime. +RUN mkdir -p /OpenMS-Insight/openms_insight/js-component \ + && cp -r dist /OpenMS-Insight/openms_insight/js-component/dist +# Slim the checkout to what the wheel build needs (drop node_modules, .git, tests). +RUN rm -rf /OpenMS-Insight/js-component /OpenMS-Insight/.git /OpenMS-Insight/tests + # Prepare and run streamlit app. FROM compile-openms AS run-app @@ -190,6 +214,13 @@ COPY presets.json /app/presets.json # Copy the pre-built Vue/JS component (built in the js-build stage above). COPY --from=js-build openms-streamlit-vue-component/dist /app/js-component/dist +# Install OpenMS-Insight — the reusable visualization components that back the +# default viewer engine (FLASHAPP_USE_OPENMS_INSIGHT, on by default). Installed +# from the bundle-built checkout staged in the openms-insight-build stage, because +# the package's git tree omits the (gitignored) pre-built Vue bundle. 
+COPY --from=openms-insight-build /OpenMS-Insight /tmp/openms-insight +RUN mamba run -n streamlit-env pip install /tmp/openms-insight && rm -rf /tmp/openms-insight + # add cron job to the crontab RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - diff --git a/Dockerfile.arm b/Dockerfile.arm index 9fe055e..974098a 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -101,7 +101,9 @@ RUN pip install dist/*.whl # Install other dependencies (excluding pyopenms) COPY requirements.txt ./requirements.txt -RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt +RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt \ + | grep -Ev '^openms-insight[[:space:]]*[@=<>!~]' > requirements_cleaned.txt \ + && mv requirements_cleaned.txt requirements.txt RUN pip install -r requirements.txt WORKDIR / @@ -139,6 +141,28 @@ WORKDIR /openms-streamlit-vue-component RUN npm install RUN npm run build +# Build the OpenMS-Insight Vue bundle and stage a pip-installable checkout. +# OpenMS-Insight's wheel (hatchling) force-includes openms_insight/js-component/dist +# only if it exists on disk, but that bundle is gitignored — so installing the +# package straight from its git URL ships no frontend. Build the bundle here with +# node and hand the populated checkout to the python stage. Pinned to the +# validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). +FROM node:21 AS openms-insight-build +ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git +ARG OPENMS_INSIGHT_REF=9b4c57a60e63a4319c392afd6000aa205cdcbe36 +# Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). 
+ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json +RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ + && git -C /OpenMS-Insight checkout ${OPENMS_INSIGHT_REF} +WORKDIR /OpenMS-Insight/js-component +RUN npm install --no-audit --no-fund && npm run build +# Vite emits to js-component/dist; mirror it into the package path the wheel +# force-includes and rendering/bridge.py loads at runtime. +RUN mkdir -p /OpenMS-Insight/openms_insight/js-component \ + && cp -r dist /OpenMS-Insight/openms_insight/js-component/dist +# Slim the checkout to what the wheel build needs (drop node_modules, .git, tests). +RUN rm -rf /OpenMS-Insight/js-component /OpenMS-Insight/.git /OpenMS-Insight/tests + # Prepare and run streamlit app. FROM compile-openms AS run-app @@ -171,6 +195,13 @@ COPY presets.json /app/presets.json # Copy the pre-built Vue/JS component (built in the js-build stage above). COPY --from=js-build openms-streamlit-vue-component/dist /app/js-component/dist +# Install OpenMS-Insight — the reusable visualization components that back the +# default viewer engine (FLASHAPP_USE_OPENMS_INSIGHT, on by default). Installed +# from the bundle-built checkout staged in the openms-insight-build stage, because +# the package's git tree omits the (gitignored) pre-built Vue bundle. 
+COPY --from=openms-insight-build /OpenMS-Insight /tmp/openms-insight +RUN mamba run -n streamlit-env pip install /tmp/openms-insight && rm -rf /tmp/openms-insight + # add cron job to the crontab RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - diff --git a/content/FLASHDeconv/FLASHDeconvViewer.py b/content/FLASHDeconv/FLASHDeconvViewer.py index 4bd57a5..2124ed4 100644 --- a/content/FLASHDeconv/FLASHDeconvViewer.py +++ b/content/FLASHDeconv/FLASHDeconvViewer.py @@ -8,11 +8,11 @@ from src.workflow.FileManager import FileManager from src.render.render import render_grid -# Migration flag: when truthy, render each experiment panel with the -# OpenMS-Insight engine (src.render_oi) instead of the legacy flash_viewer_grid. -# Default OFF so the rollout is opt-in and reversible (Phase 1). -USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "").lower() in ( - "1", "true", "yes", +# Migration flag: render each experiment panel with the OpenMS-Insight engine +# (src.render_oi) instead of the legacy flash_viewer_grid. Default ON; set +# FLASHAPP_USE_OPENMS_INSIGHT=0 (or false/no/off) to fall back to the old grid. +USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( + "0", "false", "no", "off", "", ) DEFAULT_LAYOUT = [['ms1_deconv_heat_map'], ['scan_table', 'mass_table'], diff --git a/content/FLASHQuant/FLASHQuantViewer.py b/content/FLASHQuant/FLASHQuantViewer.py index 341401d..90f3e6b 100644 --- a/content/FLASHQuant/FLASHQuantViewer.py +++ b/content/FLASHQuant/FLASHQuantViewer.py @@ -9,9 +9,11 @@ # from src.render.components import flash_viewer_grid_component, FlashViewerComponent, FLASHQuant from src.render.render import render_grid -# Migration flag (shared across workflows): render with OpenMS-Insight engine. 
-USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "").lower() in ( - "1", "true", "yes", +# Migration flag (shared across workflows): render with the OpenMS-Insight engine. +# Default ON; set FLASHAPP_USE_OPENMS_INSIGHT=0 (or false/no/off) to fall back to +# the legacy view. +USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( + "0", "false", "no", "off", "", ) # page initialization diff --git a/content/FLASHTnT/FLASHTnTViewer.py b/content/FLASHTnT/FLASHTnTViewer.py index 90f0d51..c830c6f 100644 --- a/content/FLASHTnT/FLASHTnTViewer.py +++ b/content/FLASHTnT/FLASHTnTViewer.py @@ -8,10 +8,11 @@ from src.workflow.FileManager import FileManager from src.render.render import render_grid -# Migration flag (shared with FLASHDeconv): when truthy, render each experiment -# panel with the OpenMS-Insight engine instead of the legacy flash_viewer_grid. -USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "").lower() in ( - "1", "true", "yes", +# Migration flag (shared with FLASHDeconv): render each experiment panel with the +# OpenMS-Insight engine instead of the legacy flash_viewer_grid. Default ON; set +# FLASHAPP_USE_OPENMS_INSIGHT=0 (or false/no/off) to fall back to the old grid. +USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( + "0", "false", "no", "off", "", ) diff --git a/docs/openms-insight-migration.md b/docs/openms-insight-migration.md index a244e45..9c66c62 100644 --- a/docs/openms-insight-migration.md +++ b/docs/openms-insight-migration.md @@ -68,17 +68,18 @@ new paths coexist during the phased rollout. ## Phased rollout (status) -Each phase swaps one workflow's viewer to OpenMS-Insight behind the -`FLASHAPP_USE_OPENMS_INSIGHT` env flag (default **off** → legacy -`flash_viewer_grid`), then — **only after a browser-driven no-feature-loss audit -passes** — retires the corresponding `src/render/*` usage. 
The new engine lives -in `src/render_oi/`; the layout managers are unchanged (they only emit -component-name strings). - -> **Do not delete `src/render/*` or default the flag on until the -> browser checklist below passes for that workflow.** Everything up to the Vue -> render + click round-trip is verified on the bundled real data; the in-browser -> interaction is not (no headless browser in CI). +All three workflows render through the OpenMS-Insight engine (`src/render_oi/`) +**by default**. The `FLASHAPP_USE_OPENMS_INSIGHT` env flag is now an **opt-out**: +set it to `0`/`false`/`no`/`off` to fall back to the legacy `flash_viewer_grid`. +The layout managers are unchanged (they only emit component-name strings). Docker +images install OpenMS-Insight with its Vue bundle built from the pinned commit +(`openms-insight-build` stage in both Dockerfiles); see "Docker / packaging" below. + +> **The default is on, but `src/render/*` is not yet deletable.** Everything up +> to the Vue render + click round-trip is verified on the bundled real data; the +> in-browser interaction is not (no headless browser in CI). The browser checklist +> below remains the gate for **removing** the legacy engine and the opt-out path — +> not for flipping the default, which the maintainer has chosen to enable now. 1. **FLASHDeconv** ✅ built (`src/render_oi/deconv_viewer.py`): heatmaps, scan/mass `Table`s, deconv/annotated `LinePlot`s, `Scatter3D` (optional @@ -113,12 +114,29 @@ panels; preserve save/load of layouts - **Browser audit (the deletion gate):** see the no-feature-loss audit and click-by-click checklist below. This is the one layer CI cannot cover. -## Dependency - -`openms-insight` is declared in `requirements.txt` (git dependency). For local -development use an editable/path install: `pip install -e ../OpenMS-Insight`, -and either rebuild `js-component/dist` after Vue changes or run the bundle in dev -mode with `SVC_DEV_MODE=true`. 
+## Docker / packaging + +OpenMS-Insight's wheel is built by **hatchling**, which `force-include`s the +pre-built Vue bundle at `openms_insight/js-component/dist` **only if it exists on +disk** — and that bundle is gitignored. So a plain `pip install +git+https://…/OpenMS-Insight` (what the `requirements.txt` line would do) yields a +package with **no frontend**. Three install paths handle this correctly: + +- **Docker** (`Dockerfile`, `Dockerfile.arm`): a dedicated `openms-insight-build` + node stage clones the repo at the pinned commit (`ARG OPENMS_INSIGHT_REF`, + default = the validated SHA), runs `npm run build`, mirrors `js-component/dist` + → `openms_insight/js-component/dist`, and hands the populated checkout to the + python stage, which `pip install`s it. The `requirements.txt` `openms-insight` + line is stripped before `pip install -r` (alongside `pyopenms`) so it installs + exactly once, with a working bundle. Bump the image by overriding + `OPENMS_INSIGHT_REF` (or rely on the branch cache-bust `ADD`). +- **Local dev / CI / web sessions**: the SessionStart hook + (`.claude/hooks/session-start.sh`) does the same (strip line → `npm run build` → + mirror dist → `pip install -e ../OpenMS-Insight`). Or run the Vue bundle in dev + mode with `SVC_DEV_MODE=true`. +- **`requirements.txt`**: keeps the `git+…@` ref as a declarative pointer + for the migration branch; both install paths above strip and replace it, so it + is never the thing that actually provides the frontend. --- @@ -132,7 +150,9 @@ accounted for, and (C) a click-by-click browser checklist confirms the interaction round-trips. Run the app with the new engine: ```bash +# The OpenMS-Insight engine is the default; this just makes it explicit. 
FLASHAPP_USE_OPENMS_INSIGHT=1 streamlit run app.py local +# To audit the legacy engine instead: FLASHAPP_USE_OPENMS_INSIGHT=0 streamlit run app.py local ``` Legend: ✅ data-path verified on real data (test name in parentheses) · 👁 needs @@ -240,8 +260,8 @@ manager (fixed single view). ## After all three checklists pass -1. Default `FLASHAPP_USE_OPENMS_INSIGHT` on (or remove the flag, making the new - engine unconditional) in the three viewers. +1. Remove the `FLASHAPP_USE_OPENMS_INSIGHT` opt-out (it already defaults on), + making the new engine unconditional in the three viewers. 2. Delete `src/render/{components,render,update,StateTracker,initialize, compression}.py` and the `js-component/` bundle + `openms-streamlit-vue-component` submodule. Keep `src/render/{sequence,sequence_data_store,scan_resolution}.py` From 53a1e2b36c93eaa85d69d09419a99490e2fc3046 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 07:59:16 +0000 Subject: [PATCH 10/18] CI: build OpenMS-Insight Vue bundle in unit-tests; address review nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix FLASHApp#92 pytest: the unit-tests workflow ran a raw 'pip install -r requirements.txt', which fails on the 'openms-insight @ git+...' line because the wheel force-includes a gitignored Vue bundle (hatchling FileNotFoundError at metadata generation). Per the chosen approach, the workflow now sets up node, clones OpenMS-Insight on the shared branch, builds + mirrors the bundle, installs the checkout, then installs the rest of requirements with the openms-insight line stripped. (Pinning the requirements line to a SHA would NOT fix this — the bundle is gitignored regardless of branch vs SHA.) Also bump the Docker OPENMS_INSIGHT_REF to the now-CI-green OI commit, and apply the actionable CodeRabbit review points: - session-start.sh: widen the strip char-class to match the Dockerfiles ([@=<>!~]) so a future pinned 'openms-insight==' would also be stripped. 
- common.use_openms_insight(): centralize the flag parsing; the three viewers now call it (DRY) and drop their now-unused 'import os'. - tnt_viewer: load protein/scan tables via _load_pandas directly (drop a pandas->polars->pandas round trip); only build the proteoform scan map when a protein is actually selected. - deconv_viewer: drop a no-op .rename({'mass':'mass','index':'index'}). - migration doc: correct the packaging section (CI builds the bundle, not the hook) and spell out that any raw 'pip install -r requirements.txt' must strip the git line + build the bundle first. Verified: py_compile of touched modules, unit-tests.yml YAML parse, helper toggle, suite 72 passed/2 skipped. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- .claude/hooks/session-start.sh | 2 +- .github/workflows/unit-tests.yml | 27 ++++++++++++++++++++---- Dockerfile | 2 +- Dockerfile.arm | 2 +- content/FLASHDeconv/FLASHDeconvViewer.py | 12 ++++------- content/FLASHQuant/FLASHQuantViewer.py | 12 ++++------- content/FLASHTnT/FLASHTnTViewer.py | 12 ++++------- docs/openms-insight-migration.md | 25 ++++++++++++++++------ src/common/common.py | 16 ++++++++++++++ src/render_oi/deconv_viewer.py | 1 - src/render_oi/tnt_viewer.py | 14 ++++++++---- 11 files changed, 82 insertions(+), 43 deletions(-) diff --git a/.claude/hooks/session-start.sh b/.claude/hooks/session-start.sh index f1131f8..a88c970 100755 --- a/.claude/hooks/session-start.sh +++ b/.claude/hooks/session-start.sh @@ -29,7 +29,7 @@ echo "[session-start] FLASHApp setup starting" # clone) AND aborts the whole install. So strip that line here and install the # local sibling separately in step 2. 
REQ_TMP="$(mktemp)" -grep -ivE '^openms-insight[[:space:]]*@' "$PROJECT_DIR/requirements.txt" > "$REQ_TMP" || cp "$PROJECT_DIR/requirements.txt" "$REQ_TMP" +grep -ivE '^openms-insight[[:space:]]*[@=<>!~]' "$PROJECT_DIR/requirements.txt" > "$REQ_TMP" || cp "$PROJECT_DIR/requirements.txt" "$REQ_TMP" python3 -m pip install --user --quiet -r "$REQ_TMP" || { echo "[session-start] WARNING: requirements.txt install hit an error; continuing" } diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 5f78666..f7bae80 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -24,13 +24,32 @@ jobs: cache: pip cache-dependency-path: requirements.txt + - name: Set up Node + # Needed to build the OpenMS-Insight Vue bundle (see install step). + uses: actions/setup-node@v4 + with: + node-version: "21" + - name: Install dependencies run: | python -m pip install --upgrade pip - # Pinned runtime deps (pyopenms is needed so ParameterManager imports - # cleanly at collection time) plus test-only deps. fakeredis backs the - # QueueManager/WorkflowManager tests, which pytest.importorskip it. - pip install -r requirements.txt + # OpenMS-Insight is declared in requirements.txt as a git dependency, + # but its wheel force-includes a Vue bundle that is gitignored, so a + # raw `pip install git+…` fails at metadata generation. Build the + # bundle and install from the checkout, mirroring the Dockerfiles and + # .claude/hooks/session-start.sh. 
+ git clone --depth 1 --branch claude/peaceful-mayer-YqiXZ \ + https://github.com/t0mdavid-m/OpenMS-Insight.git /tmp/openms-insight + (cd /tmp/openms-insight/js-component && npm install --no-audit --no-fund && npm run build) + mkdir -p /tmp/openms-insight/openms_insight/js-component + cp -r /tmp/openms-insight/js-component/dist \ + /tmp/openms-insight/openms_insight/js-component/dist + pip install /tmp/openms-insight + # Remaining deps minus the openms-insight git line (installed above); + # pyopenms is needed so ParameterManager imports cleanly at collection + # time. fakeredis backs the QueueManager/WorkflowManager tests. + grep -ivE '^openms-insight[[:space:]]*[@=<>!~]' requirements.txt > /tmp/req.txt + pip install -r /tmp/req.txt pip install pytest fakeredis - name: Run unit tests diff --git a/Dockerfile b/Dockerfile index 0a93639..663183c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -154,7 +154,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=9b4c57a60e63a4319c392afd6000aa205cdcbe36 +ARG OPENMS_INSIGHT_REF=d886d2bfb7865a74ca8e75e5e5017824e0170468 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/Dockerfile.arm b/Dockerfile.arm index 974098a..2319ba1 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -149,7 +149,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). 
FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=9b4c57a60e63a4319c392afd6000aa205cdcbe36 +ARG OPENMS_INSIGHT_REF=d886d2bfb7865a74ca8e75e5e5017824e0170468 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/content/FLASHDeconv/FLASHDeconvViewer.py b/content/FLASHDeconv/FLASHDeconvViewer.py index 2124ed4..9b12414 100644 --- a/content/FLASHDeconv/FLASHDeconvViewer.py +++ b/content/FLASHDeconv/FLASHDeconvViewer.py @@ -1,19 +1,15 @@ -import os - import streamlit as st from pathlib import Path -from src.common.common import page_setup, save_params +from src.common.common import page_setup, save_params, use_openms_insight from src.workflow.FileManager import FileManager from src.render.render import render_grid # Migration flag: render each experiment panel with the OpenMS-Insight engine -# (src.render_oi) instead of the legacy flash_viewer_grid. Default ON; set -# FLASHAPP_USE_OPENMS_INSIGHT=0 (or false/no/off) to fall back to the old grid. -USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( - "0", "false", "no", "off", "", -) +# (src.render_oi) instead of the legacy flash_viewer_grid. Default ON; opt out +# with FLASHAPP_USE_OPENMS_INSIGHT=0 (see src.common.common.use_openms_insight). 
+USE_OPENMS_INSIGHT = use_openms_insight() DEFAULT_LAYOUT = [['ms1_deconv_heat_map'], ['scan_table', 'mass_table'], ['anno_spectrum', 'deconv_spectrum'], ['3D_SN_plot']] diff --git a/content/FLASHQuant/FLASHQuantViewer.py b/content/FLASHQuant/FLASHQuantViewer.py index 90f3e6b..dea36da 100644 --- a/content/FLASHQuant/FLASHQuantViewer.py +++ b/content/FLASHQuant/FLASHQuantViewer.py @@ -1,20 +1,16 @@ -import os - import streamlit as st from pathlib import Path from src.workflow.FileManager import FileManager -from src.common.common import page_setup, save_params +from src.common.common import page_setup, save_params, use_openms_insight # from src.render.components import flash_viewer_grid_component, FlashViewerComponent, FLASHQuant from src.render.render import render_grid # Migration flag (shared across workflows): render with the OpenMS-Insight engine. -# Default ON; set FLASHAPP_USE_OPENMS_INSIGHT=0 (or false/no/off) to fall back to -# the legacy view. -USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( - "0", "false", "no", "off", "", -) +# Default ON; opt out with FLASHAPP_USE_OPENMS_INSIGHT=0 (see +# common.use_openms_insight). +USE_OPENMS_INSIGHT = use_openms_insight() # page initialization params = page_setup() diff --git a/content/FLASHTnT/FLASHTnTViewer.py b/content/FLASHTnT/FLASHTnTViewer.py index c830c6f..b9c327a 100644 --- a/content/FLASHTnT/FLASHTnTViewer.py +++ b/content/FLASHTnT/FLASHTnTViewer.py @@ -1,19 +1,15 @@ -import os - import streamlit as st from pathlib import Path -from src.common.common import page_setup, save_params +from src.common.common import page_setup, save_params, use_openms_insight from src.workflow.FileManager import FileManager from src.render.render import render_grid # Migration flag (shared with FLASHDeconv): render each experiment panel with the -# OpenMS-Insight engine instead of the legacy flash_viewer_grid. 
Default ON; set -# FLASHAPP_USE_OPENMS_INSIGHT=0 (or false/no/off) to fall back to the old grid. -USE_OPENMS_INSIGHT = os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( - "0", "false", "no", "off", "", -) +# OpenMS-Insight engine instead of the legacy flash_viewer_grid. Default ON; opt +# out with FLASHAPP_USE_OPENMS_INSIGHT=0 (see common.use_openms_insight). +USE_OPENMS_INSIGHT = use_openms_insight() DEFAULT_LAYOUT = [ diff --git a/docs/openms-insight-migration.md b/docs/openms-insight-migration.md index 9c66c62..5a069e7 100644 --- a/docs/openms-insight-migration.md +++ b/docs/openms-insight-migration.md @@ -119,8 +119,10 @@ panels; preserve save/load of layouts OpenMS-Insight's wheel is built by **hatchling**, which `force-include`s the pre-built Vue bundle at `openms_insight/js-component/dist` **only if it exists on disk** — and that bundle is gitignored. So a plain `pip install -git+https://…/OpenMS-Insight` (what the `requirements.txt` line would do) yields a -package with **no frontend**. Three install paths handle this correctly: +git+https://…/OpenMS-Insight` fails outright: hatchling raises `FileNotFoundError: +Forced include not found: …/openms_insight/js-component/dist` during the wheel +build. **Every path that runs a raw `pip install -r requirements.txt` must first +build the bundle and strip the git line**, or it will fail. The three paths do: - **Docker** (`Dockerfile`, `Dockerfile.arm`): a dedicated `openms-insight-build` node stage clones the repo at the pinned commit (`ARG OPENMS_INSIGHT_REF`, @@ -129,14 +131,23 @@ package with **no frontend**. Three install paths handle this correctly: python stage, which `pip install`s it. The `requirements.txt` `openms-insight` line is stripped before `pip install -r` (alongside `pyopenms`) so it installs exactly once, with a working bundle. Bump the image by overriding - `OPENMS_INSIGHT_REF` (or rely on the branch cache-bust `ADD`). 
-- **Local dev / CI / web sessions**: the SessionStart hook + `OPENMS_INSIGHT_REF`. +- **CI** (`.github/workflows/unit-tests.yml`): sets up node, clones OI on the + shared migration branch, `npm run build` + mirrors the dist, `pip install`s the + checkout, then installs the rest of `requirements.txt` **with the + `openms-insight` line stripped** (`grep -ivE '^openms-insight…'`). Tests + `pytest.importorskip("openms_insight")` and exercise data-prep only, so a + frontend is not strictly required, but installing the package is. +- **Local dev / web sessions**: the SessionStart hook (`.claude/hooks/session-start.sh`) does the same (strip line → `npm run build` → mirror dist → `pip install -e ../OpenMS-Insight`). Or run the Vue bundle in dev mode with `SVC_DEV_MODE=true`. -- **`requirements.txt`**: keeps the `git+…@` ref as a declarative pointer - for the migration branch; both install paths above strip and replace it, so it - is never the thing that actually provides the frontend. + +The `requirements.txt` `git+…@` ref is **only a declarative pointer** to +the migration branch — it is never what actually installs a working frontend, and +a raw `pip install -r requirements.txt` with that line present will fail (see +above). CI tracks the branch for integration signal; Docker pins a SHA for +reproducible images. --- diff --git a/src/common/common.py b/src/common/common.py index c7fb511..58978f4 100644 --- a/src/common/common.py +++ b/src/common/common.py @@ -32,6 +32,22 @@ OS_PLATFORM = sys.platform +def use_openms_insight() -> bool: + """Whether viewers render via the OpenMS-Insight engine (``src.render_oi``). + + Default ON; opt out by setting ``FLASHAPP_USE_OPENMS_INSIGHT`` to one of + ``0``/``false``/``no``/``off`` (or empty) to fall back to flash_viewer_grid. + Centralized here so the truthy/falsy set has a single source of truth. 
+ """ + return os.environ.get("FLASHAPP_USE_OPENMS_INSIGHT", "1").strip().lower() not in ( + "0", + "false", + "no", + "off", + "", + ) + + def is_safe_workspace_name(name: str) -> bool: """ Check if a workspace name is safe (no path traversal characters). diff --git a/src/render_oi/deconv_viewer.py b/src/render_oi/deconv_viewer.py index b56bd15..dc87f73 100644 --- a/src/render_oi/deconv_viewer.py +++ b/src/render_oi/deconv_viewer.py @@ -309,7 +309,6 @@ def _build_sequence_view( per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") peaks_long = ( explode_spectrum_long(per_scan) - .rename({"mass": "mass", "index": "index"}) .with_columns(pl.int_range(pl.len()).over("index").alias("peak_id")) ) diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index 76d3a4a..cb5a8c6 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -26,7 +26,13 @@ import polars as pl -from .deconv_viewer import _HEATMAP_SPEC, _load_pandas_pl, _load_polars, _oi_cache_dir +from .deconv_viewer import ( + _HEATMAP_SPEC, + _load_pandas, + _load_pandas_pl, + _load_polars, + _oi_cache_dir, +) logger = logging.getLogger(__name__) @@ -44,8 +50,8 @@ def _build_proteoform_scan_map(file_manager, dataset_id: str) -> Dict[int, Dict[ """proteinIndex → {scan, deconv_index} using the existing resolver.""" from src.render.scan_resolution import build_proteoform_scan_map - prot = _load_pandas_pl(file_manager, dataset_id, "protein_dfs").to_pandas() - scan = _load_pandas_pl(file_manager, dataset_id, "scan_table").to_pandas() + prot = _load_pandas(file_manager, dataset_id, "protein_dfs") + scan = _load_pandas(file_manager, dataset_id, "scan_table") return build_proteoform_scan_map(prot[["index", "Scan"]], scan[["index", "Scan"]]) @@ -281,9 +287,9 @@ def render_experiment_tnt( # Resolve proteinIndex → deconvIndex BEFORE rendering downstream panels so # the spectrum/mass/sequence filters see the right scan on this run. 
- scan_map = _build_proteoform_scan_map(file_manager, dataset_id) protein_index = state_manager.get_selection(PROTEIN) if protein_index is not None: + scan_map = _build_proteoform_scan_map(file_manager, dataset_id) entry = scan_map.get(int(protein_index)) deconv_index = entry["deconv_index"] if entry else None if state_manager.get_selection(DECONV) != deconv_index: From f94e5ed5144f483bafbe54add7ee81d56b43a0de Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 09:35:05 +0000 Subject: [PATCH 11/18] Fix FLASHTnT Sequence View 'Component unavailable' (parquet sequence_data) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A fresh FLASHTnT run stores sequence_data as a parquet dataset. _sequence_table called file_manager.get_results(...) WITHOUT use_pyarrow=True, so FileManager returned a pandas DataFrame; the code then fell through to reconstruct_all(df), which raised, and a bare 'except: return None' swallowed it -> build returned None -> the viewer showed 'Component unavailable: sequence_view'. The bundled example fixture is a stale .pkl.gz dict and the test's fake FileManager returned a Path for .pq, so both masked the bug. Fix: request use_pyarrow=True (FileManager then returns a pyarrow Dataset that reconstruct_all reads), keep the dict/.gz fallbacks for older caches, and stop swallowing failures so a genuine error surfaces as a render error instead of a silent 'unavailable'. Add tests/test_tnt_sequence_loader.py with a FileManager fake that mirrors real loader semantics (pandas by default, pyarrow Dataset on use_pyarrow) — it fails on the pre-fix code and passes on the fix, independent of the bundled data. This restores the proteoform sequence + per-residue coverage shading. Remaining TnT Sequence View parity (b/y fragment-ion flags, matching-fragments table, and the theoretical/observed/delta mass header) is tracked separately: those need peaks_data and/or an OpenMS-Insight SequenceView capability addition. 
Verified: full suite 73 passed, 2 skipped. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- .../default/flashtnt/cache/cache.db | Bin 32768 -> 32768 bytes src/render_oi/tnt_viewer.py | 20 ++-- tests/test_tnt_sequence_loader.py | 87 ++++++++++++++++++ 3 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 tests/test_tnt_sequence_loader.py diff --git a/example-data/workspaces/default/flashtnt/cache/cache.db b/example-data/workspaces/default/flashtnt/cache/cache.db index c6f203f4ae9699b232a20b80d413ab6fa149d0a6..899103169c3ba29443ec20651930379c45e63f32 100644 GIT binary patch delta 141 zcmZo@U}|V!njkI6%D}+D0mN)T%nZcp6LpM5SsC=oI(d0^Gq7+uFmO5WALNbW`?Im} z3@2Ah1tYt-vNB_*`Q})zSSAS_g_O+Vf}F(4_`Jm2RD}@Nh!9PFQ-;M88|QO1OEa^J XtEw`#R&GAZ)4(W#P3`7Cd@O|k(B>sS delta 103 zcmZo@U}|V!njkI6%)r3F0mN)T3}UNJ)G-!iX3)#Jz{}^xz`_;Gz!l7YkY_jFpUr{_ z_c^&*QW@FBm6aJg1vh(f#WHbgHghnui>sXEQJ90 C!xtF< diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index cb5a8c6..18ed766 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -64,23 +64,27 @@ def _sequence_table(file_manager, dataset_id: str) -> Optional[pl.LazyFrame]: ``precursor_charge``, and per-proteoform coverage arrays so OI's extended SequenceView can shade residues. """ - res = file_manager.get_results(dataset_id, ["sequence_data"], partial=True) + res = file_manager.get_results( + dataset_id, ["sequence_data"], partial=True, use_pyarrow=True + ) if "sequence_data" not in res: return None p = res["sequence_data"] - # Example caches store sequence_data as a pickled dict (.pkl.gz); newer ones - # as a parquet dataset. Handle the pickle path (what the bundled data uses). + # A fresh FLASHTnT run stores sequence_data as a parquet dataset; with + # use_pyarrow=True FileManager hands back a pyarrow Dataset (without it we'd get + # a pandas DataFrame that reconstruct_all can't read — the cause of the + # "Component unavailable: sequence_view" regression). 
Older/example caches store + # it as a pickled {pid: entry} dict (FileManager unpickles .pkl.gz for us, but a + # path may also be handed back). Let failures propagate so they surface as a + # render error rather than a silent None. if isinstance(p, Path) and p.suffix == ".gz": data = _load_pickle_gz(p) elif isinstance(p, dict): data = p else: - try: - from src.render.sequence_data_store import reconstruct_all + from src.render.sequence_data_store import reconstruct_all - data = reconstruct_all(p) - except Exception: - return None + data = reconstruct_all(p) rows = [] for pid in sorted(data): diff --git a/tests/test_tnt_sequence_loader.py b/tests/test_tnt_sequence_loader.py new file mode 100644 index 0000000..dff47cb --- /dev/null +++ b/tests/test_tnt_sequence_loader.py @@ -0,0 +1,87 @@ +"""Regression test for the FLASHTnT Sequence View data loader. + +A fresh FLASHTnT run stores ``sequence_data`` as a *parquet dataset*. The real +``FileManager.get_results`` returns a pandas DataFrame for such a column *unless* +``use_pyarrow=True`` is requested. ``_sequence_table`` must request pyarrow so +``reconstruct_all`` can read the dataset; otherwise it raises, gets swallowed, and +the Sequence View renders "Component unavailable: sequence_view". + +This test uses a FileManager fake that mirrors the real loader semantics +(pandas by default, pyarrow Dataset on ``use_pyarrow=True``), so it fails on the +pre-fix code and passes on the fix — independent of the bundled example data. 
+""" + +import pandas as pd +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.parquet as pq +import pytest + +pl = pytest.importorskip("polars") +pytest.importorskip("openms_insight") + + +def _write_sequence_data(path): + table = pa.table( + { + "proteoform_index": pa.array([0, 1], type=pa.int64()), + "sequence": pa.array( + [list("PEPTIDER"), list("ACDEFGHK")], type=pa.list_(pa.string()) + ), + "coverage": pa.array( + [[0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0], [0.0] * 8], + type=pa.list_(pa.float64()), + ), + "maxCoverage": pa.array([2.0, 0.0], type=pa.float64()), + } + ) + pq.write_table(table, path) + + +class _RealisticFM: + """Mirrors FileManager.get_results for a stored_data ``.pq`` column: pandas by + default, a pyarrow Dataset when ``use_pyarrow=True`` (see + src/workflow/FileManager.py:get_results).""" + + def __init__(self, pq_path): + self._pq = pq_path + + def get_results( + self, dataset_id, names, partial=False, use_pyarrow=False, use_polars=False + ): + out = {} + for n in names: + if n != "sequence_data": + continue + if use_pyarrow: + out[n] = ds.dataset(self._pq, format="parquet") + elif use_polars: + out[n] = pl.scan_parquet(self._pq) + else: + out[n] = pd.read_parquet(self._pq) # the trap the bug fell into + return out + + +def test_sequence_table_reads_parquet_dataset(tmp_path): + from src.render_oi.tnt_viewer import _sequence_table + + pqp = tmp_path / "sequence_data.pq" + _write_sequence_data(pqp) + fm = _RealisticFM(pqp) + + # Sanity: a default get_results would hand back a pandas DataFrame (the form + # that broke reconstruct_all). The loader must avoid this by using pyarrow. 
+ assert isinstance( + fm.get_results("ds", ["sequence_data"])["sequence_data"], pd.DataFrame + ) + + lf = _sequence_table(fm, "ds") + assert lf is not None, "parquet sequence_data must load (regression: use_pyarrow)" + df = lf.collect() + assert df.height == 2 + assert set(df["proteoform_index"].to_list()) == {0, 1} + + row0 = df.filter(pl.col("proteoform_index") == 0).row(0, named=True) + assert row0["sequence"] == "PEPTIDER" + assert row0["coverage"][:3] == [0.0, 1.0, 2.0] + assert row0["max_coverage"] == 2.0 From 4c09d40192afe400e9f50feb6988c1939e810762 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 09:58:38 +0000 Subject: [PATCH 12/18] FLASHTnT Sequence View: feed mass header + precomputed fragments + peaks Now that OpenMS-Insight's SequenceView accepts a theoretical/observed mass header and precomputed per-residue fragment masses (OI ec8e061), wire the FLASHTnT viewer to reach legacy parity: - _sequence_table now also carries, per proteoform: theoretical_mass and observed_mass (= computed_mass / ProteoformMass; the -1.0 'unmatched' sentinel maps to null so the header is omitted), plus precomputed fragment_masses_{a..z} (list[list[float]] with mod-ambiguity variants) straight from the entry. - new _peaks_table builds the observed peaks SequenceView matches against: each proteoform's scan deconv peaks (neutral MonoMass + SumIntensity from deconv_spectrum), stamped with proteoform_index via the proteoform->scan map. - build_component_tnt's sequence_view passes theoretical_mass_column / observed_mass_column / fragment_mass_columns + peaks_data + interactivity so the Theo/Obs/Delta header, b/y annotation flags, and Matching Fragments table render from the precomputed (modification-aware) masses rather than a bare-sequence recompute. Also bump the Docker OPENMS_INSIGHT_REF to ec8e061 (the enhanced OI). 
Verified: full suite 73 passed, 2 skipped; the realdata test builds sequence_view through the new path against the bundled fixture, and a strengthened loader test locks the mass/fragment plumbing + the -1.0 sentinel. Browser verification of the Vue rendering is still required (see the checklist in chat). https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- Dockerfile | 2 +- Dockerfile.arm | 2 +- src/render_oi/tnt_viewer.py | 104 +++++++++++++++++++++++++++--- tests/test_tnt_sequence_loader.py | 15 +++++ 4 files changed, 111 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 663183c..34cceeb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -154,7 +154,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=d886d2bfb7865a74ca8e75e5e5017824e0170468 +ARG OPENMS_INSIGHT_REF=ec8e06142dd7b9cb01746d39f791e87d07810be2 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/Dockerfile.arm b/Dockerfile.arm index 2319ba1..501bd71 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -149,7 +149,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=d886d2bfb7865a74ca8e75e5e5017824e0170468 +ARG OPENMS_INSIGHT_REF=ec8e06142dd7b9cb01746d39f791e87d07810be2 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). 
ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index 18ed766..19ef8ec 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -55,6 +55,65 @@ def _build_proteoform_scan_map(file_manager, dataset_id: str) -> Dict[int, Dict[ return build_proteoform_scan_map(prot[["index", "Scan"]], scan[["index", "Scan"]]) +# Ion types whose precomputed per-residue fragment masses are forwarded to +# SequenceView (the proteoform entry carries a/b/c/x/y/z). +_ION_TYPES = ("a", "b", "c", "x", "y", "z") + + +def _observed_mass(value): + """ProteoformMass, or None when FLASHTnT's -1.0 'unmatched' sentinel is set + (so SequenceView omits the observed / Δ-mass header).""" + try: + mass = float(value) + except (TypeError, ValueError): + return None + return mass if mass > 0 else None + + +def _frag_masses(value): + """Per-residue precomputed fragment masses as list[list[float]] (the inner list + holds modification-ambiguity variants); tolerates a flat list[float].""" + if not value: + return [] + out = [] + for residue in value: + if isinstance(residue, (list, tuple)): + out.append([float(x) for x in residue]) + else: + out.append([float(residue)]) + return out + + +def _peaks_table( + file_manager, dataset_id: str, scan_map: Dict[int, Dict[str, int]] +) -> Optional[pl.LazyFrame]: + """Observed deconvolved peaks per proteoform, for SequenceView fragment matching. + + SequenceView matches the precomputed fragment masses against observed peaks. We + supply each proteoform's scan peaks (neutral ``MonoMass`` + ``SumIntensity`` from + ``deconv_spectrum``) stamped with ``proteoform_index`` so the component's + ``filters`` select the right peaks for the current selection. 
+ """ + pairs = [ + {"proteoform_index": int(pid), "deconv_index": int(e["deconv_index"])} + for pid, e in (scan_map or {}).items() + if e.get("deconv_index") is not None + ] + if not pairs: + return None + spec = _load_polars(file_manager, dataset_id, "deconv_spectrum") + long = ( + spec.select(["index", "MonoMass", "SumIntensity"]) + .explode(["MonoMass", "SumIntensity"]) + .rename({"MonoMass": "mass", "SumIntensity": "intensity"}) + .with_columns(pl.int_range(pl.len()).over("index").alias("peak_id")) + ) + mapping = pl.DataFrame(pairs).lazy() + return long.join( + mapping, left_on="index", right_on="deconv_index", how="inner" + ).select(["proteoform_index", "peak_id", "mass", "intensity"]) + + def _sequence_table(file_manager, dataset_id: str) -> Optional[pl.LazyFrame]: """Build a one-row-per-proteoform sequence frame for SequenceView. @@ -90,18 +149,31 @@ def _sequence_table(file_manager, dataset_id: str) -> Optional[pl.LazyFrame]: for pid in sorted(data): entry = data[pid] seq = entry.get("sequence") or [] - rows.append( - { - "proteoform_index": int(pid), - "sequence": "".join(seq) if isinstance(seq, list) else str(seq), - "precursor_charge": 1, - "coverage": [float(c) for c in (entry.get("coverage") or [])], - "max_coverage": float(entry.get("maxCoverage") or 0.0), - } - ) + row = { + "proteoform_index": int(pid), + "sequence": "".join(seq) if isinstance(seq, list) else str(seq), + "precursor_charge": 1, + "coverage": [float(c) for c in (entry.get("coverage") or [])], + "max_coverage": float(entry.get("maxCoverage") or 0.0), + # Header masses: theoretical from the sequence, observed = ProteoformMass + # (FLASHTnT stores -1.0 when unmatched -> None so the header omits it). + "theoretical_mass": float(entry.get("theoretical_mass") or 0.0), + "observed_mass": _observed_mass(entry.get("computed_mass")), + } + # Precomputed per-residue fragment masses (account for proteoform mods); + # SequenceView matches these against the scan peaks instead of recomputing. 
+ for ion in _ION_TYPES: + row[f"fragment_masses_{ion}"] = _frag_masses( + entry.get(f"fragment_masses_{ion}") + ) + rows.append(row) if not rows: return None - return pl.DataFrame(rows).lazy() + return ( + pl.DataFrame(rows) + .with_columns(pl.col("observed_mass").cast(pl.Float64, strict=False)) + .lazy() + ) def build_component_tnt( @@ -200,6 +272,8 @@ def build_component_tnt( if seq_tbl is None: return None settings = _tnt_settings(file_manager, dataset_id) + scan_map = _build_proteoform_scan_map(file_manager, dataset_id) + peaks_tbl = _peaks_table(file_manager, dataset_id, scan_map) sv = SequenceView( cache_id=cid("sequence_view"), sequence_data=seq_tbl, @@ -207,6 +281,16 @@ def build_component_tnt( deconvolved=True, coverage_column="coverage", max_coverage_column="max_coverage", + # Header masses + precomputed fragment-ion masses (so b/y flags and the + # matching-fragments table reflect the modified proteoform), matched + # against the proteoform's observed scan peaks. + theoretical_mass_column="theoretical_mass", + observed_mass_column="observed_mass", + fragment_mass_columns={ + ion: f"fragment_masses_{ion}" for ion in _ION_TYPES + }, + peaks_data=peaks_tbl, + interactivity={"peak": "peak_id"}, annotation_config={ "ion_types": settings.get("ion_types", ["b", "y"]), "tolerance": settings.get("tolerance", 10.0), diff --git a/tests/test_tnt_sequence_loader.py b/tests/test_tnt_sequence_loader.py index dff47cb..5f59de7 100644 --- a/tests/test_tnt_sequence_loader.py +++ b/tests/test_tnt_sequence_loader.py @@ -22,6 +22,7 @@ def _write_sequence_data(path): + fb = [[[float(i)] for i in range(8)], [[float(i)] for i in range(8)]] table = pa.table( { "proteoform_index": pa.array([0, 1], type=pa.int64()), @@ -33,6 +34,12 @@ def _write_sequence_data(path): type=pa.list_(pa.float64()), ), "maxCoverage": pa.array([2.0, 0.0], type=pa.float64()), + "theoretical_mass": pa.array([1000.0, 2000.0], type=pa.float64()), + # proteoform 1 carries FLASHTnT's -1.0 "unmatched" 
sentinel. + "computed_mass": pa.array([1001.5, -1.0], type=pa.float64()), + "fragment_masses_b": pa.array( + fb, type=pa.list_(pa.list_(pa.float64())) + ), } ) pq.write_table(table, path) @@ -85,3 +92,11 @@ def test_sequence_table_reads_parquet_dataset(tmp_path): assert row0["sequence"] == "PEPTIDER" assert row0["coverage"][:3] == [0.0, 1.0, 2.0] assert row0["max_coverage"] == 2.0 + + # Header masses + precomputed fragments flow through; the -1.0 sentinel on + # proteoform 1 maps to a null observed mass so the header omits it. + assert row0["theoretical_mass"] == 1000.0 + assert row0["observed_mass"] == 1001.5 + assert row0["fragment_masses_b"][0] == [0.0] + row1 = df.filter(pl.col("proteoform_index") == 1).row(0, named=True) + assert row1["observed_mass"] is None From 51b1534d4bb61adc941d93b318bde52758216d69 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 10:58:34 +0000 Subject: [PATCH 13/18] FLASHDeconv parity: heatmap click cross-links, 3D x=mz*charge, raw y-label Three regressions found by the parity audit of the OpenMS-Insight Deconv viewer: - Heatmaps had no click interactivity, so the legacy heatmap->spectra/mass/3D cross-link was dead. Restore it: deconv heatmaps emit scanIndex+massIndex, raw heatmaps emit scanIndex (the caches already carry scan_idx/mass_idx, and OI Heatmap preserves interactivity columns through compression). - 3D S/N plot plotted raw m/z on x; the legacy plots the deconvoluted mass (mz * charge). explode_signal_peaks_long keeps mz/charge separate, so derive a 'mass' column and point Scatter3D's mz_column at it. - Raw heatmaps' y-axis was mislabeled 'Monoisotopic mass'; the legacy labels raw heatmaps 'm/z' (deconv stays 'Monoisotopic mass'). Verified: deconv real-data suite 11 passed; full suite 73 passed/2 skipped; and mass == mz*charge confirmed on the bundled fixture (263k peaks). 
https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- src/render_oi/deconv_viewer.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/render_oi/deconv_viewer.py b/src/render_oi/deconv_viewer.py index dc87f73..84b1813 100644 --- a/src/render_oi/deconv_viewer.py +++ b/src/render_oi/deconv_viewer.py @@ -121,6 +121,7 @@ def build_component( if comp_name in _HEATMAP_SPEC: title, cache_name = _HEATMAP_SPEC[comp_name] data = _load_polars(file_manager, dataset_id, cache_name) + is_deconv = "deconv" in comp_name hm = Heatmap( cache_id=cid(comp_name), data=data, @@ -129,7 +130,14 @@ def build_component( intensity_column="intensity", title=title, x_label="Retention time", - y_label="Monoisotopic mass", + y_label="Monoisotopic mass" if is_deconv else "m/z", + # Click a point -> scanIndex (all heatmaps) plus massIndex (deconv only), + # restoring the legacy heatmap cross-links into the spectra/mass/3D panels. + interactivity=( + {SCAN: "scan_idx", MASS: "mass_idx"} + if is_deconv + else {SCAN: "scan_idx"} + ), zoom_identifier=f"{comp_name}_zoom", cache_path=cache_dir, ) @@ -201,12 +209,17 @@ def build_component( # ---- 3D S/N plot (Scatter3D; scanIndex required, massIndex optional) ---- if comp_name == "3D_SN_plot": per_scan = _load_polars(file_manager, dataset_id, "threedim_SN_plot") - long = explode_signal_peaks_long(per_scan) + # x-axis is the deconvoluted mass (mz * charge), matching the legacy 3D plot + # (it plotted peak[1]*peak[3]); the long format keeps mz and charge separate. 
+ long = explode_signal_peaks_long(per_scan).with_columns( + (pl.col("mz") * pl.col("charge")).alias("mass") + ) s3 = Scatter3D( cache_id=cid("3D_SN_plot"), data=long, filters={SCAN: "index"}, optional_filters={MASS: "mass_id"}, + mz_column="mass", title="Precursor Signals", cache_path=cache_dir, ) From 4c735034cd06bb1ff91b9e05fdcee0d4223ea016 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 11:13:42 +0000 Subject: [PATCH 14/18] FLASHTnT: wire the augmented-spectrum tagger overlay Now that OI's LinePlot supports a tagger overlay (OI 2adb7d7), feed it the data: - tag_table gains interactivity={tagIndex: TagIndex}, so clicking a tag records the selection. - render_experiment_tnt resolves the selected tagIndex into a tagData dict (_tag_data: parses tag_dfs.mzs into fragment masses, carries TagSequence / StartPos / EndPos, flags N-terminal tags via Nmass==-1) and pushes it into the panel StateManager under 'tagData' (alongside the existing proteinIndex -> deconvIndex resolution). set_selection no-ops when unchanged. - combined_spectrum's LinePlot now builds its primary data via _deconv_signal_peaks_long, which carries a per-peak signal_peaks column of [mz, intensity, charge] triplets (from combined_spectrum.SignalPeaks, dropping the bin index), and is constructed with tag_overlay=True + signal_peaks_column='signal_peaks'. With a tag selected, OI then highlights the tag-matched sticks and draws the per-charge buttons + inter-residue amino-acid arrows over the spectrum. The sequence-view residue -> AApos cross-link (gold selected-residue highlight + tag-table residue filtering) is still pending and tracked separately; the overlay renders fully without it (selectedAA defaults to none). Also bump the Docker OPENMS_INSIGHT_REF to 2adb7d7. Verified: full suite 74 passed/2 skipped (new test_tagger_overlay_data locks the signal_peaks triplets + tagData shape against the bundled fixture). The Vue rendering itself needs browser verification (see checklist in chat). 
https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- Dockerfile | 2 +- Dockerfile.arm | 2 +- src/render_oi/tnt_viewer.py | 73 ++++++++++++++++++++++++++++++- tests/test_tnt_viewer_realdata.py | 27 ++++++++++++ 4 files changed, 101 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 34cceeb..55f0492 100644 --- a/Dockerfile +++ b/Dockerfile @@ -154,7 +154,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=ec8e06142dd7b9cb01746d39f791e87d07810be2 +ARG OPENMS_INSIGHT_REF=2adb7d7eafecbbd798664e76945770f87e284c24 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/Dockerfile.arm b/Dockerfile.arm index 501bd71..3080ab3 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -149,7 +149,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=ec8e06142dd7b9cb01746d39f791e87d07810be2 +ARG OPENMS_INSIGHT_REF=2adb7d7eafecbbd798664e76945770f87e284c24 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). 
ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index 19ef8ec..013426b 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -39,6 +39,7 @@ # State identifiers PROTEIN = "proteinIndex" DECONV = "deconvIndex" +TAG = "tagIndex" def _load_pickle_gz(path: Path): @@ -176,6 +177,58 @@ def _sequence_table(file_manager, dataset_id: str) -> Optional[pl.LazyFrame]: ) +def _tag_data(file_manager, dataset_id: str, tag_index) -> Optional[dict]: + """Build the tagger-overlay ``tagData`` for the selected tag row. + + Mirrors the legacy tag-table click: parse the comma-joined fragment ``mzs``, + carry the tag sequence + span, and flag N-terminal tags (``Nmass == -1``). + ``selectedAA`` is left unset (-1000) until the sequence-view ``AApos`` + cross-link lands. + """ + tags = ( + _load_polars(file_manager, dataset_id, "tag_dfs") + .filter(pl.col("TagIndex") == int(tag_index)) + .collect() + ) + if tags.height == 0: + return None + r = tags.row(0, named=True) + masses = [ + float(m) + for m in str(r.get("mzs") or "").split(",") + if m.strip() and float(m) != 0.0 + ] + return { + "masses": masses, + "sequence": str(r.get("TagSequence") or ""), + "nTerminal": float(r.get("Nmass", -1) or -1) == -1, + "startPos": int(r.get("StartPos") or 0), + "endPos": int(r.get("EndPos") or 0), + "selectedAA": -1000, + } + + +def _deconv_signal_peaks_long(per_scan: pl.LazyFrame) -> pl.LazyFrame: + """Primary deconvolved spectrum with a per-peak ``signal_peaks`` column. + + Each row (one deconvolved mass) carries its constituent peaks as + ``[mz, intensity, charge]`` triplets (``combined_spectrum.SignalPeaks`` stores + ``[binIdx, mz, intensity, charge]``; we drop the bin index), aligned 1:1 with + the primary sticks so LinePlot's tagger overlay can draw the per-charge buttons. 
+ """ + return ( + per_scan.select(["index", "MonoMass", "SumIntensity", "SignalPeaks"]) + .explode(["MonoMass", "SumIntensity", "SignalPeaks"]) + .with_columns( + pl.col("SignalPeaks") + .list.eval(pl.element().list.slice(1, 3)) + .alias("signal_peaks") + ) + .rename({"MonoMass": "mass", "SumIntensity": "intensity"}) + .select(["index", "mass", "intensity", "signal_peaks"]) + ) + + def build_component_tnt( comp_name: str, dataset_id: str, @@ -239,6 +292,7 @@ def build_component_tnt( cache_id=cid("tag_table"), data=data.select(cols) if cols else data, filters={PROTEIN: "ProteinIndex"}, + interactivity={TAG: "TagIndex"}, index_field="TagIndex", title="Tag Table", cache_path=cache_dir, @@ -248,7 +302,8 @@ def build_component_tnt( # ---- Combined / augmented spectrum (deconv primary + annotated overlay) ---- if comp_name == "combined_spectrum": per_scan = _load_polars(file_manager, dataset_id, "combined_spectrum") - deconv_long, anno_long = explode_combined_spectrum_long(per_scan) + _, anno_long = explode_combined_spectrum_long(per_scan) + deconv_long = _deconv_signal_peaks_long(per_scan) lp = LinePlot( cache_id=cid("combined_spectrum"), data=deconv_long, @@ -259,6 +314,11 @@ def build_component_tnt( overlay_x_column="mass", overlay_y_column="intensity", overlay_name="Annotated", + # Tagger overlay: when a tag is selected (tagData pushed into state by + # render_experiment_tnt), highlight matched sticks + draw per-charge + # buttons and inter-residue amino-acid arrows over the spectrum. + tag_overlay=True, + signal_peaks_column="signal_peaks", title="Augmented Deconvolved Spectrum", x_label="Monoisotopic Mass", y_label="Intensity", @@ -383,6 +443,17 @@ def render_experiment_tnt( if state_manager.get_selection(DECONV) != deconv_index: state_manager.set_selection(DECONV, deconv_index) + # Resolve the selected tag -> tagData so the augmented spectrum's tagger + # overlay can highlight tag-matched sticks and draw the charge buttons / + # inter-residue amino-acid arrows. 
set_selection no-ops when unchanged. + tag_index = state_manager.get_selection(TAG) + tag_data = ( + _tag_data(file_manager, dataset_id, tag_index) + if tag_index is not None + else None + ) + state_manager.set_selection("tagData", tag_data) + for row_index, row in enumerate(layout_rows): if not row: continue diff --git a/tests/test_tnt_viewer_realdata.py b/tests/test_tnt_viewer_realdata.py index 32430a3..98cf5dd 100644 --- a/tests/test_tnt_viewer_realdata.py +++ b/tests/test_tnt_viewer_realdata.py @@ -183,3 +183,30 @@ def test_combined_spectrum_overlay(fake_fm, monkeypatch, tmp_path): assert len(vd["plotData"]) == dl.filter(pl.col("index") == 0).height assert len(vd.get("plotDataOverlay", [])) == al.filter(pl.col("index") == 0).height assert lp._get_component_args().get("has_overlay") is True + + +def test_tagger_overlay_data(fake_fm): + """Tagger plumbing: per-peak signal_peaks triplets + tagData from a tag row.""" + from src.render_oi.tnt_viewer import ( + _deconv_signal_peaks_long, + _load_polars, + _tag_data, + ) + + per_scan = _load_polars(fake_fm, "ds", "combined_spectrum") + dl = _deconv_signal_peaks_long(per_scan).collect() + assert {"index", "mass", "intensity", "signal_peaks"} <= set(dl.columns) + sp = dl.row(0, named=True)["signal_peaks"] + assert sp and len(sp[0]) == 3 # [mz, intensity, charge] (binIdx dropped) + + tid = pl.scan_parquet(_DS / "tag_dfs.pq").collect().row(0, named=True)["TagIndex"] + td = _tag_data(fake_fm, "ds", tid) + assert set(td) == { + "masses", + "sequence", + "nTerminal", + "startPos", + "endPos", + "selectedAA", + } + assert isinstance(td["masses"], list) and isinstance(td["sequence"], str) From fd283f17ed515b64f1b3c5091ac5750146c42b0f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 12:35:41 +0000 Subject: [PATCH 15/18] Parity polish: Deconv/TnT table column-defs, charge labels, best-per-spectrum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the lower-severity parity 
gaps from the multi-angle audit (FLASHApp-side; all use existing OpenMS-Insight Table/LinePlot params). FLASHDeconv (deconv_viewer.py): - scan_table / mass_table: legacy column_definitions (titles, descriptive headerTooltips, numeric formatters) recovered from the legacy bundle, built only from the columns actually present. - deconv_spectrum: per-peak charge labels (z=N) via annotation_column, derived locally from combined_spectrum.SignalPeaks (most-intense peak's charge per mass). anno_spectrum left unannotated (its series carries no charge data). - 3D_SN_plot: dynamic title 'Precursor Signals' / 'Mass Signals' by selection. (Confirmed the legacy does NOT default to precursor-only -> all-masses-for-scan is already correct; not a gap.) FLASHTnT (tnt_viewer.py): - protein_table: restore dropped columns (Scan/length/MatchingFragments/ModCount/ TagCount/Score/ProteoformLevelQvalue/...) with titles+tooltips, initial_sort by Score desc, go_to_fields; 'Best per spectrum' toggle (default on, matching legacy) keeping the top-Score row per Scan. - tag_table: restore Nmass/Cmass, column_definitions, Score-desc sort, go-to. - FLASHTnT's -1.0 'unmatched' sentinel mapped to null for mass/q-value columns so cells render blank (OI column_definitions can't carry inline JS formatters). docs: correct the Quant section — neither engine implements isotope hover or conflict-resolution highlighting (conflict_resolution_dfs is parsed but unused); FeatureView gains per-isotope-trace breaks. Verified: full suite 83 passed, 2 skipped (+9 new tests across the two viewers). 
https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- docs/openms-insight-migration.md | 8 +- src/render_oi/deconv_viewer.py | 246 +++++++++++++++++++- src/render_oi/tnt_viewer.py | 336 ++++++++++++++++++++++++--- tests/test_deconv_viewer_realdata.py | 184 +++++++++++++++ tests/test_tnt_viewer_realdata.py | 143 ++++++++++++ 5 files changed, 879 insertions(+), 38 deletions(-) diff --git a/docs/openms-insight-migration.md b/docs/openms-insight-migration.md index 5a069e7..af47cd8 100644 --- a/docs/openms-insight-migration.md +++ b/docs/openms-insight-migration.md @@ -260,14 +260,16 @@ manager (fixed single view). | component | preserved | status | |---|---|---| | Feature-group `Table` | scalar summary columns; click sets `featureGroup` | ✅ 1437 rows | -| `FeatureView` | per-charge 3D traces (x=`mz`, y=`rt`, z=`intensity`), isotope hover; `filters={featureGroup:feature_group}` | ✅ exact per-group point counts (0/5/100 → 1384/239/208) | +| `FeatureView` | per-charge 3D traces (x=`mz`, y=`rt`, z=`intensity`), per-isotope-trace polyline breaks; `filters={featureGroup:feature_group}` | ✅ exact per-group point counts (0/5/100 → 1384/239/208) | ### 👁 Browser checklist (FLASHQuant) 1. Feature-group table renders all groups; selecting a group updates the FeatureView to that group's traces. -2. Traces colored per charge; isotope info in hover; conflict-resolution - highlighting if a `conflict_resolution_dfs` cache is present. +2. Traces colored per charge, with per-isotope-trace polyline breaks (matching the + legacy). Note: neither engine implements isotope hover or conflict-resolution + highlighting — the `conflict_resolution_dfs` cache is parsed at upload but + unused by both renderers, so its absence is not a regression. 
## After all three checklists pass diff --git a/src/render_oi/deconv_viewer.py b/src/render_oi/deconv_viewer.py index 84b1813..dd0f896 100644 --- a/src/render_oi/deconv_viewer.py +++ b/src/render_oi/deconv_viewer.py @@ -152,6 +152,10 @@ def build_component( interactivity={SCAN: "index"}, index_field="index", title="Scan Table", + # Legacy flash_viewer_grid column titles / tooltips / numeric + # formatting (recovered from the built bundle's TabulatorScanTable + # columnDefinitions). Built only from the columns actually present. + column_definitions=_scan_table_column_definitions(data), cache_path=cache_dir, ) return lambda: tbl(key=skey("scan_table"), state_manager=state_manager) @@ -168,20 +172,33 @@ def build_component( interactivity={MASS: "mass_id"}, index_field="mass_id", title="Mass Table", + # Legacy flash_viewer_grid TabulatorMassTable column titles / + # tooltips / numeric formatting (recovered from the built bundle), + # built only from the exploded columns actually present. + column_definitions=_mass_table_column_definitions(long), cache_path=cache_dir, ) return lambda: tbl(key=skey("mass_table"), state_manager=state_manager) # ---- Deconvolved spectrum (LinePlot, filtered by scanIndex) ---- if comp_name == "deconv_spectrum": - per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") - long = explode_spectrum_long(per_scan) + # Build from combined_spectrum when available: it carries the SAME + # deconvolved sticks (MonoMass/SumIntensity) PLUS the per-mass SignalPeaks + # ([binIdx, mz, intensity, charge]), letting us derive a per-peak charge + # label (z=N) locally — without touching long_format.py. Fall back to the + # plain deconv_spectrum cache (no charge data) when combined is absent. 
+ long, ann_col = _deconv_spectrum_with_charge( + file_manager, dataset_id, explode_spectrum_long + ) lp = LinePlot( cache_id=cid("deconv_spectrum"), data=long, filters={SCAN: "index"}, x_column="mass", y_column="intensity", + # Per-peak charge label on each deconvolved stick (legacy showed the + # charge state next to peaks); None when no charge data is available. + annotation_column=ann_col, title="Deconvolved Spectrum", x_label="Monoisotopic Mass", y_label="Intensity", @@ -193,6 +210,10 @@ def build_component( if comp_name == "anno_spectrum": per_scan = _load_polars(file_manager, dataset_id, "combined_spectrum") _deconv_long, anno_long = explode_combined_spectrum_long(per_scan) + # No per-peak charge label here: the annotated series (MonoMass_Anno / + # SumIntensity_Anno) carries no charge information in these caches (only + # the deconvolved series has the SignalPeaks charge constituents), so we + # have no charge to annotate — left unannotated, matching the data. lp = LinePlot( cache_id=cid("anno_spectrum"), data=anno_long, @@ -223,7 +244,22 @@ def build_component( title="Precursor Signals", cache_path=cache_dir, ) - return lambda: s3(key=skey("3D_SN_plot"), state_manager=state_manager) + + def _render_3d(): + # Reflect the selection state in the title: the default view shows the + # scan's full precursor S/N peaks ("Precursor Signals"); once a single + # mass is isolated (massIndex set via the mass-table / deconv-heatmap + # click) it shows that mass's signal/noisy peaks ("Mass Signals"). + # Only the displayed title arg changes — the cached/filtered data is + # untouched — so this does not invalidate the component cache. 
+ mass_selected = ( + state_manager is not None + and state_manager.get_selection(MASS) is not None + ) + s3._title = "Mass Signals" if mass_selected else "Precursor Signals" + return s3(key=skey("3D_SN_plot"), state_manager=state_manager) + + return _render_3d # ---- FDR / score-distribution plot (DensityPlot, precomputed curves) ---- if comp_name == "fdr_plot": @@ -299,6 +335,210 @@ def _explode_mass_table(per_scan: pl.LazyFrame) -> pl.LazyFrame: return lf.sort(["index", "mass_id"]) +# -------------------------------------------------------------------------- +# Tabulator column definitions, recovered from the legacy flash_viewer_grid +# bundle (TabulatorScanTable / TabulatorMassTable columnDefinitions). The +# legacy numeric formatter was ``v => v.toString().length > 4 ? v.toFixed(4) +# : v`` (a JS function that cannot be JSON-serialized into the OI cache); the +# closest portable equivalent is Tabulator's built-in ``money`` formatter with +# a fixed precision (no symbol), which OpenMS-Insight passes straight through. +# -------------------------------------------------------------------------- +_FLOAT_FMT = {"formatter": "money", "formatterParams": {"precision": 4, "symbol": ""}} + +# field -> (title, headerTooltip, is_float) for the scan table. Order follows +# the legacy column order; ``index`` is mapped to the displayed "Index" column. 
+_SCAN_TABLE_COLUMNS = [ + ("index", "Index", "The sequential index of the spectrum in the dataset.", False), + ("Scan", "Scan Number", "The identifier of the mass spectrometry scan.", False), + ( + "MSLevel", + "MS Level", + "The level of mass spectrometry analysis (e.g., MS1 or MS2).", + False, + ), + ( + "RT", + "Retention time", + "The time at which the spectrum was detected during the chromatographic " + "separation in seconds.", + True, + ), + ( + "PrecursorMass", + "Precursor Mass", + "The mass of the precursor ion selected for fragmentation in Daltons.", + True, + ), + ("#Masses", "#Masses", "The number of detected masses in the spectrum.", False), +] + +# field -> (title, headerTooltip, is_float) for the exploded mass table. The +# ``mass_id`` index column is shown as "Index" (legacy "Index" column). +_MASS_TABLE_COLUMNS = [ + ( + "mass_id", + "Index", + "The sequential index of the mass entry in the dataset.", + False, + ), + ( + "MonoMass", + "Monoisotopic mass", + "The monoisotopic mass of the detected ion in Daltons.", + True, + ), + ( + "SumIntensity", + "Sum intensity", + "The total intensity of the detected mass across all isotopic peaks and " + "charges.", + True, + ), + ( + "MinCharges", + "Min charge", + "The minimum charge state detected for the mass.", + False, + ), + ( + "MaxCharges", + "Max charge", + "The maximum charge state detected for the mass.", + False, + ), + ( + "MinIsotopes", + "Min isotope", + "The smallest observed isotopic shift, expressed as a multiple of the " + "average isotopic mass difference at 55kDA.", + False, + ), + ( + "MaxIsotopes", + "Max isotope", + "The largest observed isotopic shift, expressed as a multiple of the " + "average isotopic mass difference at 55kDA.", + False, + ), + ( + "CosineScore", + "Cosine score", + "The cosine similarity score comparing the observed and theoretical " + "isotopic patterns.", + True, + ), + ("SNR", "SNR", "The signal-to-noise ratio for the detected mass.", True), + ( + "QScore", + 
"QScore", + "The quality score indicating the confidence of the mass detection " + "(higher is better).", + True, + ), +] + + +def _column_definitions( + present_fields, spec +) -> List[Dict[str, Any]]: + """Build Tabulator column_definitions from a (field,title,tooltip,float) spec. + + Only fields that are actually present in the data are emitted (so no column + the data lacks is referenced, and — combined with always covering every real + column — no existing column is dropped). Numeric columns get a ``number`` + sorter; float columns additionally get the fixed-precision ``money`` + formatter that stands in for the legacy ``toFixed(4)`` display. + """ + present = set(present_fields) + defs: List[Dict[str, Any]] = [] + for field, title, tooltip, is_float in spec: + if field not in present: + continue + col: Dict[str, Any] = { + "title": title, + "field": field, + "headerTooltip": tooltip, + "sorter": "number", + } + if is_float: + col.update(_FLOAT_FMT) + defs.append(col) + return defs + + +def _scan_table_column_definitions(data: pl.LazyFrame) -> List[Dict[str, Any]]: + """Legacy scan-table column titles/tooltips/formatters for the real fields.""" + fields = data.collect_schema().names() + return _column_definitions(fields, _SCAN_TABLE_COLUMNS) + + +def _mass_table_column_definitions(long: pl.LazyFrame) -> List[Dict[str, Any]]: + """Legacy mass-table column titles/tooltips/formatters for the exploded fields.""" + fields = long.collect_schema().names() + return _column_definitions(fields, _MASS_TABLE_COLUMNS) + + +def _deconv_spectrum_with_charge(file_manager, dataset_id, explode_spectrum_long): + """Deconvolved-spectrum long format + a per-peak ``charge_label`` (``z=N``). + + Returns ``(long_frame, annotation_column_or_None)``. + + The plain ``deconv_spectrum`` cache holds only ``MonoMass``/``SumIntensity`` + (no charge). 
``combined_spectrum`` carries the SAME deconvolved sticks PLUS + each mass's constituent ``SignalPeaks`` (``[binIdx, mz, intensity, charge]``), + so we derive a representative charge per deconvolved mass (the charge of its + most intense constituent peak) and format it as ``z=N``. This is built + locally via Polars expressions — ``long_format.py`` is untouched. + + Falls back to the plain ``deconv_spectrum`` (no annotation) when + ``combined_spectrum`` / ``SignalPeaks`` are unavailable, in which case the + annotation column is ``None``. + """ + try: + combined = _load_polars(file_manager, dataset_id, "combined_spectrum") + schema = combined.collect_schema().names() + if not {"MonoMass", "SumIntensity", "SignalPeaks"} <= set(schema): + raise KeyError("combined_spectrum lacks SignalPeaks") + long = ( + combined.select(["index", "MonoMass", "SumIntensity", "SignalPeaks"]) + .explode(["MonoMass", "SumIntensity", "SignalPeaks"]) + .rename({"MonoMass": "mass", "SumIntensity": "intensity"}) + # mass_id matches explode_spectrum_long's per-scan peak order so the + # scan cross-link row counts stay identical. + .with_columns(pl.int_range(pl.len()).over("index").alias("mass_id")) + ) + # Representative charge = charge (peak field 3) of the constituent peak + # with the maximum intensity (peak field 2). Null when a mass has no + # constituent peaks; then no label is shown for that stick. 
+ long = long.with_columns( + pl.col("SignalPeaks") + .list.eval(pl.element().list.get(3)) + .list.get( + pl.col("SignalPeaks") + .list.eval(pl.element().list.get(2)) + .list.arg_max() + ) + .cast(pl.Int64, strict=False) + .alias("_charge") + ).with_columns( + pl.when(pl.col("_charge").is_not_null()) + .then(pl.format("z={}", pl.col("_charge"))) + .otherwise(pl.lit("")) + .alias("charge_label") + ) + long = long.select( + ["index", "mass_id", "mass", "intensity", "charge_label"] + ).sort(["index", "mass_id"]) + return long, "charge_label" + except Exception: # pragma: no cover - defensive fallback to plain cache + logger.info( + "deconv_spectrum charge labels unavailable (no SignalPeaks); " + "rendering without charge annotation" + ) + per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") + return explode_spectrum_long(per_scan), None + + def _build_sequence_view( dataset_id, file_manager, cache_dir, cid, skey, state_manager ): diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index 013426b..d779db4 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -229,6 +229,258 @@ def _deconv_signal_peaks_long(per_scan: pl.LazyFrame) -> pl.LazyFrame: ) +# Tabulator float formatter: fixed-precision display standing in for the legacy +# ``toFixed(4)`` (matches the Deconv viewer's _FLOAT_FMT convention). +_FLOAT_FMT = {"formatter": "money", "formatterParams": {"precision": 4, "symbol": ""}} + +# Protein-table column spec mirroring the legacy ``TabulatorProteinTable`` +# (title / headerTooltip / sorter) for the REAL ``protein_dfs`` fields. Each entry +# is (field, title, tooltip, is_float, dash_sentinel). 
``dash_sentinel`` flags the +# columns the legacy rendered with a "-1 -> '-'" formatter (ProteoformMass, +# ProteoformLevelQvalue) -- the FLASHTnT "unmatched" sentinel; we null those -1.0s +# in the data (see _apply_dash_sentinels) so the cell renders blank instead of a +# misleading -1, keeping the column numeric/sortable. ``Coverage(%)`` is added per +# the parity request (it was not in the legacy protein table). +_PROTEIN_TABLE_COLUMNS = [ + ( + "Scan", + "Scan No.", + "The identifier of the mass spectrometry scan associated with the " + "identified proteoform.", + False, + False, + ), + ( + "accession", + "Accession", + "The unique identifier for the protein in the reference database.", + False, + False, + ), + ( + "description", + "Description", + "A human-readable description of the matched protein.", + False, + False, + ), + ( + "length", + "Length", + "The total number of amino acids in the matched protein.", + False, + False, + ), + ( + "ProteoformMass", + "Mass", + "The calculated mass of the proteoform in Daltons.", + True, + True, + ), + ( + "Coverage(%)", + "Coverage (%)", + "The percentage of the protein sequence covered by matched fragments.", + True, + False, + ), + ( + "MatchingFragments", + "No. of Matched Fragments", + "The number of fragment ions that match the protein sequence.", + False, + False, + ), + ( + "ModCount", + "No. of Modifications", + "The number of modifications identified in the protein.", + False, + False, + ), + ( + "TagCount", + "No. of Tags", + "The number of sequence tags associated with the proteoform match.", + False, + False, + ), + ( + "Score", + "Score", + "A score indicating the confidence of the protein match (higher is " + "better).", + False, + False, + ), + ( + "ProteoformLevelQvalue", + "Q-Value (Proteoform Level)", + "The confidence value of the protein match at the proteoform level.", + True, + True, + ), +] + +# Tag-table column spec mirroring the legacy ``TabulatorTagTable``. 
``Nmass`` and +# ``Cmass`` carry the legacy "-1 -> '-'" sentinel (N-/C-terminal offset absent). +_TAG_TABLE_COLUMNS = [ + ( + "Scan", + "Scan Number", + "The identifier of the mass spectrometry scan containing the sequence " + "tag.", + False, + False, + ), + ( + "StartPos", + "Start Position", + "The position in the protein sequence where the sequence tag begins.", + False, + False, + ), + ( + "EndPos", + "End Position", + "The position in the protein sequence where the sequence tag ends.", + False, + False, + ), + ( + "TagSequence", + "Sequence", + "The amino acid sequence of the identified tag.", + False, + False, + ), + ( + "Length", + "Length", + "The number of amino acids in the sequence tag.", + False, + False, + ), + ( + "Score", + "Tag Score", + "A score indicating the confidence of the sequence tag identification " + "(higher is better).", + False, + False, + ), + ( + "Nmass", + "N mass", + "The N-terminal mass offset from the start of the sequence tag in " + "Daltons.", + True, + True, + ), + ( + "Cmass", + "C mass", + "The C-terminal mass offset from the end of the sequence tag in Daltons.", + True, + True, + ), + ( + "DeltaMass", + "Δ mass", + "Delta mass is the difference between the tag flanking mass and the " + "(partial) proteoform mass, from its terminal to the tag boundary.", + True, + False, + ), +] + + +def _tnt_column_definitions(present_fields, spec) -> List[Dict[str, Any]]: + """Build Tabulator ``column_definitions`` from a (field,title,tooltip,float, + dash) spec, emitting only fields present in the data. + + Numeric columns get a ``number`` sorter; float columns additionally get the + fixed-precision ``money`` formatter (legacy ``toFixed(4)``). ``dash`` columns + record ``_dashSentinel`` so the caller can null their -1.0 sentinel in the + data (the legacy "-1 -> '-'" formatter); the key is ignored by Tabulator. 
+ """ + present = set(present_fields) + defs: List[Dict[str, Any]] = [] + for field, title, tooltip, is_float, dash in spec: + if field not in present: + continue + col: Dict[str, Any] = { + "title": title, + "field": field, + "headerTooltip": tooltip, + "sorter": "number", + } + if is_float: + col.update(_FLOAT_FMT) + if dash: + col["_dashSentinel"] = True + defs.append(col) + return defs + + +def _apply_dash_sentinels( + data: "pl.LazyFrame", column_defs: List[Dict[str, Any]] +) -> "pl.LazyFrame": + """Null out the FLASHTnT -1.0 "unmatched" sentinel for the dash columns. + + The legacy tables rendered these cells with a ``-1 -> '-'`` formatter. The + OpenMS-Insight Vue table only resolves named custom formatters (it cannot + receive an inline JS function through the JSON-serialized column definitions), + so we map the sentinel to null at the data layer: the cell renders blank + instead of a misleading ``-1`` while the column stays numeric and sortable. + """ + dash_fields = [c["field"] for c in column_defs if c.get("_dashSentinel")] + # Strip the private marker so only valid Tabulator keys reach the frontend. + for c in column_defs: + c.pop("_dashSentinel", None) + if not dash_fields: + return data + return data.with_columns( + [ + pl.when(pl.col(f) == -1).then(None).otherwise(pl.col(f)).alias(f) + for f in dash_fields + ] + ) + + +def _max_score_per_scan(data: "pl.LazyFrame") -> "pl.LazyFrame": + """Collapse the protein table to the single top-``Score`` row per ``Scan``. + + Reproduces the legacy "Best per spectrum" checkbox (default on): keep, for + each scan, the row with the highest Score (ties resolved by first occurrence, + matching the legacy Map insertion order); rows without a numeric Scan are kept + as-is. Operates on the already-column-selected frame so the row identity (and + ``index`` used for ``proteinIndex``) is preserved. 
+ """ + schema = data.collect_schema().names() + if "Scan" not in schema or "Score" not in schema: + return data + # Window-based selection (no group_by/concat, so it survives the projection + # pushdown the Table applies before collecting): for each Scan, keep the row + # whose Score is the max for that Scan; ties resolved by first occurrence + # (smallest row index, matching the legacy Map insertion order). Rows without a + # Scan are kept verbatim (legacy pushes non-numeric Scan through). Column order + # is preserved (with_columns/filter never reorder), so no realignment needed. + return ( + data.with_row_index("_oi_row") + .with_columns( + pl.col("_oi_row") + .filter(pl.col("Score") == pl.col("Score").max()) + .min() + .over("Scan") + .alias("_oi_best") + ) + .filter(pl.col("Scan").is_null() | (pl.col("_oi_row") == pl.col("_oi_best"))) + .drop("_oi_row", "_oi_best") + ) + + def build_component_tnt( comp_name: str, dataset_id: str, @@ -251,49 +503,69 @@ def build_component_tnt( # ---- Protein table (master; click sets proteinIndex) ---- if comp_name == "protein_table": data = _load_polars(file_manager, dataset_id, "protein_dfs") - # Keep the informative columns the original Protein Table showed. - keep = [ - "index", - "accession", - "description", - "ProteoformMass", - "Coverage(%)", - "TagCount", - "ProteoformLevelQvalue", - ] schema = data.collect_schema().names() - cols = [c for c in keep if c in schema] - tbl = Table( - cache_id=cid("protein_table"), - data=data.select(cols) if cols else data, - interactivity={PROTEIN: "index"}, - index_field="index", - title="Protein Table", - cache_path=cache_dir, - ) - return lambda: tbl(key=skey("protein_table"), state_manager=state_manager) + col_defs = _tnt_column_definitions(schema, _PROTEIN_TABLE_COLUMNS) + # index (proteinIndex) must travel through even though it has no column def; + # plus Scan so "Best per spectrum" can collapse on it. 
+ keep = [c["field"] for c in col_defs] + [ + c for c in ("index", "Scan") if c in schema + ] + cols = list(dict.fromkeys(keep)) # de-dupe, preserve order + data = _apply_dash_sentinels(data.select(cols), col_defs) + + def _render_protein_table(): + import streamlit as st + + # Legacy default: "Best per spectrum" is ON (bestPerSpectrumOnly: true). + best_only = st.checkbox( + "Best per spectrum", + value=True, + key=skey("protein_best_per_spectrum"), + help="Show only the highest-scoring proteoform per spectrum (scan).", + ) + shown = _max_score_per_scan(data) if best_only else data + # Distinct cache_id per toggle state so the two row sets cache cleanly. + suffix = "best" if best_only else "all" + tbl = Table( + cache_id=cid(f"protein_table_{suffix}"), + data=shown, + interactivity={PROTEIN: "index"}, + index_field="index", + column_definitions=col_defs, + go_to_fields=[f for f in ("Scan", "accession") if f in schema], + initial_sort=[{"column": "Score", "dir": "desc"}], + title="Protein Table", + cache_path=cache_dir, + ) + return tbl(key=skey("protein_table"), state_manager=state_manager) + + return _render_protein_table # ---- Tag table (filtered by proteinIndex) ---- if comp_name == "tag_table": data = _load_polars(file_manager, dataset_id, "tag_dfs") - keep = [ - "TagIndex", - "TagSequence", - "StartPos", - "EndPos", - "Length", - "Score", - "DeltaMass", - "ProteinIndex", - ] schema = data.collect_schema().names() - cols = [c for c in keep if c in schema] + col_defs = _tnt_column_definitions(schema, _TAG_TABLE_COLUMNS) + # TagIndex (index/interactivity) and ProteinIndex (filter) must travel + # through even though they carry no column definition. 
+ keep = [c["field"] for c in col_defs] + [ + c for c in ("TagIndex", "ProteinIndex") if c in schema + ] + cols = list(dict.fromkeys(keep)) # de-dupe, preserve order + data = _apply_dash_sentinels(data.select(cols), col_defs) tbl = Table( cache_id=cid("tag_table"), - data=data.select(cols) if cols else data, + data=data, filters={PROTEIN: "ProteinIndex"}, interactivity={TAG: "TagIndex"}, index_field="TagIndex", + column_definitions=col_defs, + go_to_fields=[ + f + for f in ("Scan", "StartPos", "EndPos", "TagSequence") + if f in schema + ], + initial_sort=[{"column": "Score", "dir": "desc"}], title="Tag Table", cache_path=cache_dir, ) diff --git a/tests/test_deconv_viewer_realdata.py b/tests/test_deconv_viewer_realdata.py index 86c2408..1f457a7 100644 --- a/tests/test_deconv_viewer_realdata.py +++ b/tests/test_deconv_viewer_realdata.py @@ -149,3 +149,187 @@ def test_scan_click_cross_link_row_counts(fake_fm, monkeypatch, tmp_path): assert mass0 == snc.filter( (pl.col("index") == scan) & (pl.col("mass_id") == 0) ).height + + +def _capture_built_components(monkeypatch): + """Patch OI component __call__ so building a render closure and invoking it + records the constructed component instance + its render kwargs, without a + browser. Returns a dict {component_type: (instance, call_kwargs)}. 
+ """ + import openms_insight as oi + + captured = {} + + def make_spy(cls, name): + orig = cls.__call__ + + def spy(self, *args, **kwargs): # noqa: ANN001 + captured[name] = (self, kwargs) + return None + + monkeypatch.setattr(cls, "__call__", spy, raising=False) + return orig + + make_spy(oi.Table, "table") + make_spy(oi.LinePlot, "lineplot") + make_spy(oi.Scatter3D, "scatter3d") + return captured + + +def test_scan_table_column_definitions(fake_fm, monkeypatch): + """scan_table passes legacy column_definitions covering every real field.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.deconv_viewer import build_component + + captured = _capture_built_components(monkeypatch) + sm = StateManager(session_key="oi_scan_coldef") + build_component("scan_table", "example_fd", fake_fm, sm, key_prefix="p0")() + + tbl, _ = captured["table"] + defs = tbl._column_definitions + assert defs, "scan_table must pass explicit column_definitions" + by_field = {c["field"]: c for c in defs} + # Every real scan_table.pq column is covered (no column dropped). + real_cols = set( + pl.scan_parquet(_FD / "scan_table.pq").collect_schema().names() + ) + assert real_cols <= set(by_field), (real_cols, set(by_field)) + # Legacy titles and descriptive tooltips. + assert by_field["MSLevel"]["title"] == "MS Level" + assert by_field["RT"]["title"] == "Retention time" + assert by_field["PrecursorMass"]["title"] == "Precursor Mass" + assert by_field["#Masses"]["title"] == "#Masses" + assert all(isinstance(c["headerTooltip"], str) for c in defs) + # Numeric formatting on the float columns (RT / PrecursorMass). 
+ assert by_field["RT"]["formatter"] == "money" + assert by_field["PrecursorMass"]["formatter"] == "money" + + +def test_mass_table_column_definitions(fake_fm, monkeypatch): + """mass_table passes legacy column_definitions covering every exploded field.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.deconv_viewer import _explode_mass_table, build_component + + captured = _capture_built_components(monkeypatch) + sm = StateManager(session_key="oi_mass_coldef") + build_component("mass_table", "example_fd", fake_fm, sm, key_prefix="p0")() + + tbl, _ = captured["table"] + defs = tbl._column_definitions + assert defs, "mass_table must pass explicit column_definitions" + by_field = {c["field"]: c for c in defs} + # Every exploded per-mass field is shown EXCEPT the scan "index" — that + # column is the cross-link filter target (filters={scanIndex: index}) and was + # not a visible column in the legacy Mass Table; it stays available for + # filtering (Table._get_columns_to_select adds filter columns) but is not + # displayed. The displayed row index is mass_id ("Index"), matching legacy. 
+ exploded_cols = set( + _explode_mass_table(pl.scan_parquet(_FD / "mass_table.pq")) + .collect_schema() + .names() + ) + assert (exploded_cols - {"index"}) <= set(by_field), (exploded_cols, set(by_field)) + assert "index" not in by_field # scan index is a filter column, not displayed + assert "mass_id" in by_field # displayed per-mass index ("Index") + assert tbl._filters == {"scanIndex": "index"} # filter column still present + assert by_field["MonoMass"]["title"] == "Monoisotopic mass" + assert by_field["SumIntensity"]["title"] == "Sum intensity" + assert by_field["MinCharges"]["title"] == "Min charge" + assert by_field["MaxCharges"]["title"] == "Max charge" + assert by_field["CosineScore"]["title"] == "Cosine score" + assert by_field["QScore"]["title"] == "QScore" + # Numeric formatting where sensible (masses / intensity / scores). + for f in ("MonoMass", "SumIntensity", "CosineScore", "SNR", "QScore"): + assert by_field[f]["formatter"] == "money", f + + +def test_deconv_spectrum_charge_annotation(fake_fm, monkeypatch, tmp_path): + """deconv_spectrum LinePlot carries a per-peak z=N charge annotation column, + and that column flows into the rendered plotData.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.deconv_viewer import build_component + + captured = _capture_built_components(monkeypatch) + sm = StateManager(session_key="oi_deconv_ann") + build_component("deconv_spectrum", "example_fd", fake_fm, sm, key_prefix="p0")() + + lp, _ = captured["lineplot"] + assert lp._annotation_column == "charge_label" + + # The annotation column is present and populated with z=N labels in plotData. 
+ from src.parse.long_format import explode_spectrum_long + from src.render_oi.deconv_viewer import _deconv_spectrum_with_charge + + long, ann = _deconv_spectrum_with_charge( + fake_fm, "example_fd", explode_spectrum_long + ) + assert ann == "charge_label" + df = long.collect() + labels = df.filter(pl.col("charge_label") != "")["charge_label"].to_list() + assert labels, "expected at least one z=N charge label" + assert all(s.startswith("z=") for s in labels[:50]) + + # Peak/row counts must stay identical to the plain explode so the scan + # cross-link row-count contract is preserved. + plain = explode_spectrum_long(pl.scan_parquet(_FD / "deconv_spectrum.pq")).collect() + assert df.height == plain.height + assert ( + df.group_by("index").len().sort("index")["len"].to_list() + == plain.group_by("index").len().sort("index")["len"].to_list() + ) + + +def test_anno_spectrum_has_no_charge_annotation(fake_fm, monkeypatch): + """anno_spectrum has no charge data, so it carries no annotation column.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.deconv_viewer import build_component + + captured = _capture_built_components(monkeypatch) + sm = StateManager(session_key="oi_anno_ann") + build_component("anno_spectrum", "example_fd", fake_fm, sm, key_prefix="p0")() + + lp, _ = captured["lineplot"] + assert lp._annotation_column is None + + +def test_3d_plot_title_reflects_selection(fake_fm, monkeypatch): + """3D_SN_plot title is 'Precursor Signals' with no mass selected and + 'Mass Signals' once a massIndex is selected.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.deconv_viewer import build_component + + captured = _capture_built_components(monkeypatch) + sm = StateManager(session_key="oi_3d_title") + render = build_component("3D_SN_plot", "example_fd", fake_fm, 
sm, key_prefix="p0") + + sm.set_selection("scanIndex", 0) + render() + assert captured["scatter3d"][0]._title == "Precursor Signals" + + sm.set_selection("massIndex", 0) + render() + assert captured["scatter3d"][0]._title == "Mass Signals" + + sm.set_selection("massIndex", None) + render() + assert captured["scatter3d"][0]._title == "Precursor Signals" diff --git a/tests/test_tnt_viewer_realdata.py b/tests/test_tnt_viewer_realdata.py index 98cf5dd..1fab27f 100644 --- a/tests/test_tnt_viewer_realdata.py +++ b/tests/test_tnt_viewer_realdata.py @@ -94,6 +94,149 @@ def test_every_tnt_component_builds(comp, fake_fm, monkeypatch): assert callable(builder), f"{comp} did not produce a render callable" +def _captured_table(builder, monkeypatch): + """Run a build_component_tnt render callable and capture the Table instance it + constructs (the protein table builds its Table inside the render closure to + honour the runtime "Best per spectrum" checkbox).""" + from openms_insight import Table + + captured = {} + + def _spy(self, *a, **k): + captured["table"] = self + return None + + monkeypatch.setattr(Table, "__call__", _spy, raising=False) + builder() + return captured["table"] + + +def test_tag_table_column_definitions_and_sort(fake_fm, monkeypatch): + """Tag table: legacy titles incl. restored Nmass/Cmass, initial_sort by Score + desc, go_to fields, plus the proteinIndex filter / tagIndex interactivity.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.tnt_viewer import build_component_tnt + + sm = StateManager(session_key="oi_tnt_test_tag") + builder = build_component_tnt("tag_table", "ds", fake_fm, sm, key_prefix="p0") + tbl = _captured_table(builder, monkeypatch) + args = tbl._get_component_args() + + titles = {c["title"]: c["field"] for c in args["columnDefinitions"]} + # Restored columns that the pre-parity build dropped. 
+ assert titles.get("N mass") == "Nmass" + assert titles.get("C mass") == "Cmass" + # Every column definition carries a header tooltip. + assert all("headerTooltip" in c for c in args["columnDefinitions"]) + assert args["initialSort"] == [{"column": "Score", "dir": "desc"}] + assert "Scan" in args["goToFields"] and "TagSequence" in args["goToFields"] + assert tbl._filters == {"proteinIndex": "ProteinIndex"} + assert args["interactivity"] == {"tagIndex": "TagIndex"} + # The private dash-sentinel marker never reaches the frontend. + assert all("_dashSentinel" not in c for c in args["columnDefinitions"]) + + +def test_protein_table_column_definitions_and_sort(fake_fm, monkeypatch): + """Protein table: legacy + parity columns, initial_sort by Score desc, go_to + fields, and the proteinIndex interactivity preserved.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + monkeypatch.setattr(st, "checkbox", lambda *a, **k: True, raising=False) + from openms_insight import StateManager + + from src.render_oi.tnt_viewer import build_component_tnt + + sm = StateManager(session_key="oi_tnt_test_prot") + builder = build_component_tnt("protein_table", "ds", fake_fm, sm, key_prefix="p0") + tbl = _captured_table(builder, monkeypatch) + args = tbl._get_component_args() + + fields = {c["field"] for c in args["columnDefinitions"]} + # Legacy columns the pre-parity build had dropped, plus the requested Coverage. 
+ for restored in ( + "Scan", + "accession", + "description", + "length", + "ProteoformMass", + "Coverage(%)", + "MatchingFragments", + "ModCount", + "TagCount", + "Score", + "ProteoformLevelQvalue", + ): + assert restored in fields, f"protein column {restored} missing" + assert all("headerTooltip" in c for c in args["columnDefinitions"]) + assert args["initialSort"] == [{"column": "Score", "dir": "desc"}] + assert args["goToFields"] == ["Scan", "accession"] + assert args["interactivity"] == {"proteinIndex": "index"} + assert all("_dashSentinel" not in c for c in args["columnDefinitions"]) + + +def test_protein_best_per_spectrum_reduces_rows(fake_fm, monkeypatch): + """The "Best per spectrum" toggle collapses the protein table to one (top + Score) row per Scan; default-on shows fewer rows than the unfiltered table, + and the reduced count equals the number of distinct scans.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.tnt_viewer import build_component_tnt + + def _rows(best_only): + monkeypatch.setattr(st, "checkbox", lambda *a, **k: best_only, raising=False) + sm = StateManager(session_key=f"oi_tnt_test_bps_{best_only}") + builder = build_component_tnt( + "protein_table", "ds", fake_fm, sm, key_prefix="p0" + ) + tbl = _captured_table(builder, monkeypatch) + return tbl._prepare_vue_data({})["_pagination"]["total_rows"] + + all_rows = _rows(False) + best_rows = _rows(True) + + n_scans = ( + pl.scan_parquet(_DS / "protein_dfs.pq").select("Scan").collect()["Scan"].n_unique() + ) + assert best_rows == n_scans + assert best_rows < all_rows, "Best per spectrum should reduce the row count" + + +def test_protein_dash_sentinel_nulled(fake_fm, monkeypatch): + """The -1.0 'unmatched' sentinel for the dash columns (ProteoformMass / + ProteoformLevelQvalue) is nulled at the data layer so the cell renders blank + instead of -1, while the column stays numeric.""" + 
import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + monkeypatch.setattr(st, "checkbox", lambda *a, **k: False, raising=False) + from openms_insight import StateManager + + from src.render_oi.tnt_viewer import build_component_tnt + + raw = pl.scan_parquet(_DS / "protein_dfs.pq").collect() + if "ProteoformMass" not in raw.columns: + pytest.skip("ProteoformMass not present in this dataset") + neg1 = int((raw["ProteoformMass"] == -1).sum()) + if neg1 == 0: + pytest.skip("no -1 sentinel present to null in this dataset") + + sm = StateManager(session_key="oi_tnt_test_dash") + builder = build_component_tnt("protein_table", "ds", fake_fm, sm, key_prefix="p0") + tbl = _captured_table(builder, monkeypatch) + df = tbl._prepare_vue_data({})["tableData"] + # No -1 sentinel survives; the rows that had it are now null. + assert (df["ProteoformMass"] == -1).sum() == 0 + assert df["ProteoformMass"].isna().sum() >= neg1 + + def test_proteoform_scan_resolution(fake_fm, monkeypatch): import streamlit as st From 83574eac953d13c52d26b138604561ca46d3d9cc Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 12:55:40 +0000 Subject: [PATCH 16/18] =?UTF-8?q?Parity:=20TnT=20residue=E2=86=92tag=20cro?= =?UTF-8?q?ss-link,=20Deconv=20mass=20highlight,=20Quant=20per-isotope=20t?= =?UTF-8?q?races?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FLASHTnT sequence-view residue (AApos) cross-link, closing the last legacy gap: - SequenceView emits the clicked residue's 0-based index under AApos via the POSITION_SENTINEL (""), alongside the existing peak interactivity. - Tag table restricts server-side to tags spanning the selected residue (StartPos <= AApos <= EndPos), built in a render closure so AApos is read at render time with a cache_id that varies per residue. 
- _tag_data marks selectedAA as the within-tag offset (AApos - StartPos) so the augmented-spectrum tagger overlay draws the gold selected-residue highlight. - render_experiment_tnt clears the tag + residue sub-selections when the protein changes (a private _tag_protein marker), matching the legacy reset-on-click. FLASHDeconv: deconv_spectrum gains interactivity={massIndex: mass_id} so a mass-table click highlights the matching stick (and vice-versa) -- the legacy mass-table -> spectrum highlight cross-link, reading the shared selection store. FLASHQuant: FeatureView trace_key_column="isotope" breaks the polyline between isotope traces within a charge, matching the legacy per-isotope-trace breaks. Bump Docker OPENMS_INSIGHT_REF to 2767f1d (residue-position interactivity + FeatureView per-trace breaks). CI / requirements track the OI branch directly. Adds test_residue_aapos_cross_link covering the offset math, the server-side residue filter (cache_id variation + row count), and the SequenceView sentinel. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- Dockerfile | 2 +- Dockerfile.arm | 2 +- src/render_oi/deconv_viewer.py | 4 ++ src/render_oi/quant_viewer.py | 3 + src/render_oi/tnt_viewer.py | 99 +++++++++++++++++++++++-------- tests/test_tnt_viewer_realdata.py | 80 +++++++++++++++++++++++++ 6 files changed, 162 insertions(+), 28 deletions(-) diff --git a/Dockerfile b/Dockerfile index 55f0492..ac6d824 100644 --- a/Dockerfile +++ b/Dockerfile @@ -154,7 +154,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=2adb7d7eafecbbd798664e76945770f87e284c24 +ARG OPENMS_INSIGHT_REF=2767f1d1dea651c47ac104a05dd8efa4e30fe961 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA).
ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/Dockerfile.arm b/Dockerfile.arm index 3080ab3..de5e612 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -149,7 +149,7 @@ RUN npm run build # validated commit for reproducible images (override OPENMS_INSIGHT_REF to bump). FROM node:21 AS openms-insight-build ARG OPENMS_INSIGHT_REPO=https://github.com/t0mdavid-m/OpenMS-Insight.git -ARG OPENMS_INSIGHT_REF=2adb7d7eafecbbd798664e76945770f87e284c24 +ARG OPENMS_INSIGHT_REF=2767f1d1dea651c47ac104a05dd8efa4e30fe961 # Bust the clone cache when REF tracks a moving branch (no-op for a pinned SHA). ADD https://api.github.com/repos/t0mdavid-m/OpenMS-Insight/commits/$OPENMS_INSIGHT_REF oi-version.json RUN git clone ${OPENMS_INSIGHT_REPO} /OpenMS-Insight \ diff --git a/src/render_oi/deconv_viewer.py b/src/render_oi/deconv_viewer.py index dd0f896..fae8bd1 100644 --- a/src/render_oi/deconv_viewer.py +++ b/src/render_oi/deconv_viewer.py @@ -194,6 +194,10 @@ def build_component( cache_id=cid("deconv_spectrum"), data=long, filters={SCAN: "index"}, + # Click sets massIndex AND the peak matching the current massIndex (set + # from the mass table) is highlighted — restoring the legacy mass-table + # -> deconv-spectrum highlight cross-link. + interactivity={MASS: "mass_id"}, x_column="mass", y_column="intensity", # Per-peak charge label on each deconvolved stick (legacy showed the diff --git a/src/render_oi/quant_viewer.py b/src/render_oi/quant_viewer.py index c9ecea1..a005c2a 100644 --- a/src/render_oi/quant_viewer.py +++ b/src/render_oi/quant_viewer.py @@ -89,6 +89,9 @@ def build_quant_components( rt_column="rt", intensity_column="intensity", isotope_column="isotope", + # Break the polyline between isotope traces within a charge (matching the + # legacy per-isotope-trace breaks) instead of one connected line per charge. 
+ trace_key_column="isotope", title="Feature Group Visualization", cache_path=cache_dir, ) diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index d779db4..96085b1 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -40,6 +40,7 @@ PROTEIN = "proteinIndex" DECONV = "deconvIndex" TAG = "tagIndex" +AAPOS = "AApos" def _load_pickle_gz(path: Path): @@ -177,13 +178,14 @@ def _sequence_table(file_manager, dataset_id: str) -> Optional[pl.LazyFrame]: ) -def _tag_data(file_manager, dataset_id: str, tag_index) -> Optional[dict]: +def _tag_data(file_manager, dataset_id: str, tag_index, aa_pos=None) -> Optional[dict]: """Build the tagger-overlay ``tagData`` for the selected tag row. Mirrors the legacy tag-table click: parse the comma-joined fragment ``mzs``, carry the tag sequence + span, and flag N-terminal tags (``Nmass == -1``). - ``selectedAA`` is left unset (-1000) until the sequence-view ``AApos`` - cross-link lands. + When a sequence-view residue is selected (``aa_pos``) and it falls within the + tag span, ``selectedAA`` is its tag-relative offset (drives the gold + selected-residue highlight); otherwise it stays unset (-1000). """ tags = ( _load_polars(file_manager, dataset_id, "tag_dfs") @@ -198,13 +200,21 @@ def _tag_data(file_manager, dataset_id: str, tag_index) -> Optional[dict]: for m in str(r.get("mzs") or "").split(",") if m.strip() and float(m) != 0.0 ] + start = int(r.get("StartPos") or 0) + end = int(r.get("EndPos") or 0) + # Gold selected-residue highlight: when a sequence-view residue (aa_pos) is + # selected and lies within this tag's span, selectedAA is its tag-relative + # offset (legacy: selectedAApos - StartPos); otherwise unset (-1000). 
+ selected_aa = -1000 + if aa_pos is not None and start <= int(aa_pos) <= end: + selected_aa = int(aa_pos) - start return { "masses": masses, "sequence": str(r.get("TagSequence") or ""), "nTerminal": float(r.get("Nmass", -1) or -1) == -1, - "startPos": int(r.get("StartPos") or 0), - "endPos": int(r.get("EndPos") or 0), - "selectedAA": -1000, + "startPos": start, + "endPos": end, + "selectedAA": selected_aa, } @@ -553,23 +563,43 @@ def _render_protein_table(): ] cols = list(dict.fromkeys(keep)) # de-dupe, preserve order data = _apply_dash_sentinels(data.select(cols), col_defs) - tbl = Table( - cache_id=cid("tag_table"), - data=data, - filters={PROTEIN: "ProteinIndex"}, - interactivity={TAG: "TagIndex"}, - index_field="TagIndex", - column_definitions=col_defs, - go_to_fields=[ - f - for f in ("Scan", "StartPos", "EndPos", "TagSequence") - if f in schema - ], - initial_sort=[{"column": "Score", "dir": "desc"}], - title="Tag Table", - cache_path=cache_dir, - ) - return lambda: tbl(key=skey("tag_table"), state_manager=state_manager) + has_span = {"StartPos", "EndPos"} <= set(schema) + + def _render_tag_table(): + # Residue-driven tag filter: when a sequence-view residue is selected + # (AApos), restrict the tag table to tags spanning that residue + # (StartPos <= AApos <= EndPos) -- the legacy residue-click tag filter. + # It's a range predicate the identifier->column `filters` map can't + # express, so apply it server-side at render time (AApos is read from + # state, like the protein table's "best per spectrum" toggle) with a + # cache_id that varies per residue (each filtered view caches cleanly). 
+ aa_pos = state_manager.get_selection(AAPOS) + shown, tag_cid = data, cid("tag_table") + if aa_pos is not None and has_span: + shown = data.filter( + (pl.col("StartPos") <= int(aa_pos)) + & (pl.col("EndPos") >= int(aa_pos)) + ) + tag_cid = f"{cid('tag_table')}_aa{int(aa_pos)}" + tbl = Table( + cache_id=tag_cid, + data=shown, + filters={PROTEIN: "ProteinIndex"}, + interactivity={TAG: "TagIndex"}, + index_field="TagIndex", + column_definitions=col_defs, + go_to_fields=[ + f + for f in ("Scan", "StartPos", "EndPos", "TagSequence") + if f in schema + ], + initial_sort=[{"column": "Score", "dir": "desc"}], + title="Tag Table", + cache_path=cache_dir, + ) + return tbl(key=skey("tag_table"), state_manager=state_manager) + + return _render_tag_table # ---- Combined / augmented spectrum (deconv primary + annotated overlay) ---- if comp_name == "combined_spectrum": @@ -622,7 +652,10 @@ def _render_protein_table(): ion: f"fragment_masses_{ion}" for ion in _ION_TYPES }, peaks_data=peaks_tbl, - interactivity={"peak": "peak_id"}, + # Residue click emits the peak (unused downstream) AND the residue's + # 0-based index under AApos (POSITION_SENTINEL), driving the tag-table + # residue filter + the gold selected-residue highlight in the overlay. + interactivity={"peak": "peak_id", AAPOS: ""}, annotation_config={ "ion_types": settings.get("ion_types", ["b", "y"]), "tolerance": settings.get("tolerance", 10.0), @@ -715,12 +748,26 @@ def render_experiment_tnt( if state_manager.get_selection(DECONV) != deconv_index: state_manager.set_selection(DECONV, deconv_index) + # When the selected protein changes, clear the tag + residue sub-selections so + # a stale tag overlay / residue highlight from the previous proteoform doesn't + # persist (the legacy reset these on protein-table click). A private state + # marker tracks which protein the current tag/residue selection belongs to. 
+ if state_manager.get_selection("_tag_protein") != protein_index: + state_manager.set_selection("_tag_protein", protein_index) + if state_manager.get_selection(TAG) is not None: + state_manager.set_selection(TAG, None) + if state_manager.get_selection(AAPOS) is not None: + state_manager.set_selection(AAPOS, None) + # Resolve the selected tag -> tagData so the augmented spectrum's tagger # overlay can highlight tag-matched sticks and draw the charge buttons / - # inter-residue amino-acid arrows. set_selection no-ops when unchanged. + # inter-residue amino-acid arrows. When a sequence-view residue (AApos) is + # also selected, _tag_data marks the within-tag offset (gold highlight). + # set_selection no-ops when unchanged. tag_index = state_manager.get_selection(TAG) + aa_pos = state_manager.get_selection(AAPOS) tag_data = ( - _tag_data(file_manager, dataset_id, tag_index) + _tag_data(file_manager, dataset_id, tag_index, aa_pos) if tag_index is not None else None ) diff --git a/tests/test_tnt_viewer_realdata.py b/tests/test_tnt_viewer_realdata.py index 1fab27f..2ad30c9 100644 --- a/tests/test_tnt_viewer_realdata.py +++ b/tests/test_tnt_viewer_realdata.py @@ -353,3 +353,83 @@ def test_tagger_overlay_data(fake_fm): "selectedAA", } assert isinstance(td["masses"], list) and isinstance(td["sequence"], str) + + +def _captured_seqview(builder, monkeypatch): + """Run a sequence_view render callable and capture the SequenceView it builds.""" + from openms_insight import SequenceView + + captured = {} + + def _spy(self, *a, **k): + captured["sv"] = self + return None + + monkeypatch.setattr(SequenceView, "__call__", _spy, raising=False) + builder() + return captured["sv"] + + +def test_residue_aapos_cross_link(fake_fm, monkeypatch): + """Sequence-view residue (AApos) cross-link: _tag_data marks the within-tag + offset, the tag table restricts to tags spanning the residue (cache_id varies + per residue), and the SequenceView emits the residue-position sentinel.""" + import 
streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.tnt_viewer import ( + AAPOS, + PROTEIN, + _tag_data, + build_component_tnt, + ) + + tags = pl.scan_parquet(_DS / "tag_dfs.pq").collect() + r = tags.row(0, named=True) + tid, start, end = r["TagIndex"], int(r["StartPos"]), int(r["EndPos"]) + + # selectedAA = aa_pos - StartPos within the span (legacy), else -1000. + assert _tag_data(fake_fm, "ds", tid, start)["selectedAA"] == 0 + mid = (start + end) // 2 + assert _tag_data(fake_fm, "ds", tid, mid)["selectedAA"] == mid - start + assert _tag_data(fake_fm, "ds", tid, end + 1)["selectedAA"] == -1000 + assert _tag_data(fake_fm, "ds", tid)["selectedAA"] == -1000 # no residue + + # Tag table server-side residue filter: StartPos <= AApos <= EndPos, scoped to + # the protein, with a cache_id that varies per residue. + pid = int(r["ProteinIndex"]) + prot_tags = tags.filter(pl.col("ProteinIndex") == pid) + aa = int(prot_tags.row(0, named=True)["StartPos"]) + expected = prot_tags.filter( + (pl.col("StartPos") <= aa) & (pl.col("EndPos") >= aa) + ).height + + sm = StateManager(session_key="oi_tnt_test_aapos") + sm.set_selection(AAPOS, aa) + tbl = _captured_table( + build_component_tnt("tag_table", "ds", fake_fm, sm, key_prefix="p0"), + monkeypatch, + ) + assert tbl._cache_id.endswith(f"_aa{aa}") + got = tbl._prepare_vue_data({PROTEIN: pid})["_pagination"]["total_rows"] + assert got == expected + + # Without a residue selection the table is unfiltered and keeps the plain id. 
+ sm2 = StateManager(session_key="oi_tnt_test_aapos2") + tbl2 = _captured_table( + build_component_tnt("tag_table", "ds", fake_fm, sm2, key_prefix="p1"), + monkeypatch, + ) + assert not tbl2._cache_id.endswith(f"_aa{aa}") + got_all = tbl2._prepare_vue_data({PROTEIN: pid})["_pagination"]["total_rows"] + assert expected <= got_all + + # SequenceView emits the residue-position sentinel under AApos (the source of + # the AApos selection that drives the two filters above). + seq_builder = build_component_tnt("sequence_view", "ds", fake_fm, sm, key_prefix="p0") + if seq_builder is not None: + sv = _captured_seqview(seq_builder, monkeypatch) + assert sv._interactivity.get(AAPOS) == "" + assert sv._interactivity.get("peak") == "peak_id" From 90bac88b09d65c630fc0acca71f1ce0741eca761 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 13:13:21 +0000 Subject: [PATCH 17/18] FLASHQuant parity: legacy column titles, drop extra column, dataset-scoped state Re-audit (fan-out) of FLASHQuant surfaced four divergences from the legacy FLASHQuantView, all fixed here: - Feature-group table had no column_definitions, so OpenMS-Insight auto-titled the CamelCase fields ("Monoisotopicmass", "Startretentiontime(Fwhm)", ...). Add explicit column_definitions mirroring the legacy featureGroupTableColumnDefinitions (Index, Monoisotopic Mass, Average Mass, Start/End Retention Time (FWHM), Feature Group Quantity, Min/Max Charge, Most Abundant Charge, Isotope Cosine Score) with tooltips + number sorter/precision, reusing the Deconv _column_definitions helper. - Drop HighestApexRetentionTime: the legacy table never surfaced it. - Match legacy titles: table "Feature groups", 3D view "Feature group signals" (was "Feature Groups" / "Feature Group Visualization"). 
- Scope the quant StateManager session_key to dataset_id so switching the selected experiment starts from a clean selection (default-row-0) instead of inheriting the previous dataset's featureGroup -- reproducing the legacy render_grid reset on dataset change (src/render/render.py:80-82). Most acute for Quant, whose single FeatureView is entirely driven by featureGroup. https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- src/render_oi/quant_viewer.py | 107 ++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 18 deletions(-) diff --git a/src/render_oi/quant_viewer.py b/src/render_oi/quant_viewer.py index a005c2a..4f4e788 100644 --- a/src/render_oi/quant_viewer.py +++ b/src/render_oi/quant_viewer.py @@ -19,27 +19,87 @@ import polars as pl -from .deconv_viewer import _oi_cache_dir, _load_polars +from .deconv_viewer import _column_definitions, _load_polars, _oi_cache_dir logger = logging.getLogger(__name__) FEATURE_GROUP = "featureGroup" -# Feature-group summary columns to show in the selector table (when present). -_FG_SUMMARY_COLUMNS = [ - "FeatureGroupIndex", - "MonoisotopicMass", - "AverageMass", - "StartRetentionTime(FWHM)", - "EndRetentionTime(FWHM)", - "HighestApexRetentionTime", - "FeatureGroupQuantity", - "MinCharge", - "MaxCharge", - "MostAbundantFeatureCharge", - "IsotopeCosineScore", +# Feature-group selector columns mirroring the legacy +# ``FLASHQuantView.featureGroupTableColumnDefinitions`` as (field, title, tooltip, +# is_float). The explicit titles match the legacy human-readable labels -- without +# column_definitions the OI Table auto-titles the CamelCase fields ("Monoisotopicmass", +# "Startretentiontime(Fwhm)", ...). Legacy did not surface HighestApexRetentionTime, +# so it is intentionally omitted. 
+_FG_TABLE_COLUMNS = [ + ( + "FeatureGroupIndex", + "Index", + "The sequential index of the feature group in the dataset.", + False, + ), + ( + "MonoisotopicMass", + "Monoisotopic Mass", + "The monoisotopic mass of the feature group in Daltons.", + True, + ), + ( + "AverageMass", + "Average Mass", + "The average mass of the feature group in Daltons.", + True, + ), + ( + "StartRetentionTime(FWHM)", + "Start Retention Time (FWHM)", + "The start of the feature group's elution window (full width at half " + "maximum) in seconds.", + True, + ), + ( + "EndRetentionTime(FWHM)", + "End Retention Time (FWHM)", + "The end of the feature group's elution window (full width at half " + "maximum) in seconds.", + True, + ), + ( + "FeatureGroupQuantity", + "Feature Group Quantity", + "The integrated abundance (quantity) of the feature group.", + True, + ), + ( + "MinCharge", + "Min Charge", + "The minimum charge state observed for the feature group.", + False, + ), + ( + "MaxCharge", + "Max Charge", + "The maximum charge state observed for the feature group.", + False, + ), + ( + "MostAbundantFeatureCharge", + "Most Abundant Charge", + "The charge state of the most abundant feature in the group.", + False, + ), + ( + "IsotopeCosineScore", + "Isotope Cosine Score", + "The cosine similarity between the observed and theoretical isotope " + "patterns.", + True, + ), ] +# Field projection for the selector table (column order follows the spec above). +_FG_SUMMARY_COLUMNS = [field for field, *_ in _FG_TABLE_COLUMNS] + def build_quant_components( dataset_id: str, @@ -69,12 +129,16 @@ def build_quant_components( # one row per group), click sets featureGroup. summary_cols = [c for c in _FG_SUMMARY_COLUMNS if c in schema] # Drop the array columns from the table (keep only scalar summary columns). 
+ table_data = quant.select(summary_cols) if summary_cols else quant fg_table = Table( cache_id=cid("feature_table"), - data=quant.select(summary_cols) if summary_cols else quant, + data=table_data, interactivity={FEATURE_GROUP: "FeatureGroupIndex"}, index_field="FeatureGroupIndex", - title="Feature Groups", + # Legacy column titles/tooltips + number sorter/precision (without these the + # OI Table auto-titles the CamelCase fields, e.g. "Monoisotopicmass"). + column_definitions=_column_definitions(summary_cols, _FG_TABLE_COLUMNS), + title="Feature groups", cache_path=cache_dir, ) @@ -92,7 +156,8 @@ def build_quant_components( # Break the polyline between isotope traces within a charge (matching the # legacy per-isotope-trace breaks) instead of one connected line per charge. trace_key_column="isotope", - title="Feature Group Visualization", + # Match the legacy 3D plot title ("Feature group signals"). + title="Feature group signals", cache_path=cache_dir, ) @@ -112,7 +177,13 @@ def render_experiment_quant( import streamlit as st from openms_insight import StateManager - state_manager = StateManager(session_key=f"oi_quant_state_{panel_key}") + # Scope the StateManager to the dataset so switching the selected experiment + # starts from a clean selection (OI Table default-row-0) instead of inheriting + # the previous dataset's featureGroup -- the legacy render_grid reset selections + # on dataset change (src/render/render.py:80-82). + state_manager = StateManager( + session_key=f"oi_quant_state_{panel_key}_{dataset_id}" + ) try: render = build_quant_components( dataset_id, file_manager, state_manager, key_prefix=panel_key From 48106f662def85019e14a0c44f9a79f090fcddfb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 13:33:12 +0000 Subject: [PATCH 18/18] Parity round 2: Deconv/TnT fixes from the fan-out re-audit Deconv (deconv_viewer.py): - Remove the always-on z=N charge labels from the deconvolved spectrum. 
The legacy draws annotation boxes ONLY for the selected mass (computeAnnotationBoxes returns [] when nothing is highlighted) and gates z=N labels to the m/z axis, so per-stick labels were a regression. The massIndex highlight cross-link already reproduces the on-selection emphasis. Drops the now-dead _deconv_spectrum_with_charge helper (explode_spectrum_long already yields the mass_id-aligned long frame). - Reset the selected mass to the new scan's first peak on a scan-TABLE click (legacy updateSelectedScan -> updateSelectedMass(0)) without clobbering a heatmap click that sets scan+mass together (_reset_mass_on_scan_change, dual-tracked). - Scope the StateManager session_key to dataset_id so switching the selected experiment starts clean instead of inheriting the previous dataset's scan/mass (legacy render_grid reset, src/render/render.py:80-82). - Restore scan/mass table "go to" navigation (go_to_fields). - FLASHDeconv sequence view defaults to a/x/y ions (legacy SequenceViewInformation default; the OI default is b/y). - Heatmap axis labels Title Case ("Retention Time"/"Monoisotopic Mass") and 3D title lowercase ("Precursor signals"/"Mass signals"), matching the bundle. TnT (tnt_viewer.py): - Scope the StateManager session_key to dataset_id (same dataset-switch reset). - Protein-table accession/description sort as strings (legacy gave them no numeric sorter; a number sorter parses text as NaN and fails to order it). TagSequence keeps its numeric sorter to match the legacy tag table. - id_fdr_plot x-axis label "QScore" (legacy FDRPlotly), matching the Deconv plot. - Raw heatmap labelled m/z (was hardcoded to mass for all heatmaps); Title-Case axis labels, mirroring the Deconv heatmap. Tests: scan->mass reset (incl. the heatmap-keeps-its-mass case), deconvolved spectrum carries no always-on annotation + the massIndex cross-link, protein string-sorter, and 3D title casing. 
https://claude.ai/code/session_01DzcAbhoSHQbA8EQ6ARFPRj --- src/render_oi/deconv_viewer.py | 144 ++++++++++++--------------- src/render_oi/tnt_viewer.py | 35 +++++-- tests/test_deconv_viewer_realdata.py | 77 ++++++++------ tests/test_tnt_viewer_realdata.py | 9 ++ 4 files changed, 147 insertions(+), 118 deletions(-) diff --git a/src/render_oi/deconv_viewer.py b/src/render_oi/deconv_viewer.py index fae8bd1..6f89d1a 100644 --- a/src/render_oi/deconv_viewer.py +++ b/src/render_oi/deconv_viewer.py @@ -129,8 +129,8 @@ def build_component( y_column="mass", intensity_column="intensity", title=title, - x_label="Retention time", - y_label="Monoisotopic mass" if is_deconv else "m/z", + x_label="Retention Time", + y_label="Monoisotopic Mass" if is_deconv else "m/z", # Click a point -> scanIndex (all heatmaps) plus massIndex (deconv only), # restoring the legacy heatmap cross-links into the spectra/mass/3D panels. interactivity=( @@ -156,6 +156,11 @@ def build_component( # formatting (recovered from the built bundle's TabulatorScanTable # columnDefinitions). Built only from the columns actually present. column_definitions=_scan_table_column_definitions(data), + # Legacy scan-table "go to" navigation (TabulatorScanTable + # go-to-fields ["id","Scan"]; the new index column is "index"). + go_to_fields=[ + f for f in ("index", "Scan") if f in data.collect_schema().names() + ], cache_path=cache_dir, ) return lambda: tbl(key=skey("scan_table"), state_manager=state_manager) @@ -176,20 +181,20 @@ def build_component( # tooltips / numeric formatting (recovered from the built bundle), # built only from the exploded columns actually present. column_definitions=_mass_table_column_definitions(long), + # Legacy mass-table "go to" navigation (TabulatorMassTable + # go-to-fields ["id"]; the new per-scan index column is "mass_id"). 
+ go_to_fields=["mass_id"], cache_path=cache_dir, ) return lambda: tbl(key=skey("mass_table"), state_manager=state_manager) # ---- Deconvolved spectrum (LinePlot, filtered by scanIndex) ---- if comp_name == "deconv_spectrum": - # Build from combined_spectrum when available: it carries the SAME - # deconvolved sticks (MonoMass/SumIntensity) PLUS the per-mass SignalPeaks - # ([binIdx, mz, intensity, charge]), letting us derive a per-peak charge - # label (z=N) locally — without touching long_format.py. Fall back to the - # plain deconv_spectrum cache (no charge data) when combined is absent. - long, ann_col = _deconv_spectrum_with_charge( - file_manager, dataset_id, explode_spectrum_long - ) + # Primary deconvolved sticks (MonoMass/SumIntensity) as long rows carrying + # a per-scan mass_id (matching the mass table's index), so the massIndex + # interactivity below highlights the same peak the mass table selects. + per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") + long = explode_spectrum_long(per_scan) lp = LinePlot( cache_id=cid("deconv_spectrum"), data=long, @@ -200,9 +205,12 @@ def build_component( interactivity={MASS: "mass_id"}, x_column="mass", y_column="intensity", - # Per-peak charge label on each deconvolved stick (legacy showed the - # charge state next to peaks); None when no charge data is available. - annotation_column=ann_col, + # No always-on per-peak labels: the legacy deconvolved (mass-axis) + # spectrum draws annotation boxes ONLY for the selected mass + # (computeAnnotationBoxes returns [] when nothing is highlighted), and + # the z=N charge labels are gated to the m/z (annotated) axis. The gold + # selected-peak highlight above already reproduces the on-selection + # emphasis, so no annotation_column is supplied here. 
title="Deconvolved Spectrum", x_label="Monoisotopic Mass", y_label="Intensity", @@ -245,22 +253,22 @@ def build_component( filters={SCAN: "index"}, optional_filters={MASS: "mass_id"}, mz_column="mass", - title="Precursor Signals", + title="Precursor signals", cache_path=cache_dir, ) def _render_3d(): # Reflect the selection state in the title: the default view shows the - # scan's full precursor S/N peaks ("Precursor Signals"); once a single + # scan's full precursor S/N peaks ("Precursor signals"); once a single # mass is isolated (massIndex set via the mass-table / deconv-heatmap - # click) it shows that mass's signal/noisy peaks ("Mass Signals"). + # click) it shows that mass's signal/noisy peaks ("Mass signals"). # Only the displayed title arg changes — the cached/filtered data is # untouched — so this does not invalidate the component cache. mass_selected = ( state_manager is not None and state_manager.get_selection(MASS) is not None ) - s3._title = "Mass Signals" if mass_selected else "Precursor Signals" + s3._title = "Mass signals" if mass_selected else "Precursor signals" return s3(key=skey("3D_SN_plot"), state_manager=state_manager) return _render_3d @@ -482,67 +490,6 @@ def _mass_table_column_definitions(long: pl.LazyFrame) -> List[Dict[str, Any]]: return _column_definitions(fields, _MASS_TABLE_COLUMNS) -def _deconv_spectrum_with_charge(file_manager, dataset_id, explode_spectrum_long): - """Deconvolved-spectrum long format + a per-peak ``charge_label`` (``z=N``). - - Returns ``(long_frame, annotation_column_or_None)``. - - The plain ``deconv_spectrum`` cache holds only ``MonoMass``/``SumIntensity`` - (no charge). ``combined_spectrum`` carries the SAME deconvolved sticks PLUS - each mass's constituent ``SignalPeaks`` (``[binIdx, mz, intensity, charge]``), - so we derive a representative charge per deconvolved mass (the charge of its - most intense constituent peak) and format it as ``z=N``. 
This is built - locally via Polars expressions — ``long_format.py`` is untouched. - - Falls back to the plain ``deconv_spectrum`` (no annotation) when - ``combined_spectrum`` / ``SignalPeaks`` are unavailable, in which case the - annotation column is ``None``. - """ - try: - combined = _load_polars(file_manager, dataset_id, "combined_spectrum") - schema = combined.collect_schema().names() - if not {"MonoMass", "SumIntensity", "SignalPeaks"} <= set(schema): - raise KeyError("combined_spectrum lacks SignalPeaks") - long = ( - combined.select(["index", "MonoMass", "SumIntensity", "SignalPeaks"]) - .explode(["MonoMass", "SumIntensity", "SignalPeaks"]) - .rename({"MonoMass": "mass", "SumIntensity": "intensity"}) - # mass_id matches explode_spectrum_long's per-scan peak order so the - # scan cross-link row counts stay identical. - .with_columns(pl.int_range(pl.len()).over("index").alias("mass_id")) - ) - # Representative charge = charge (peak field 3) of the constituent peak - # with the maximum intensity (peak field 2). Null when a mass has no - # constituent peaks; then no label is shown for that stick. 
- long = long.with_columns( - pl.col("SignalPeaks") - .list.eval(pl.element().list.get(3)) - .list.get( - pl.col("SignalPeaks") - .list.eval(pl.element().list.get(2)) - .list.arg_max() - ) - .cast(pl.Int64, strict=False) - .alias("_charge") - ).with_columns( - pl.when(pl.col("_charge").is_not_null()) - .then(pl.format("z={}", pl.col("_charge"))) - .otherwise(pl.lit("")) - .alias("charge_label") - ) - long = long.select( - ["index", "mass_id", "mass", "intensity", "charge_label"] - ).sort(["index", "mass_id"]) - return long, "charge_label" - except Exception: # pragma: no cover - defensive fallback to plain cache - logger.info( - "deconv_spectrum charge labels unavailable (no SignalPeaks); " - "rendering without charge annotation" - ) - per_scan = _load_polars(file_manager, dataset_id, "deconv_spectrum") - return explode_spectrum_long(per_scan), None - - def _build_sequence_view( dataset_id, file_manager, cache_dir, cid, skey, state_manager ): @@ -576,6 +523,11 @@ def _build_sequence_view( filters={SCAN: "index"}, deconvolved=True, fixed_modifications=_fixed_mods_from_sequence(seq), + # Legacy FLASHDeconv SequenceViewInformation defaults to a/x/y ions + # selected (aIon/xIon/yIon true; b/c/z false). Without this the OI default + # (b/y) would show different initial ion annotations. (TnT instead drives + # ion_types from the FLASHTnT run settings.) + annotation_config={"ion_types": ["a", "x", "y"]}, cache_path=cache_dir, ) return lambda: sv(key=skey("sequence_view"), state_manager=state_manager) @@ -620,6 +572,28 @@ def _fixed_mods_from_sequence(seq: Dict[str, Any]) -> List[str]: return mods +def _reset_mass_on_scan_change(state_manager) -> None: + """Reset the selected mass to the new scan's first peak when the SCAN selection + changes via the scan table (legacy ``updateSelectedScan -> updateSelectedMass(0)``). 
+ + A heatmap click sets scan AND mass together (via the bare store action, no + reset), so the reset is skipped when BOTH changed in the same run -- otherwise + it would clobber the heatmap's mass selection. The previous scan/mass are kept + in private state markers to detect the change. + """ + scan_index = state_manager.get_selection(SCAN) + mass_index = state_manager.get_selection(MASS) + if ( + scan_index != state_manager.get_selection("_last_scan") + and mass_index == state_manager.get_selection("_last_mass") + and mass_index not in (None, 0) + ): + state_manager.set_selection(MASS, 0) + mass_index = 0 + state_manager.set_selection("_last_scan", scan_index) + state_manager.set_selection("_last_mass", mass_index) + + def render_experiment( dataset_id: str, layout_rows: List[List[str]], @@ -643,9 +617,17 @@ def render_experiment( import streamlit as st from openms_insight import StateManager - # Per-experiment StateManager — distinct session_key keeps selections from - # leaking across side-by-side panels (Risks/watch-items in the plan). - state_manager = StateManager(session_key=f"oi_state_{panel_key}") + # Per-experiment StateManager — a distinct session_key keeps selections from + # leaking across side-by-side panels, AND scoping it to dataset_id starts each + # newly selected experiment from a clean selection (default-row-0) instead of + # inheriting the previous dataset's scan/mass -- reproducing the legacy + # render_grid reset on dataset change (src/render/render.py:80-82). + state_manager = StateManager(session_key=f"oi_state_{panel_key}_{dataset_id}") + + # Scan-table click resets the mass to the new scan's first peak (legacy scan + # table: updateSelectedScan -> updateSelectedMass(0)); a heatmap click keeps + # its own mass selection (see _reset_mass_on_scan_change). 
+ _reset_mass_on_scan_change(state_manager) for row_index, row in enumerate(layout_rows): if not row: diff --git a/src/render_oi/tnt_viewer.py b/src/render_oi/tnt_viewer.py index 96085b1..a560d12 100644 --- a/src/render_oi/tnt_viewer.py +++ b/src/render_oi/tnt_viewer.py @@ -406,14 +406,23 @@ def _deconv_signal_peaks_long(per_scan: pl.LazyFrame) -> pl.LazyFrame: ] +# Columns sorted as text (legacy gave these no numeric sorter, so a header click +# sorts alphabetically). Only the protein table's free-text fields qualify; the +# tag table's TagSequence kept a numeric sorter in the legacy, so it is excluded. +_STRING_SORT_FIELDS = {"accession", "description"} + + def _tnt_column_definitions(present_fields, spec) -> List[Dict[str, Any]]: """Build Tabulator ``column_definitions`` from a (field,title,tooltip,float, dash) spec, emitting only fields present in the data. Numeric columns get a ``number`` sorter; float columns additionally get the - fixed-precision ``money`` formatter (legacy ``toFixed(4)``). ``dash`` columns - record ``_dashSentinel`` so the caller can null their -1.0 sentinel in the - data (the legacy "-1 -> '-'" formatter); the key is ignored by Tabulator. + fixed-precision ``money`` formatter (legacy ``toFixed(4)``). Free-text columns + (``_STRING_SORT_FIELDS``) get a ``string`` sorter so they sort alphabetically + like the legacy (a numeric sorter parses text as NaN and fails to order it). + ``dash`` columns record ``_dashSentinel`` so the caller can null their -1.0 + sentinel in the data (the legacy "-1 -> '-'" formatter); the key is ignored by + Tabulator. 
""" present = set(present_fields) defs: List[Dict[str, Any]] = [] @@ -424,7 +433,7 @@ def _tnt_column_definitions(present_fields, spec) -> List[Dict[str, Any]]: "title": title, "field": field, "headerTooltip": tooltip, - "sorter": "number", + "sorter": "string" if field in _STRING_SORT_FIELDS else "number", } if is_float: col.update(_FLOAT_FMT) @@ -681,7 +690,9 @@ def _render_tag_table(): "Decoy": {"label": "Decoy", "color": "red"}, }, title="Score Distribution", - x_label="Proteoform-level q-value", + # Legacy FDRPlotly x-axis label (the target/decoy density is over the + # identification score, matching the Deconv fdr_plot's "QScore"). + x_label="QScore", cache_path=cache_dir, ) return lambda: dp(key=skey("id_fdr_plot"), state_manager=state_manager) @@ -690,6 +701,7 @@ def _render_tag_table(): if comp_name in _HEATMAP_SPEC: title, cache_name = _HEATMAP_SPEC[comp_name] data = _load_polars(file_manager, dataset_id, cache_name) + is_deconv = "deconv" in comp_name hm = Heatmap( cache_id=cid(comp_name), data=data, @@ -697,8 +709,10 @@ def _render_tag_table(): y_column="mass", intensity_column="intensity", title=title, - x_label="Retention time", - y_label="Monoisotopic mass", + x_label="Retention Time", + # Raw heatmaps plot m/z; only deconvolved heatmaps plot neutral mass + # (mirrors the Deconv heatmap labelling). + y_label="Monoisotopic Mass" if is_deconv else "m/z", zoom_identifier=f"tnt_{comp_name}_zoom", cache_path=cache_dir, ) @@ -736,7 +750,12 @@ def render_experiment_tnt( import streamlit as st from openms_insight import StateManager - state_manager = StateManager(session_key=f"oi_tnt_state_{panel_key}") + # Scope the session_key to dataset_id so switching the selected experiment + # starts from a clean selection instead of inheriting the previous dataset's + # protein/tag/residue selection (legacy render_grid reset on dataset change, + # src/render/render.py:80-82). A distinct panel_key still isolates side-by-side + # panels. 
+ state_manager = StateManager(session_key=f"oi_tnt_state_{panel_key}_{dataset_id}") # Resolve proteinIndex → deconvIndex BEFORE rendering downstream panels so # the spectrum/mass/sequence filters see the right scan on this run. diff --git a/tests/test_deconv_viewer_realdata.py b/tests/test_deconv_viewer_realdata.py index 1f457a7..742d5aa 100644 --- a/tests/test_deconv_viewer_realdata.py +++ b/tests/test_deconv_viewer_realdata.py @@ -251,44 +251,34 @@ def test_mass_table_column_definitions(fake_fm, monkeypatch): assert by_field[f]["formatter"] == "money", f -def test_deconv_spectrum_charge_annotation(fake_fm, monkeypatch, tmp_path): - """deconv_spectrum LinePlot carries a per-peak z=N charge annotation column, - and that column flows into the rendered plotData.""" +def test_deconv_spectrum_no_always_on_annotation(fake_fm, monkeypatch): + """The deconvolved (mass-axis) spectrum carries NO always-on per-peak label: + the legacy draws annotation boxes only for the SELECTED mass and gates z=N + charge labels to the m/z axis. It relies on the massIndex cross-link for the + gold selected-peak highlight, and its rows carry mass_id (the highlight key, + aligned with the mass table).""" import streamlit as st monkeypatch.setattr(st, "session_state", {}, raising=False) from openms_insight import StateManager - from src.render_oi.deconv_viewer import build_component + from src.render_oi.deconv_viewer import MASS, build_component captured = _capture_built_components(monkeypatch) sm = StateManager(session_key="oi_deconv_ann") build_component("deconv_spectrum", "example_fd", fake_fm, sm, key_prefix="p0")() lp, _ = captured["lineplot"] - assert lp._annotation_column == "charge_label" + # No always-on labels (the regression was a z=N label on every stick). + assert lp._annotation_column is None + # The mass-table <-> spectrum highlight cross-link is wired via massIndex. 
+ assert lp._interactivity == {MASS: "mass_id"} - # The annotation column is present and populated with z=N labels in plotData. + # mass_id is present (the highlight lookup key) and rows are non-empty. from src.parse.long_format import explode_spectrum_long - from src.render_oi.deconv_viewer import _deconv_spectrum_with_charge - long, ann = _deconv_spectrum_with_charge( - fake_fm, "example_fd", explode_spectrum_long - ) - assert ann == "charge_label" - df = long.collect() - labels = df.filter(pl.col("charge_label") != "")["charge_label"].to_list() - assert labels, "expected at least one z=N charge label" - assert all(s.startswith("z=") for s in labels[:50]) - - # Peak/row counts must stay identical to the plain explode so the scan - # cross-link row-count contract is preserved. - plain = explode_spectrum_long(pl.scan_parquet(_FD / "deconv_spectrum.pq")).collect() - assert df.height == plain.height - assert ( - df.group_by("index").len().sort("index")["len"].to_list() - == plain.group_by("index").len().sort("index")["len"].to_list() - ) + df = explode_spectrum_long(pl.scan_parquet(_FD / "deconv_spectrum.pq")).collect() + assert "mass_id" in df.columns and df.height > 0 def test_anno_spectrum_has_no_charge_annotation(fake_fm, monkeypatch): @@ -309,8 +299,8 @@ def test_anno_spectrum_has_no_charge_annotation(fake_fm, monkeypatch): def test_3d_plot_title_reflects_selection(fake_fm, monkeypatch): - """3D_SN_plot title is 'Precursor Signals' with no mass selected and - 'Mass Signals' once a massIndex is selected.""" + """3D_SN_plot title is 'Precursor signals' with no mass selected and + 'Mass signals' once a massIndex is selected.""" import streamlit as st monkeypatch.setattr(st, "session_state", {}, raising=False) @@ -324,12 +314,41 @@ def test_3d_plot_title_reflects_selection(fake_fm, monkeypatch): sm.set_selection("scanIndex", 0) render() - assert captured["scatter3d"][0]._title == "Precursor Signals" + assert captured["scatter3d"][0]._title == "Precursor signals" 
sm.set_selection("massIndex", 0) render() - assert captured["scatter3d"][0]._title == "Mass Signals" + assert captured["scatter3d"][0]._title == "Mass signals" sm.set_selection("massIndex", None) render() - assert captured["scatter3d"][0]._title == "Precursor Signals" + assert captured["scatter3d"][0]._title == "Precursor signals" + + +def test_scan_change_resets_mass(monkeypatch): + """A scan-table click resets massIndex to the new scan's first peak (legacy + updateSelectedMass(0)); a heatmap click that changes scan AND mass together + keeps the heatmap's mass instead of being clobbered.""" + import streamlit as st + + monkeypatch.setattr(st, "session_state", {}, raising=False) + from openms_insight import StateManager + + from src.render_oi.deconv_viewer import MASS, SCAN, _reset_mass_on_scan_change + + sm = StateManager(session_key="oi_reset_test") + sm.set_selection(SCAN, 1) + sm.set_selection(MASS, 5) + _reset_mass_on_scan_change(sm) # first call only records the markers + assert sm.get_selection(MASS) == 5 + + # Scan-table click: scan changes, mass unchanged -> reset to first peak. + sm.set_selection(SCAN, 2) + _reset_mass_on_scan_change(sm) + assert sm.get_selection(MASS) == 0 + + # Heatmap click: scan AND mass change together -> keep the heatmap's mass. + sm.set_selection(SCAN, 3) + sm.set_selection(MASS, 9) + _reset_mass_on_scan_change(sm) + assert sm.get_selection(MASS) == 9 diff --git a/tests/test_tnt_viewer_realdata.py b/tests/test_tnt_viewer_realdata.py index 2ad30c9..4648fc1 100644 --- a/tests/test_tnt_viewer_realdata.py +++ b/tests/test_tnt_viewer_realdata.py @@ -177,6 +177,15 @@ def test_protein_table_column_definitions_and_sort(fake_fm, monkeypatch): assert args["goToFields"] == ["Scan", "accession"] assert args["interactivity"] == {"proteinIndex": "index"} assert all("_dashSentinel" not in c for c in args["columnDefinitions"]) + # Free-text columns sort as strings (alphabetical), matching legacy; numeric + # columns keep the number sorter. 
A number sorter on text fails to order it. + sorters = {c["field"]: c.get("sorter") for c in args["columnDefinitions"]} + for f in ("accession", "description"): + if f in sorters: + assert sorters[f] == "string", f"{f} should sort as string" + for f in ("Scan", "length", "Score"): + if f in sorters: + assert sorters[f] == "number", f"{f} should sort as number" def test_protein_best_per_spectrum_reduces_rows(fake_fm, monkeypatch):