From 9c5fb8fde6780d4c186b1a043072621c16c47a97 Mon Sep 17 00:00:00 2001 From: anon Date: Thu, 21 May 2026 14:43:25 +0200 Subject: [PATCH 1/2] Skip wasted cs-level table copy in render_shapes/render_labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_render_shapes` and `_render_labels` were calling `filter_by_coordinate_system(filter_tables=True)`, which copied the table's sparse `.X` via `_filter_table_by_element_names`. The filtered table was then immediately discarded: - Shapes: overwritten by `join_spatialelement_table` (per-element). - Labels: re-sliced by region, then `_set_color_source_vec` calls `match_table_to_element` (per-element). So the cs-level copy was pure waste — multi-second for the consolidated multi-sample Visium tables reported in the issue. Switch both call sites to `filter_tables=False`. Side effect that surfaced: `_filter_table_by_element_names` also mutated `sdata`'s table obs in place via `table.obs = pd.DataFrame(...)`, triggering AnnData's implicit int64→str index coercion. Dropping the cs filter exposed AnnData's strict `_normalize_index` rejection of non-RangeIndex int64 obs indices in the EntityID-collision workaround path (test_render_shapes_color_with_conflicting_index_name). Extended the existing workaround to also swap to a RangeIndex (saved/restored under try/finally). --- src/spatialdata_plot/pl/render.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/spatialdata_plot/pl/render.py b/src/spatialdata_plot/pl/render.py index eb39824d..c39a7064 100644 --- a/src/spatialdata_plot/pl/render.py +++ b/src/spatialdata_plot/pl/render.py @@ -411,9 +411,13 @@ def _render_shapes( _check_obs_var_shadow(sdata, element, col_for_color, render_params.table_name) + # filter_tables=False: when table_name is set, the cs-level table copy is + # redundant — join_spatialelement_table below produces a per-element table + # that overwrites sdata_filt[table_name]. Skipping the upstream copy avoids + # materializing the full sparse .X for large consolidated tables. sdata_filt = sdata.filter_by_coordinate_system( coordinate_system=coordinate_system, - filter_tables=bool(render_params.table_name), + filter_tables=False, ) table_name = render_params.table_name @@ -425,18 +429,27 @@ def _render_shapes( # Workaround for upstream spatialdata bug (scverse/spatialdata#1099): # join_spatialelement_table calls table.obs.reset_index() which fails when - # the obs index name matches an existing column (e.g. "EntityID" in Merfish data). - # Temporarily drop the conflicting index name for the join, then restore it. + # the obs index name matches an existing column (e.g. "EntityID" in Merfish + # data). When that collision is present, the obs index may also be a + # non-RangeIndex of int dtype, which AnnData's `_normalize_index` rejects + # when the join indexes back into the table. Temporarily swap to a clean + # RangeIndex / drop the conflicting name; restore on exit. _obs = sdata[table_name].obs _saved_index_name = _obs.index.name + _saved_index: pd.Index | None = None if _saved_index_name is not None and _saved_index_name in _obs.columns: _obs.index.name = None + if not isinstance(_obs.index, pd.RangeIndex): + _saved_index = _obs.index + _obs.index = pd.RangeIndex(len(_obs)) try: element_dict, joined_table = join_spatialelement_table( sdata, spatial_element_names=element, table_name=table_name, how="inner" ) finally: + if _saved_index is not None: + _obs.index = _saved_index _obs.index.name = _saved_index_name sdata_filt[element] = shapes = element_dict[element] joined_table.uns["spatialdata_attrs"]["region"] = ( @@ -1748,9 +1761,12 @@ def _render_labels( _check_obs_var_shadow(sdata, element, col_for_color, table_name) + # filter_tables=False: see _render_shapes — match_table_to_element below + # (via _set_color_source_vec → get_values) already filters to the single + # element, so the upstream cs-level sparse copy is wasted work. sdata_filt = sdata.filter_by_coordinate_system( coordinate_system=coordinate_system, - filter_tables=bool(table_name), + filter_tables=False, ) label = sdata_filt.labels[element] From bb4c2e290c401205e22640ff8c01e772d508c6db Mon Sep 17 00:00:00 2001 From: anon Date: Thu, 21 May 2026 14:53:44 +0200 Subject: [PATCH 2/2] Trim comments, flatten nested if in EntityID workaround Address simplify-review nits: cs-filter comments were over-explaining what the code did; collapse to single-line WHY. Flatten the nested collision/Range-index check into an elif cascade. --- src/spatialdata_plot/pl/render.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/spatialdata_plot/pl/render.py b/src/spatialdata_plot/pl/render.py index c39a7064..1c9b97eb 100644 --- a/src/spatialdata_plot/pl/render.py +++ b/src/spatialdata_plot/pl/render.py @@ -411,10 +411,8 @@ def _render_shapes( _check_obs_var_shadow(sdata, element, col_for_color, render_params.table_name) - # filter_tables=False: when table_name is set, the cs-level table copy is - # redundant — join_spatialelement_table below produces a per-element table - # that overwrites sdata_filt[table_name]. Skipping the upstream copy avoids - # materializing the full sparse .X for large consolidated tables. + # filter_tables=False: join_spatialelement_table below overwrites the table, + # so the cs-level sparse copy is wasted work. sdata_filt = sdata.filter_by_coordinate_system( coordinate_system=coordinate_system, filter_tables=False, @@ -437,11 +435,12 @@ def _render_shapes( _obs = sdata[table_name].obs _saved_index_name = _obs.index.name _saved_index: pd.Index | None = None - if _saved_index_name is not None and _saved_index_name in _obs.columns: + _name_collides = _saved_index_name is not None and _saved_index_name in _obs.columns + if _name_collides and not isinstance(_obs.index, pd.RangeIndex): + _saved_index = _obs.index + _obs.index = pd.RangeIndex(len(_obs)) + elif _name_collides: _obs.index.name = None - if not isinstance(_obs.index, pd.RangeIndex): - _saved_index = _obs.index - _obs.index = pd.RangeIndex(len(_obs)) try: element_dict, joined_table = join_spatialelement_table( @@ -1761,9 +1760,8 @@ def _render_labels( _check_obs_var_shadow(sdata, element, col_for_color, table_name) - # filter_tables=False: see _render_shapes — match_table_to_element below - # (via _set_color_source_vec → get_values) already filters to the single - # element, so the upstream cs-level sparse copy is wasted work. + # filter_tables=False: match_table_to_element below already filters per + # element, so the cs-level sparse copy is wasted work. sdata_filt = sdata.filter_by_coordinate_system( coordinate_system=coordinate_system, filter_tables=False,