From 3d8f188f7f7e4791f216f7b02ac79debbcb9383d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 27 Aug 2025 08:41:04 +0200 Subject: [PATCH 1/9] update changelog and scripts --- CHANGELOG.md | 33 ++++++++++++ scripts/run_benchmark/render_results_local.sh | 54 +++++++++++++++++++ scripts/run_benchmark/run_full_seqeracloud.sh | 3 +- 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 scripts/run_benchmark/render_results_local.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index e69de29b..54aa1750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -0,0 +1,33 @@ +# task_spatial_simulators 0.1.0 + +First release of the spatial simulator benchmark. + +Core task documentation and API: + - Component types: Process Dataset, Method, Metric + - File formats: Single-Cell Dataset, Spatial Dataset, Solution, Score + +Dataset processing components for fetching datasets from the SpatialSimBench figshare: + - Source: https://figshare.com/articles/dataset/SpatialSimBench_dataset/26054188 + - Transforms the h5ads into standardised components + +Simulation methods under `src/methods/`: + - `scdesign2` + - `scdesign3_nb` + - `scdesign3_poisson` + - `sparsim` + - `splatter` + - `srtsim` + - `synsim` + - `zinbwave` + +Control methods under `src/control_methods/`: + - `negative_normal` + - `negative_shuffle` + - `positive` + +Metrics under `src/metrics/`: + - `ks_statistic_gene_cell` + - `ks_statistic_sc_features/` + +Documentation: + - Check the `README.md` and `INSTRUCTIONS.md` for how to use and extend the benchmark. diff --git a/scripts/run_benchmark/render_results_local.sh b/scripts/run_benchmark/render_results_local.sh new file mode 100644 index 00000000..6ada4771 --- /dev/null +++ b/scripts/run_benchmark/render_results_local.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# fail on error +set -e + +# ensure we're in the root of the repo +REPO_ROOT=$(git rev-parse --show-toplevel) +cd "$REPO_ROOT" + +# set input and output directories +TASK=task_spatial_simulators +BASE_DIR="s3://openproblems-data/resources/$TASK/results" +OUTPUT_DIR="output/report" + +# find subdir in bucket with latest date +DATE=$(aws s3 ls $BASE_DIR --recursive | awk '{print $4}' | grep 'task_info.yaml' | sort -r | head -n 1 | sed 's#.*/run_\(.*\)/[^/]*$#\1#') + +INPUT_DIR="$BASE_DIR/run_$DATE" +TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//') + +echo "Processing $DATE -> $OUTPUT_DIR" + + +# start the run +extra_filters=() +# extra_filters=( +# --datasets_exclude "cellxgene_census/hypomap;cellxgene_census/mouse_pancreas_atlas" +# --metrics_exclude "hvg_overlap" +# ) + +nextflow run openproblems-bio/openproblems \ + -r build/main \ + -main-script target/nextflow/reporting/process_task_results/main.nf \ + -profile docker \ + -resume \ + -latest \ + -c common/nextflow_helpers/labels_ci.config \ + --id "$TASK/run_$DATE" \ + --input_scores "$INPUT_DIR/score_uns.yaml" \ + --input_dataset_info "$INPUT_DIR/dataset_uns.yaml" \ + --input_method_configs "$INPUT_DIR/method_configs.yaml" \ + --input_metric_configs "$INPUT_DIR/metric_configs.yaml" \ + --input_trace "$INPUT_DIR/trace.txt" \ + --input_task_info "$INPUT_DIR/task_info.yaml" \ + --output_state '$id/state.yaml' \ + --output_combined '$id/combined_output.json' \ + --output_report '$id/report.html' \ + --output_dataset_info '$id/dataset_info.json' \ + --output_method_info '$id/method_info.json' \ + --output_metric_info '$id/metric_info.json' \ + --output_results '$id/results.json' \ + --output_quality_control '$id/quality_control.json' \ + --publish_dir "$OUTPUT_DIR" \ + "${extra_filters[@]}" diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh index 646adadf..a85a4bbe 100755 --- a/scripts/run_benchmark/run_full_seqeracloud.sh +++ b/scripts/run_benchmark/run_full_seqeracloud.sh @@ -21,11 +21,10 @@ publish_dir: "$publish_dir" HERE tw launch https://github.com/openproblems-bio/task_spatial_simulators.git \ - --revision build/main \ + --revision v0.1.0 \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ --workspace 53907369739130 \ - --compute-env 5DwwhQoBi0knMSGcwThnlF \ --params-file /tmp/params.yaml \ --entry-name auto \ --config common/nextflow_helpers/labels_tw.config \ From 9a3e01d2c31b52d5f84b0c876457aa77cbd1581e Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 10:51:06 +0200 Subject: [PATCH 2/9] update submodule --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index 80321bf1..876036f7 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 80321bf1b5e44330c5ceadfc1434908bb58e2aff +Subproject commit 876036f71713cbd79285b108ab0a9a8238f2b5e1 From 2e70e43e4372ac233b19bb7afdf75303deb155c4 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 10:52:09 +0200 Subject: [PATCH 3/9] ci force From 80f631fc682f92a9bf9dbc48a0e86f7deb874d5c Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 11:05:15 +0200 Subject: [PATCH 4/9] update submodule --- _viash.yaml | 2 +- common | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/_viash.yaml b/_viash.yaml index 8776016d..0f495c48 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,4 +1,4 @@ -viash_version: 0.9.4 +viash_version: 0.9.7 name: task_spatial_simulators organization: openproblems-bio diff --git a/common b/common index 876036f7..f0816e17 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 876036f71713cbd79285b108ab0a9a8238f2b5e1 +Subproject commit f0816e178a2b44749fdfb1a9cdfc76887dcf7462 From d9c9efa50ef227626f7347f6f3ff19faf3079138 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 11:05:45 +0200 Subject: [PATCH 5/9] add instructions --- .../config-method.instructions.md | 131 ++++++++++++++++++ .../config-metric.instructions.md | 93 +++++++++++++ .../method-scripts-python.instructions.md | 97 +++++++++++++ .../method-scripts-r.instructions.md | 100 +++++++++++++ 4 files changed, 421 insertions(+) create mode 100644 .github/instructions/config-method.instructions.md create mode 100644 .github/instructions/config-metric.instructions.md create mode 100644 .github/instructions/method-scripts-python.instructions.md create mode 100644 .github/instructions/method-scripts-r.instructions.md diff --git a/.github/instructions/config-method.instructions.md b/.github/instructions/config-method.instructions.md new file mode 100644 index 00000000..e78c5b04 --- /dev/null +++ b/.github/instructions/config-method.instructions.md @@ -0,0 +1,131 @@ +--- +description: "Use when writing, fixing, or reviewing config.vsh.yaml files in src/methods/ or src/control_methods/. Covers required metadata, info fields, docker engine setup, nextflow runner labels, and how to verify components." +applyTo: "src/methods/**/config.vsh.yaml,src/control_methods/**/config.vsh.yaml" +--- +# Method & Control Method Config Guidelines + +## Structure Overview + +```yaml +__merge__: /src/api/comp_method.yaml # or comp_control_method.yaml +name: "my_method" # snake_case, unique +label: My Method # human-readable, used in tables +summary: "One sentence summary." # used in overview tables +description: | # multi-paragraph, used in docs + Longer description... +references: # omit for control methods + doi: + - 10.xxxx/xxxxx +links: # omit for control methods + repository: https://github.com/... + documentation: https://... +info: + variants: + my_method_default: + my_method_variant: + some_param: value +arguments: # only if method has extra params beyond --input/--output + - name: "--some_param" + type: integer + description: "..." + example: 100 # use example, NOT default + info: + test_default: 1 # override value used during viash test only +resources: + - type: r_script # or python_script + path: script.R # or script.py +engines: + - type: docker + image: openproblems/base_r:1 # see base images below + setup: + - type: r + packages: [package1, package2] +runners: + - type: executable + - type: nextflow + directives: + label: [midtime, highmem, midcpu] # adjust to actual needs +``` + +## Methods vs Control Methods + +| Field | Method | Control Method | +|---|---|---| +| `__merge__` | `/src/api/comp_method.yaml` | `/src/api/comp_control_method.yaml` | +| `references` | required | omit | +| `links` | recommended | omit | +| inputs | `--input` (spatial dataset) | `--input` (spatial dataset) | +| extra args | `--base` (domain/tissue, optional) | none | + +## Required Metadata Fields + +- `name`: unique, matches `[a-z][a-z0-9_]*` +- `label`: short human-readable name +- `summary`: one sentence +- `description`: one or more paragraphs +- `references.doi` (methods only): list of DOIs + +## info Section + +- `variants`: each key becomes a separate benchmark entry. Override any argument value by nesting it under the variant key. Every method needs at least one variant with the same name as the method. + +## Arguments + +- Do **not** set `default` on any argument — defaults belong to the library, not the config. Use `example` to document a typical value. +- Use `info.test_default` to override a parameter value **only during `viash test`** (not in benchmarks). This is useful to reduce epoch counts, disable slow quality checks, etc., so tests run quickly without affecting real benchmark results. +- Argument names use `--snake_case`. Viash exposes them in the script as `par['snake_case']` (Python) or `par$snake_case` (R). +- After adding, removing, or renaming any argument, regenerate the `## VIASH START` block in the script so the `par` dict stays in sync: + ```bash + viash config inject src/methods//config.vsh.yaml + ``` + +```yaml +arguments: + - name: --n_epochs + type: integer + description: "Number of training epochs." + example: 100 + info: + test_default: 1 # 1 epoch during testing for speed + - name: --flow_threshold + type: double + description: "Flow error threshold. Set to 0 to skip flow quality check." + example: 0.4 + info: + test_default: 0 # skip check during testing +``` + +## Base Docker Images + +| Image | Use for | +|---|---| +| `openproblems/base_python:1` | Python, CPU | +| `openproblems/base_r:1` | R, CPU | +| `openproblems/base_pytorch_nvidia:1` | PyTorch + NVIDIA GPU | +| `openproblems/base_tensorflow_nvidia:1` | TensorFlow + NVIDIA GPU | + +## Nextflow Runner Labels + +Set in `runners[type=nextflow].directives.label`. Pick one from each category: + +| Category | Options | +|---|---| +| Time | `lowtime`, `midtime`, `hightime` | +| Memory | `lowmem`, `midmem`, `highmem`, `veryhighmem` | +| CPU | `lowcpu`, `midcpu`, `highcpu` | +| GPU (optional) | `gpu`, `biggpu` | + +## Rebuilding the Docker Image + +After changing the `setup` section: +```bash +viash run src/methods//config.vsh.yaml -- ---setup cachedbuild +``` + +## Verification + +```bash +viash test src/methods//config.vsh.yaml +``` + +Both test scripts must succeed (`2 out of 2`). diff --git a/.github/instructions/config-metric.instructions.md b/.github/instructions/config-metric.instructions.md new file mode 100644 index 00000000..c4659566 --- /dev/null +++ b/.github/instructions/config-metric.instructions.md @@ -0,0 +1,93 @@ +--- +description: "Use when writing, fixing, or reviewing config.vsh.yaml files in src/metrics/. Covers required metadata, the info.metrics list structure, docker engine setup, nextflow runner labels, and how to verify components." +applyTo: "src/metrics/**/config.vsh.yaml" +--- +# Metric Config Guidelines + +## Structure Overview + +Metrics differ from methods: metadata (`label`, `summary`, `description`, `references`) lives inside the `info.metrics` list, not at the top level. A single component can expose multiple metric values. + +```yaml +__merge__: /src/api/comp_metric.yaml +name: "my_metric" # snake_case, unique component name +info: + metrics: + - name: my_metric_value1 # snake_case, unique metric name + label: My Metric Value 1 # human-readable, used in tables + summary: "One sentence summary." + description: "Longer description." + references: + doi: 10.xxxx/xxxxx + min: 0 + max: 1 + maximize: true # true if higher = better + - name: my_metric_value2 + label: My Metric Value 2 + summary: "..." + description: "..." + references: + doi: 10.xxxx/xxxxx + min: 0 + max: 1 + maximize: false +resources: + - type: python_script # or r_script + path: script.py # or script.R +engines: + - type: docker + image: openproblems/base_python:1 # see base images below + setup: + - type: python + packages: [scikit-learn] +runners: + - type: executable + - type: nextflow + directives: + label: [midtime, midmem, midcpu] +``` + +## Required Fields per Metric Entry + +Each entry in `info.metrics` must have: +- `name`: unique metric identifier, snake_case +- `label`: short human-readable name +- `summary`: one sentence +- `description`: full description +- `references.doi`: DOI(s) for the metric +- `min` / `max`: numeric range of possible values +- `maximize`: `true` if higher score = better performance + +## Base Docker Images + +| Image | Use for | +|---|---| +| `openproblems/base_python:1` | Python, CPU | +| `openproblems/base_r:1` | R, CPU | + +Metrics rarely need GPU images. + +## Nextflow Runner Labels + +Metrics are typically lightweight. Use conservative defaults: + +| Category | Options | +|---|---| +| Time | `lowtime`, `midtime`, `hightime` | +| Memory | `lowmem`, `midmem`, `highmem` | +| CPU | `lowcpu`, `midcpu`, `highcpu` | + +## Rebuilding the Docker Image + +After changing the `setup` section: +```bash +viash run src/metrics//config.vsh.yaml -- ---setup cachedbuild +``` + +## Verification + +```bash +viash test src/metrics//config.vsh.yaml +``` + +Both test scripts must succeed (`2 out of 2`). diff --git a/.github/instructions/method-scripts-python.instructions.md b/.github/instructions/method-scripts-python.instructions.md new file mode 100644 index 00000000..17163fcf --- /dev/null +++ b/.github/instructions/method-scripts-python.instructions.md @@ -0,0 +1,97 @@ +--- +description: "Use when writing, fixing, or reviewing method/metric script.py files in src/methods/, src/metrics/, or src/control_methods/. Covers script style, API compliance, and how to verify components." +applyTo: "src/methods/**/script.py,src/metrics/**/script.py,src/control_methods/**/script.py" +--- +# Method & Metric Script Guidelines (Python) + +## Core Principle + +`script.py` should represent **typical bioinformatician usage** of the tool with minimal modifications. Only adapt what is strictly necessary to: +1. Read inputs from the paths provided by `par` +2. Pass the right data structures to the method +3. Convert the method's output back into the expected output structures +4. Write outputs to `par['output']` + +Do **not** restructure the method's native API, add abstraction layers, or rewrite the algorithm logic. + +## Finding API Specs + +Input/output file formats are defined in `src/api/`. Key files: +- `file_dataset_sp.yaml` — spatial dataset input format (contains `layers['counts']`, `obs['row']`, `obs['col']`, `obs['spatial_cluster']`, etc.) +- `file_dataset_sc.yaml` — single-cell dataset input format (metrics only) +- `file_simulated_dataset.yaml` — expected output format for methods +- `file_score.yaml` — expected output format for metrics +- `comp_method.yaml`, `comp_control_method.yaml`, `comp_metric.yaml` — component argument specs + +Always check these before deciding what fields to read or write. + +## The `## VIASH START` / `## VIASH END` Block + +This block is **auto-generated** by viash from the component's `config.vsh.yaml` arguments. It is replaced at build/test time with a real CLI parser. Keep it in the script only as a local development convenience. + +- **Do not edit it manually** to add or remove parameters — edit `config.vsh.yaml` instead. +- After adding, removing, or renaming an argument in the config, regenerate the block: + ```bash + viash config inject src/methods//config.vsh.yaml + ``` +- Argument names in the config (`--my_param`) map directly to `par['my_param']` keys. + +## Common Patterns + +**Method: reading input:** +```python +import anndata as ad +input = ad.read_h5ad(par['input']) +``` + +**Method: writing simulated dataset output:** +```python +output = ad.AnnData( + layers={"counts": simulated_counts}, # integer matrix, cells x genes + obs=input.obs[["row", "col"]], + var=input.var, + uns={ + **input.uns, + "method_id": meta["name"], + }, +) +output.write_h5ad(par['output'], compression="gzip") +``` + +**Metric: reading inputs:** +```python +import anndata as ad +input_spatial_dataset = ad.read_h5ad(par['input_spatial_dataset']) +input_singlecell_dataset = ad.read_h5ad(par['input_singlecell_dataset']) +input_simulated_dataset = ad.read_h5ad(par['input_simulated_dataset']) +``` + +**Metric: writing score output:** +```python +output = ad.AnnData( + uns={ + "dataset_id": input_simulated_dataset.uns["dataset_id"], + "method_id": input_simulated_dataset.uns["method_id"], + "metric_ids": ["metric_name_1", "metric_name_2"], + "metric_values": [score1, score2], + }, +) +output.write_h5ad(par['output'], compression="gzip") +``` + +## Dependency Fixes + +If a library has a dependency conflict (e.g., incompatible with newer `anndata`, `numpy`, etc.), prefer replacing it with an alternative that provides the same model/algorithm natively rather than pinning transitive dependencies. + +Update `config.vsh.yaml` to remove the broken package from the `setup` block when replacing it. + +## Verification + +After any change to a method script or config, verify with: +```bash +viash test src/methods//config.vsh.yaml +# or +viash test src/metrics//config.vsh.yaml +``` + +Both test scripts must succeed (`2 out of 2`). diff --git a/.github/instructions/method-scripts-r.instructions.md b/.github/instructions/method-scripts-r.instructions.md new file mode 100644 index 00000000..0605889e --- /dev/null +++ b/.github/instructions/method-scripts-r.instructions.md @@ -0,0 +1,100 @@ +--- +description: "Use when writing, fixing, or reviewing method/metric script.R files in src/methods/, src/metrics/, or src/control_methods/. Covers script style, API compliance, and how to verify components." +applyTo: "src/methods/**/script.R,src/metrics/**/script.R,src/control_methods/**/script.R" +--- +# Method & Metric Script Guidelines (R) + +## Core Principle + +`script.R` should represent **typical bioinformatician usage** of the tool with minimal modifications. Only adapt what is strictly necessary to: +1. Read inputs from the paths provided by `par` +2. Pass the right data structures to the method +3. Convert the method's output back into the expected output structures +4. Write outputs to `par$output` + +Do **not** restructure the method's native API, add abstraction layers, or rewrite the algorithm logic. + +## Finding API Specs + +Input/output file formats are defined in `src/api/`. Key files: +- `file_dataset_sp.yaml` — spatial dataset input format (contains `layers$counts`, `obs$row`, `obs$col`, `obs$spatial_cluster`, etc.) +- `file_dataset_sc.yaml` — single-cell dataset input format (metrics only) +- `file_simulated_dataset.yaml` — expected output format for methods +- `file_score.yaml` — expected output format for metrics +- `comp_method.yaml`, `comp_control_method.yaml`, `comp_metric.yaml` — component argument specs + +Always check these before deciding what fields to read or write. + +## The `## VIASH START` / `## VIASH END` Block + +This block is **auto-generated** by viash from the component's `config.vsh.yaml` arguments. It is replaced at build/test time with a real CLI parser. Keep it in the script only as a local development convenience. + +- **Do not edit it manually** to add or remove parameters — edit `config.vsh.yaml` instead. +- After adding, removing, or renaming an argument in the config, regenerate the block: + ```bash + viash config inject src/methods//config.vsh.yaml + ``` +- Argument names in the config (`--my_param`) map directly to `par$my_param` keys. + +## Common Patterns + +**Method: reading input:** +```r +input <- anndata::read_h5ad(par$input) +``` + +**Method: writing simulated dataset output:** +```r +output <- anndata::AnnData( + layers = list( + counts = simulated_counts # integer matrix, cells x genes + ), + obs = input$obs[c("row", "col")], + var = input$var, + uns = c( + input$uns, + list( + method_id = meta$name + ) + ) +) +output$write_h5ad(par$output, compression = "gzip") +``` + +**Metric: reading inputs:** +```r +input_spatial_dataset <- anndata::read_h5ad(par[["input_spatial_dataset"]]) +input_singlecell_dataset <- anndata::read_h5ad(par[["input_singlecell_dataset"]]) +input_simulated_dataset <- anndata::read_h5ad(par[["input_simulated_dataset"]]) +``` + +**Metric: writing score output:** +```r +output <- anndata::AnnData( + uns = list( + dataset_id = input_simulated_dataset$uns[["dataset_id"]], + method_id = input_simulated_dataset$uns[["method_id"]], + metric_ids = c("metric_name_1", "metric_name_2"), + metric_values = c(score1, score2) + ), + shape = c(0L, 0L) +) +output$write_h5ad(par[["output"]], compression = "gzip") +``` + +## Dependency Fixes + +If a library has a dependency conflict (e.g., incompatible with a newer Bioconductor version, `anndata` R package, etc.), prefer replacing it with an alternative that provides the same model/algorithm natively rather than pinning transitive dependencies. + +Update `config.vsh.yaml` to remove the broken package from the `setup` block when replacing it. + +## Verification + +After any change to a method script or config, verify with: +```bash +viash test src/methods//config.vsh.yaml +# or +viash test src/metrics//config.vsh.yaml +``` + +Both test scripts must succeed (`2 out of 2`). From 36daefee7f29a3688800e21760e198697605a13d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 11:05:51 +0200 Subject: [PATCH 6/9] update readme --- README.md | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index cb0ad127..2786ad08 100644 --- a/README.md +++ b/README.md @@ -33,13 +33,13 @@ should consist of similar cell types from similar tissues. ## Authors & contributors -| name | roles | -|:------------------|:-------------------| -| Xiaoqi Liang | author, maintainer | -| Yue Cao | author | -| Jean Yang | author | -| Robrecht Cannoodt | contributor | -| Sai Nirmayi Yasa | contributor | +| Name | Roles | Orcid | Github | +|:------------------|:-------------------|:--------------------|:--------------| +| Xiaoqi Liang | author, maintainer | 0009-0004-9625-1441 | littlecabiria | +| Yue Cao | author | 0000-0002-2356-4031 | ycao6928 | +| Jean Yang | author | 0000-0002-5271-2603 | jeany21 | +| Robrecht Cannoodt | contributor | 0000-0003-3641-729X | rcannood | +| Sai Nirmayi Yasa | contributor | 0009-0003-6319-9803 | sainirmayi | ## API @@ -76,24 +76,10 @@ Arguments: | Name | Type | Description | |:---|:---|:---| -| `--input_sc` | `file` | NA. | -| `--input_sp` | `file` | NA. | +| `--input_sc` | `file` | . | +| `--input_sp` | `file` | . | | `--output_sc` | `file` | (*Output*) An unprocessed single-cell dataset as output by a dataset loader. Default: `$id/output_sc.h5ad`. | | `--output_sp` | `file` | (*Output*) An unprocessed spatial dataset as output by a dataset loader. Default: `$id/output_sp.h5ad`. | -| `--dataset_id` | `string` | NA. | -| `--dataset_name` | `string` | NA. | -| `--dataset_url_spatial` | `string` | (*Optional*) NA. | -| `--dataset_url_singlecell` | `string` | (*Optional*) NA. | -| `--dataset_reference` | `string` | (*Optional*) NA. | -| `--dataset_reference_spatial` | `string` | (*Optional*) NA. | -| `--dataset_reference_singlecell` | `string` | (*Optional*) NA. | -| `--dataset_summary_spatial` | `string` | NA. | -| `--dataset_summary_singlecell` | `string` | NA. | -| `--dataset_description_spatial` | `string` | NA. | -| `--dataset_description_singlecell` | `string` | NA. | -| `--dataset_organism` | `string` | NA. | -| `--dataset_assay_spatial` | `string` | NA. | -| `--dataset_assay_singlecell` | `string` | NA. | @@ -168,8 +154,6 @@ Arguments: | Name | Type | Description | |:---|:---|:---| | `--input` | `file` | (*Optional*) An unprocessed spatial dataset as output by a dataset loader. | -| `--base` | `string` | (*Optional*) NA. Default: `domain`. | -| `--base` | `string` | (*Optional*) NA. Default: `domain`. | | `--output` | `file` | (*Output*) The solution for the test data. | @@ -186,4 +170,3 @@ The solution for the test data Example file: `resources_test/spatialsimbench_mobnew/simulated_dataset_processed.h5ad` - From b01c6e5e899e8f958545c273e9b0d9c298f4cd8b Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 11:07:44 +0200 Subject: [PATCH 7/9] Fix splatter --- src/methods/splatter/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/splatter/config.vsh.yaml b/src/methods/splatter/config.vsh.yaml index 16abce03..845d6310 100644 --- a/src/methods/splatter/config.vsh.yaml +++ b/src/methods/splatter/config.vsh.yaml @@ -8,7 +8,7 @@ description: | references: doi: 10.1186/s13059-017-1305-0 links: - documentation: https://bioconductor.org/packages/devel/bioc/vignettes/splatter/inst/doc/splatter.html + documentation: https://bioconductor.org/packages/release/bioc/vignettes/splatter/inst/doc/splatter.html repository: https://github.com/Oshlack/splatter resources: From 48df60b9bf280c263002a1e216c3708819a7712c Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 11:09:56 +0200 Subject: [PATCH 8/9] update script --- scripts/run_benchmark/render_results_local.sh | 46 +------------------ 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/scripts/run_benchmark/render_results_local.sh b/scripts/run_benchmark/render_results_local.sh index 6ada4771..5fc3f6b9 100644 --- a/scripts/run_benchmark/render_results_local.sh +++ b/scripts/run_benchmark/render_results_local.sh @@ -7,48 +7,6 @@ set -e REPO_ROOT=$(git rev-parse --show-toplevel) cd "$REPO_ROOT" -# set input and output directories -TASK=task_spatial_simulators -BASE_DIR="s3://openproblems-data/resources/$TASK/results" -OUTPUT_DIR="output/report" +publish_dir="resources/results/...some_run_id..." -# find subdir in bucket with latest date -DATE=$(aws s3 ls $BASE_DIR --recursive | awk '{print $4}' | grep 'task_info.yaml' | sort -r | head -n 1 | sed 's#.*/run_\(.*\)/[^/]*$#\1#') - -INPUT_DIR="$BASE_DIR/run_$DATE" -TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//') - -echo "Processing $DATE -> $OUTPUT_DIR" - - -# start the run -extra_filters=() -# extra_filters=( -# --datasets_exclude "cellxgene_census/hypomap;cellxgene_census/mouse_pancreas_atlas" -# --metrics_exclude "hvg_overlap" -# ) - -nextflow run openproblems-bio/openproblems \ - -r build/main \ - -main-script target/nextflow/reporting/process_task_results/main.nf \ - -profile docker \ - -resume \ - -latest \ - -c common/nextflow_helpers/labels_ci.config \ - --id "$TASK/run_$DATE" \ - --input_scores "$INPUT_DIR/score_uns.yaml" \ - --input_dataset_info "$INPUT_DIR/dataset_uns.yaml" \ - --input_method_configs "$INPUT_DIR/method_configs.yaml" \ - --input_metric_configs "$INPUT_DIR/metric_configs.yaml" \ - --input_trace "$INPUT_DIR/trace.txt" \ - --input_task_info "$INPUT_DIR/task_info.yaml" \ - --output_state '$id/state.yaml' \ - --output_combined '$id/combined_output.json' \ - --output_report '$id/report.html' \ - --output_dataset_info '$id/dataset_info.json' \ - --output_method_info '$id/method_info.json' \ - --output_metric_info '$id/metric_info.json' \ - --output_results '$id/results.json' \ - --output_quality_control '$id/quality_control.json' \ - --publish_dir "$OUTPUT_DIR" \ - "${extra_filters[@]}" +common/scripts/render_results_report local "$publish_dir" --output "$publish_dir/report/" From 6ae3d2c72538111ed565cdd8bee7355af2251960 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 May 2026 11:11:17 +0200 Subject: [PATCH 9/9] update readme --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 2786ad08..438aa0c5 100644 --- a/README.md +++ b/README.md @@ -33,13 +33,13 @@ should consist of similar cell types from similar tissues. ## Authors & contributors -| Name | Roles | Orcid | Github | -|:------------------|:-------------------|:--------------------|:--------------| -| Xiaoqi Liang | author, maintainer | 0009-0004-9625-1441 | littlecabiria | -| Yue Cao | author | 0000-0002-2356-4031 | ycao6928 | -| Jean Yang | author | 0000-0002-5271-2603 | jeany21 | -| Robrecht Cannoodt | contributor | 0000-0003-3641-729X | rcannood | -| Sai Nirmayi Yasa | contributor | 0009-0003-6319-9803 | sainirmayi | +| Name | Roles | Github | Orcid | +|:------------------|:-------------------|:--------------|:--------------------| +| Xiaoqi Liang | author | littlecabiria | 0009-0004-9625-1441 | +| Yue Cao | author, maintainer | ycao6928 | 0000-0002-2356-4031 | +| Jean Yang | author | jeany21 | 0000-0002-5271-2603 | +| Robrecht Cannoodt | contributor | rcannood | 0000-0003-3641-729X | +| Sai Nirmayi Yasa | contributor | sainirmayi | 0009-0003-6319-9803 | ## API