From f62af090a44f95a44681abe2bd17aee8e98b8639 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 6 May 2026 10:31:11 +0200
Subject: [PATCH 1/2] Move knowledge of how dataset status works to the database layer
---
src/database/datasets.py | 19 +++++++++++--------
src/routers/openml/datasets.py | 12 +++++-------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/src/database/datasets.py b/src/database/datasets.py
index d6f91706..ba0edcad 100644
--- a/src/database/datasets.py
+++ b/src/database/datasets.py
@@ -14,7 +14,7 @@
DuplicatePrimaryKeyError,
ForeignKeyConstraintError,
)
-from schemas.datasets.openml import Feature
+from schemas.datasets.openml import DatasetStatus, Feature
async def get(id_: int, connection: AsyncConnection) -> Row | None:
@@ -105,18 +105,21 @@ async def get_description(
async def get_status(id_: int, connection: AsyncConnection) -> Row | None:
"""Get most recent status for the dataset."""
- row = await connection.execute(
- text(
- """
+ row = (
+ await connection.execute(
+ text(
+ """
SELECT *
FROM dataset_status
WHERE did = :dataset_id
ORDER BY status_date DESC
+ LIMIT 1
""",
- ),
- parameters={"dataset_id": id_},
- )
- return row.first()
+ ),
+ parameters={"dataset_id": id_},
+ )
+ ).first()
+ return DatasetStatus(row.status) if row else DatasetStatus.IN_PREPARATION
async def get_latest_processing_update(dataset_id: int, connection: AsyncConnection) -> Row | None:
diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
index 68d86aed..e14fa804 100644
--- a/src/routers/openml/datasets.py
+++ b/src/routers/openml/datasets.py
@@ -366,7 +366,7 @@ async def update_dataset_status(
raise DatasetAdminOnlyError(msg)
current_status = await database.datasets.get_status(dataset_id, expdb)
- if current_status and current_status.status == status:
+ if current_status == status:
msg = f"Illegal status transition, requested status {status} matches current status."
raise DatasetStatusTransitionError(msg)
@@ -376,14 +376,14 @@ async def update_dataset_status(
# - in preparation => deactivated (add a row)
# - active => deactivated (add a row)
# - deactivated => active (delete a row)
- if current_status is None or status == DatasetStatus.DEACTIVATED:
+ if current_status == DatasetStatus.IN_PREPARATION or status == DatasetStatus.DEACTIVATED:
await database.datasets.update_status(
dataset_id,
status,
user_id=user.user_id,
connection=expdb,
)
- elif current_status.status == DatasetStatus.DEACTIVATED:
+ elif current_status == DatasetStatus.DEACTIVATED:
await database.datasets.remove_deactivated_status(dataset_id, expdb)
else:
msg = f"Unknown status transition: {current_status} -> {status}"
@@ -392,7 +392,7 @@ async def update_dataset_status(
logger.info(
"Dataset {dataset_id} changed from {previous} to {current}",
dataset_id=dataset_id,
- previous=current_status.status if current_status else DatasetStatus.IN_PREPARATION,
+ previous=current_status,
current=status,
)
return {"dataset_id": dataset_id, "status": status}
@@ -427,8 +427,6 @@ async def get_dataset(
database.datasets.get_status(dataset_id, expdb_db),
)
- status_ = DatasetStatus(status.status) if status else DatasetStatus.IN_PREPARATION
-
description_ = ""
if description:
description_ = description.description.replace("\r", "").strip()
@@ -446,7 +444,7 @@ async def get_dataset(
return DatasetMetadata(
id=dataset.did,
visibility=dataset.visibility,
- status=status_,
+ status=status,
name=dataset.name,
licence=dataset.licence,
version=dataset.version,
From 0971055911c23560d75b0f78031e7f63ca733248 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 6 May 2026 11:03:27 +0200
Subject: [PATCH 2/2] Fix get_status return annotation and select only the status column
---
src/database/datasets.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/database/datasets.py b/src/database/datasets.py
index ba0edcad..9abfc14c 100644
--- a/src/database/datasets.py
+++ b/src/database/datasets.py
@@ -103,13 +103,13 @@ async def get_description(
return row.first()
-async def get_status(id_: int, connection: AsyncConnection) -> Row | None:
+async def get_status(id_: int, connection: AsyncConnection) -> DatasetStatus:
"""Get most recent status for the dataset."""
row = (
await connection.execute(
text(
"""
- SELECT *
+ SELECT status
FROM dataset_status
WHERE did = :dataset_id
ORDER BY status_date DESC