From f62af090a44f95a44681abe2bd17aee8e98b8639 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 6 May 2026 10:31:11 +0200 Subject: [PATCH 1/2] Move knowledge of how data status works to db layer --- src/database/datasets.py | 19 +++++++++++-------- src/routers/openml/datasets.py | 12 +++++------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/database/datasets.py b/src/database/datasets.py index d6f91706..ba0edcad 100644 --- a/src/database/datasets.py +++ b/src/database/datasets.py @@ -14,7 +14,7 @@ DuplicatePrimaryKeyError, ForeignKeyConstraintError, ) -from schemas.datasets.openml import Feature +from schemas.datasets.openml import DatasetStatus, Feature async def get(id_: int, connection: AsyncConnection) -> Row | None: @@ -105,18 +105,21 @@ async def get_description( async def get_status(id_: int, connection: AsyncConnection) -> Row | None: """Get most recent status for the dataset.""" - row = await connection.execute( - text( - """ + row = ( + await connection.execute( + text( + """ SELECT * FROM dataset_status WHERE did = :dataset_id ORDER BY status_date DESC + LIMIT 1 """, - ), - parameters={"dataset_id": id_}, - ) - return row.first() + ), + parameters={"dataset_id": id_}, + ) + ).first() + return DatasetStatus(row.status) if row else DatasetStatus.IN_PREPARATION async def get_latest_processing_update(dataset_id: int, connection: AsyncConnection) -> Row | None: diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index 68d86aed..e14fa804 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -366,7 +366,7 @@ async def update_dataset_status( raise DatasetAdminOnlyError(msg) current_status = await database.datasets.get_status(dataset_id, expdb) - if current_status and current_status.status == status: + if current_status == status: msg = f"Illegal status transition, requested status {status} matches current status." raise DatasetStatusTransitionError(msg) @@ -376,14 +376,14 @@ async def update_dataset_status( # - in preparation => deactivated (add a row) # - active => deactivated (add a row) # - deactivated => active (delete a row) - if current_status is None or status == DatasetStatus.DEACTIVATED: + if current_status == DatasetStatus.IN_PREPARATION or status == DatasetStatus.DEACTIVATED: await database.datasets.update_status( dataset_id, status, user_id=user.user_id, connection=expdb, ) - elif current_status.status == DatasetStatus.DEACTIVATED: + elif current_status == DatasetStatus.DEACTIVATED: await database.datasets.remove_deactivated_status(dataset_id, expdb) else: msg = f"Unknown status transition: {current_status} -> {status}" @@ -392,7 +392,7 @@ async def update_dataset_status( logger.info( "Dataset {dataset_id} changed from {previous} to {current}", dataset_id=dataset_id, - previous=current_status.status if current_status else DatasetStatus.IN_PREPARATION, + previous=current_status, current=status, ) return {"dataset_id": dataset_id, "status": status} @@ -427,8 +427,6 @@ async def get_dataset( database.datasets.get_status(dataset_id, expdb_db), ) - status_ = DatasetStatus(status.status) if status else DatasetStatus.IN_PREPARATION - description_ = "" if description: description_ = description.description.replace("\r", "").strip() @@ -446,7 +444,7 @@ async def get_dataset( return DatasetMetadata( id=dataset.did, visibility=dataset.visibility, - status=status_, + status=status, name=dataset.name, licence=dataset.licence, version=dataset.version, From 0971055911c23560d75b0f78031e7f63ca733248 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 6 May 2026 11:03:27 +0200 Subject: [PATCH 2/2] fix minor oversights --- src/database/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/database/datasets.py b/src/database/datasets.py index ba0edcad..9abfc14c 100644 --- a/src/database/datasets.py +++ b/src/database/datasets.py @@ -103,13 +103,13 @@ async def get_description( return row.first() -async def get_status(id_: int, connection: AsyncConnection) -> Row | None: +async def get_status(id_: int, connection: AsyncConnection) -> DatasetStatus: """Get most recent status for the dataset.""" row = ( await connection.execute( text( """ - SELECT * + SELECT status FROM dataset_status WHERE did = :dataset_id ORDER BY status_date DESC