From 4ca7e4fc1b063932e0a602b6643da3bb3ef175f2 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 6 May 2026 12:14:24 +0200
Subject: [PATCH 01/13] (no-verify) move execution out of module import
---
src/config.py | 58 ++++++++++++++++++++++++++++++++-------------------
src/main.py | 2 +-
2 files changed, 38 insertions(+), 22 deletions(-)
diff --git a/src/config.py b/src/config.py
index ffc4fb89..a25cb955 100644
--- a/src/config.py
+++ b/src/config.py
@@ -18,22 +18,7 @@
EXPDB_DB_USERNAME_ENV = "OPENML_DATABASES_EXPDB_USERNAME"
EXPDB_DB_PASSWORD_ENV = "OPENML_DATABASES_EXPDB_PASSWORD" # noqa: S105 # not a password
-_config_directory = Path(os.getenv(CONFIG_DIRECTORY_ENV, Path(__file__).parent))
-_config_directory = _config_directory.expanduser().absolute()
-_config_file = Path(os.getenv(CONFIG_FILE_ENV, _config_directory / "config.toml"))
-_config_file = _config_file.expanduser().absolute()
-_dotenv_file = Path(os.getenv(DOTENV_FILE_ENV, _config_directory / ".env"))
-_dotenv_file = _dotenv_file.expanduser().absolute()
-
-
-logger.info(
- "Determined configuration sources.",
- configuration_directory=_config_directory,
- configuration_file=_config_file,
- dotenv_file=_dotenv_file,
-)
-
-load_dotenv(dotenv_path=_dotenv_file)
+_config_file: Path | None = None
def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
@@ -51,12 +36,12 @@ def _load_configuration(file: Path) -> TomlTable:
def load_routing_configuration(file: Path = _config_file) -> TomlTable:
- return typing.cast("TomlTable", _load_configuration(file)["routing"])
+ return typing.cast("TomlTable", load_configuration(configuration_file=file)["routing"])
@functools.cache
def load_database_configuration(file: Path = _config_file) -> TomlTable:
- configuration = _load_configuration(file)
+ configuration = load_configuration(configuration_file=file)
database_configuration = _apply_defaults_to_siblings(
configuration["databases"],
)
@@ -79,6 +64,37 @@ def load_database_configuration(file: Path = _config_file) -> TomlTable:
return database_configuration
-def load_configuration(file: Path | None = None) -> TomlTable:
- file = file or _config_file
- return tomllib.loads(file.read_text())
+def load_configuration(
+ dotenv_file: Path | None = None, configuration_file: Path | None = None
+) -> None:
+ """Load configuration from file and environment variables."""
+ _config_directory = Path(os.getenv(CONFIG_DIRECTORY_ENV, Path(__file__).parent))
+ _config_directory = _config_directory.expanduser().absolute()
+ logger.info(
+ "Determined configuration directory to be {configuration_directory}.",
+ configuration_directory=_config_directory,
+ )
+
+ if not dotenv_file:
+ dotenv_filepath = os.getenv(DOTENV_FILE_ENV, _config_directory / ".env")
+ dotenv_file = Path(dotenv_filepath).expanduser().absolute()
+
+ logger.info(
+ "Determined dotenv file path to be {dotenv_file}.",
+ dotenv_file=dotenv_file,
+ )
+ load_dotenv(dotenv_file)
+
+ if not configuration_file:
+ config_filepath = os.getenv(CONFIG_FILE_ENV, _config_directory / "config.toml")
+ configuration_file = Path(config_filepath).expanduser().absolute()
+
+ logger.info(
+ "Determined config file path to be {config_file}.",
+ config_file=configuration_file,
+ )
+
+ global _config_file
+ _config_file = configuration_file
+
+ return tomllib.loads(configuration_file.read_text())
diff --git a/src/main.py b/src/main.py
index 46cd79cf..68f8ac7f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -80,7 +80,7 @@ def create_api(configuration_file: Path | None = None) -> FastAPI:
setup_sink = logger.add(sys.stderr, serialize=True)
setup_log_sinks(configuration_file)
- fastapi_kwargs = load_configuration(configuration_file)["fastapi"]
+ fastapi_kwargs = load_configuration(configuration_file=configuration_file)["fastapi"]
logger.info("Creating FastAPI App", lifespan=lifespan, **fastapi_kwargs)
app = FastAPI(**fastapi_kwargs, lifespan=lifespan)
From 79d3938914a4d34183984dd7a1d57682e39a795f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 6 May 2026 15:45:20 +0200
Subject: [PATCH 02/13] Define classes for the configuration sections
---
src/config.py | 48 +++++++++++++++++++++++++++++++-
src/config.toml | 3 ++
src/routers/mldcat_ap/dataset.py | 2 +-
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/src/config.py b/src/config.py
index a25cb955..332eca44 100644
--- a/src/config.py
+++ b/src/config.py
@@ -3,9 +3,11 @@
import tomllib
import typing
from pathlib import Path
+from typing import Literal
from dotenv import load_dotenv
from loguru import logger
+from pydantic import AnyUrl, BaseModel, Field
TomlTable = dict[str, typing.Any]
@@ -21,6 +23,49 @@
_config_file: Path | None = None
+class DatabaseConfiguration(BaseModel, frozen=True):
+ """Settings for one database connection."""
+
+ host: str = Field(default="database", description="Database server host name")
+ port: int = Field(default=3306, gt=0)
+ database: str = Field(description="Database name")
+ user: str = Field(default="root")
+ password: str = Field(default="ok")
+ driver_name: str = Field(
+ default="mysql+aiomysql",
+ description="SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html",
+ )
+
+
+class DevelopmentConfiguration(BaseModel, frozen=True):
+ """Settings for development or test specific features."""
+
+ allow_test_api_keys: bool = Field(frozen=True)
+
+
+class RoutingConfiguration(BaseModel, frozen=True):
+ root_path: str = Field(default="", description="Path prefix under which the service is hosted.")
+ minio_url: AnyUrl = Field(description="URL to the MinIO server or service")
+ server_url: AnyUrl = Field(
+ description="URL to this server (excluding the path prefix of `fastapi.root_path`).",
+ )
+
+
+class LoggingConfiguration(BaseModel, frozen=True):
+ """Configuration for a single log sink.
+
+ You can add any arguments that `loguru.logger.add` allows,
+ the `sink` will be used as first positional argument.
+ See also: https://loguru.readthedocs.io/en/stable/api/logger.html
+ """
+
+ sink: str
+ level: Literal["TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR"]
+ rotation: str = Field(description="Set rotation policy by date or file size.")
+ retention: str = Field(description="Timespan after which automatic cleanup occurs.")
+ compression: str = Field(default="gz")
+
+
def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
defaults = configuration["defaults"]
return {
@@ -65,7 +110,8 @@ def load_database_configuration(file: Path = _config_file) -> TomlTable:
def load_configuration(
- dotenv_file: Path | None = None, configuration_file: Path | None = None
+ dotenv_file: Path | None = None,
+ configuration_file: Path | None = None,
) -> None:
"""Load configuration from file and environment variables."""
_config_directory = Path(os.getenv(CONFIG_DIRECTORY_ENV, Path(__file__).parent))
diff --git a/src/config.toml b/src/config.toml
index 384067d7..664c45df 100644
--- a/src/config.toml
+++ b/src/config.toml
@@ -9,6 +9,9 @@ allow_test_api_keys=true
# You can add any arguments to `loguru.logger.add`,
# the `sink` variable will be used as first positional argument.
# https://loguru.readthedocs.io/en/stable/api/logger.html
+
+# Any number of logging.NAME configurations can be added.
+# NAME is for reference only, it has no meaning otherwise.
[logging.develop]
sink="develop.log"
# One of loguru levels: TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index 2749664f..8da808a0 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -33,7 +33,7 @@
router = APIRouter(prefix="/mldcat_ap", tags=["MLDCAT-AP"])
_configuration = config.load_configuration()
_server_url = (
- f"{_configuration['arff_base_url']}{_configuration['fastapi']['root_path']}{router.prefix}"
+ f"{_configuration['server_url']}{_configuration['fastapi']['root_path']}{router.prefix}"
)
From 1ba61fa854b711605cb77ca3f798e9b40cdf0920 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 6 May 2026 16:22:17 +0200
Subject: [PATCH 03/13] Move some configuration options around, start using
Config classes
---
src/config.py | 59 +++++++++++++-------------------
src/config.toml | 14 +-------
src/database/setup.py | 14 +++++---
src/main.py | 6 ++--
src/routers/mldcat_ap/dataset.py | 4 +--
tests/config.test.toml | 13 +------
tests/config_test.py | 27 ++-------------
7 files changed, 41 insertions(+), 96 deletions(-)
diff --git a/src/config.py b/src/config.py
index 332eca44..5a3a1ff5 100644
--- a/src/config.py
+++ b/src/config.py
@@ -15,10 +15,6 @@
CONFIG_FILE_ENV = "OPENML_REST_API_CONFIG_FILE"
DOTENV_FILE_ENV = "OPENML_REST_API_DOTENV_FILE"
-OPENML_DB_USERNAME_ENV = "OPENML_DATABASES_OPENML_USERNAME"
-OPENML_DB_PASSWORD_ENV = "OPENML_DATABASES_OPENML_PASSWORD" # noqa: S105 # not a password
-EXPDB_DB_USERNAME_ENV = "OPENML_DATABASES_EXPDB_USERNAME"
-EXPDB_DB_PASSWORD_ENV = "OPENML_DATABASES_EXPDB_PASSWORD" # noqa: S105 # not a password
_config_file: Path | None = None
@@ -29,9 +25,13 @@ class DatabaseConfiguration(BaseModel, frozen=True):
host: str = Field(default="database", description="Database server host name")
port: int = Field(default=3306, gt=0)
database: str = Field(description="Database name")
- user: str = Field(default="root")
+ username: str = Field(default="root")
password: str = Field(default="ok")
- driver_name: str = Field(
+ echo: bool = Field(
+ default=False,
+ description="https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.create_engine.params.echo",
+ )
+ drivername: str = Field(
default="mysql+aiomysql",
description="SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html",
)
@@ -66,15 +66,6 @@ class LoggingConfiguration(BaseModel, frozen=True):
compression: str = Field(default="gz")
-def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
- defaults = configuration["defaults"]
- return {
- subtable: (defaults | overrides) if isinstance(overrides, dict) else overrides
- for subtable, overrides in configuration.items()
- if subtable != "defaults"
- }
-
-
@functools.cache
def _load_configuration(file: Path) -> TomlTable:
return tomllib.loads(file.read_text())
@@ -85,28 +76,24 @@ def load_routing_configuration(file: Path = _config_file) -> TomlTable:
@functools.cache
-def load_database_configuration(file: Path = _config_file) -> TomlTable:
+def load_database_configuration(file: Path = _config_file) -> dict[str, DatabaseConfiguration]:
configuration = load_configuration(configuration_file=file)
- database_configuration = _apply_defaults_to_siblings(
- configuration["databases"],
- )
- database_configuration["openml"]["username"] = os.environ.get(
- OPENML_DB_USERNAME_ENV,
- "root",
- )
- database_configuration["openml"]["password"] = os.environ.get(
- OPENML_DB_PASSWORD_ENV,
- "ok",
- )
- database_configuration["expdb"]["username"] = os.environ.get(
- EXPDB_DB_USERNAME_ENV,
- "root",
- )
- database_configuration["expdb"]["password"] = os.environ.get(
- EXPDB_DB_PASSWORD_ENV,
- "ok",
- )
- return database_configuration
+
+ database_configurations = {}
+ for db_alias, db_configuration in configuration["databases"].items():
+ credentials = {
+ "username": os.environ.get(
+ f"OPENML_DATABASES_{db_alias.upper()}_USERNAME",
+ "root",
+ ),
+ "password": os.environ.get(
+ f"OPENML_DATABASES_{db_alias.upper()}_PASSWORD",
+ "ok",
+ ),
+ }
+ database_configurations[db_alias] = DatabaseConfiguration(**db_configuration, **credentials)
+
+ return database_configurations
def load_configuration(
diff --git a/src/config.toml b/src/config.toml
index 664c45df..78f42be7 100644
--- a/src/config.toml
+++ b/src/config.toml
@@ -1,6 +1,3 @@
-arff_base_url="https://test.openml.org"
-minio_base_url="https://openml1.win.tue.nl"
-
[development]
allow_test_api_keys=true
@@ -16,21 +13,11 @@ allow_test_api_keys=true
sink="develop.log"
# One of loguru levels: TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR
level="DEBUG"
-# Automatically create a new file by date or file size
rotation="50 MB"
# Retention specifies the timespan after which automatic cleanup occurs.
retention="1 day"
compression="gz"
-[fastapi]
-root_path=""
-
-[databases.defaults]
-host="database"
-port="3306"
-# SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html
-drivername="mysql+aiomysql"
-
[databases.expdb]
database="openml_expdb"
@@ -38,5 +25,6 @@ database="openml_expdb"
database="openml"
[routing]
+root_path=""
minio_url="http://minio:9000/"
server_url="http://php-api:80/"
diff --git a/src/database/setup.py b/src/database/setup.py
index ca877138..decc6619 100644
--- a/src/database/setup.py
+++ b/src/database/setup.py
@@ -9,13 +9,17 @@
def _create_engine(database_name: str) -> AsyncEngine:
database_configuration = load_database_configuration()
- db_config = dict(database_configuration[database_name])
- echo = db_config.pop("echo", False)
-
- db_url = URL.create(**db_config)
+ db_config = database_configuration[database_name]
+ db_url = URL.create(
+ drivername=db_config.drivername,
+ username=db_config.username,
+ password=db_config.password,
+ host=db_config.host,
+ database=db_config.database,
+ )
return create_async_engine(
db_url,
- echo=echo,
+ echo=db_config.echo,
pool_recycle=3600,
)
diff --git a/src/main.py b/src/main.py
index 68f8ac7f..8e8c40f1 100644
--- a/src/main.py
+++ b/src/main.py
@@ -80,9 +80,9 @@ def create_api(configuration_file: Path | None = None) -> FastAPI:
setup_sink = logger.add(sys.stderr, serialize=True)
setup_log_sinks(configuration_file)
- fastapi_kwargs = load_configuration(configuration_file=configuration_file)["fastapi"]
- logger.info("Creating FastAPI App", lifespan=lifespan, **fastapi_kwargs)
- app = FastAPI(**fastapi_kwargs, lifespan=lifespan)
+ root_path = load_configuration(configuration_file=configuration_file)["routing"]["root_path"]
+ logger.info("Creating FastAPI App", lifespan=lifespan, root_path=root_path)
+ app = FastAPI(lifespan=lifespan, root_path=root_path)
logger.info("Setting up middleware and exception handlers.")
# Order matters! Each added middleware wraps the previous, creating a stack.
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index 8da808a0..3dd72de2 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -32,9 +32,7 @@
router = APIRouter(prefix="/mldcat_ap", tags=["MLDCAT-AP"])
_configuration = config.load_configuration()
-_server_url = (
- f"{_configuration['server_url']}{_configuration['fastapi']['root_path']}{router.prefix}"
-)
+_server_url = f"{_configuration['routing']['server_url']}{_configuration['routing']['root_path']}{router.prefix}"
@router.get(
diff --git a/tests/config.test.toml b/tests/config.test.toml
index 6942c904..f94d431b 100644
--- a/tests/config.test.toml
+++ b/tests/config.test.toml
@@ -1,6 +1,3 @@
-arff_base_url="https://test.openml.org"
-minio_base_url="https://openml1.win.tue.nl"
-
[development]
allow_test_api_keys=true
@@ -8,15 +5,6 @@ allow_test_api_keys=true
sink="sys.stderr"
level="DEBUG"
-[fastapi]
-root_path=""
-
-[databases.defaults]
-host="database"
-port="3306"
-# SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html
-drivername="mysql+aiomysql"
-
[databases.expdb]
database="openml_expdb"
@@ -24,5 +12,6 @@ database="openml_expdb"
database="openml"
[routing]
+root_path=""
minio_url="http://minio:9000/"
server_url="http://php-api:80/"
diff --git a/tests/config_test.py b/tests/config_test.py
index 3218f802..8c01688e 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -2,35 +2,14 @@
from pathlib import Path
from unittest import mock
-from config import _apply_defaults_to_siblings, load_database_configuration
-
-
-def test_apply_defaults_to_siblings_applies_defaults() -> None:
- input_ = {"defaults": {1: 1}, "other": {}}
- expected = {"other": {1: 1}}
- output = _apply_defaults_to_siblings(input_)
- assert output == expected
-
-
-def test_apply_defaults_to_siblings_does_not_override() -> None:
- input_ = {"defaults": {1: 1}, "other": {1: 2}}
- expected = {"other": {1: 2}}
- output = _apply_defaults_to_siblings(input_)
- assert output == expected
-
-
-def test_apply_defaults_to_siblings_ignores_nontables() -> None:
- input_ = {"defaults": {1: 1}, "other": {1: 2}, "not-a-table": 3}
- expected = {"other": {1: 2}, "not-a-table": 3}
- output = _apply_defaults_to_siblings(input_)
- assert output == expected
+from config import load_database_configuration
def test_load_configuration_adds_environment_variables(default_configuration_file: Path) -> None:
database_configuration = load_database_configuration(default_configuration_file)
- assert database_configuration["openml"]["username"] == "root"
+ assert database_configuration["openml"].username == "root"
load_database_configuration.cache_clear()
with mock.patch.dict(os.environ, {"OPENML_DATABASES_OPENML_USERNAME": "foo"}):
database_configuration = load_database_configuration(default_configuration_file)
- assert database_configuration["openml"]["username"] == "foo"
+ assert database_configuration["openml"].username == "foo"
From f1d6ca0fdecef8fbcc890f5fd3b52521a47726df Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 6 May 2026 18:01:20 +0200
Subject: [PATCH 04/13] Use configuration types internally
---
src/config.py | 95 +++++++++++++++++++++++---------
src/core/formatting.py | 6 +-
src/core/logging.py | 19 +++----
src/database/setup.py | 10 ++--
src/database/users.py | 4 +-
src/main.py | 7 ++-
src/routers/mldcat_ap/dataset.py | 6 +-
src/routers/openml/tasks.py | 5 +-
tests/config_test.py | 22 +++++---
9 files changed, 110 insertions(+), 64 deletions(-)
diff --git a/src/config.py b/src/config.py
index 5a3a1ff5..8611a1c3 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,9 +1,18 @@
+"""Configuration logic and schema definitions.
+
+If the configuration should use a non-default configuration file path
+or environment variable file path, then `load_set_configuration` should be
+called explicitly to provide those.
+
+Otherwise, access the configuration with the `get_config` method.
+"""
+
import functools
import os
import tomllib
import typing
from pathlib import Path
-from typing import Literal
+from typing import Literal, cast
from dotenv import load_dotenv
from loguru import logger
@@ -16,7 +25,23 @@
DOTENV_FILE_ENV = "OPENML_REST_API_DOTENV_FILE"
-_config_file: Path | None = None
+_config: Configuration | None = None
+
+
+@functools.cache
+def get_config() -> Configuration:
+ if _config is None:
+ load_set_configuration()
+ # load_set_configuration sets the `_config` variable
+ return cast("Configuration", _config)
+
+
+class Configuration(BaseModel, frozen=True):
+ openml_database: DatabaseConfiguration
+ expdb_database: DatabaseConfiguration
+ development: DevelopmentConfiguration
+ routing: RoutingConfiguration
+ logging: list[LoggingConfiguration]
class DatabaseConfiguration(BaseModel, frozen=True):
@@ -61,26 +86,26 @@ class LoggingConfiguration(BaseModel, frozen=True):
sink: str
level: Literal["TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR"]
- rotation: str = Field(description="Set rotation policy by date or file size.")
- retention: str = Field(description="Timespan after which automatic cleanup occurs.")
- compression: str = Field(default="gz")
-
-
-@functools.cache
-def _load_configuration(file: Path) -> TomlTable:
- return tomllib.loads(file.read_text())
-
-
-def load_routing_configuration(file: Path = _config_file) -> TomlTable:
- return typing.cast("TomlTable", load_configuration(configuration_file=file)["routing"])
-
+ rotation: str | None = Field(
+ default=None,
+ description="Set rotation policy by date or file size.",
+ )
+ retention: str | None = Field(
+ default=None,
+ description="Timespan after which automatic cleanup occurs.",
+ )
+ compression: str | None = Field(default="gz")
+ # Logs provided variables as JSON
+ serialize: bool = Field(default=True)
+ # Decouples log calls from I/O and makes it multiprocessing safe.
+ enqueue: bool = Field(default=True)
-@functools.cache
-def load_database_configuration(file: Path = _config_file) -> dict[str, DatabaseConfiguration]:
- configuration = load_configuration(configuration_file=file)
+def _load_database_configuration(
+ configurations: dict[str, dict[str, str]],
+) -> dict[str, DatabaseConfiguration]:
database_configurations = {}
- for db_alias, db_configuration in configuration["databases"].items():
+ for db_alias, db_configuration in configurations.items():
credentials = {
"username": os.environ.get(
f"OPENML_DATABASES_{db_alias.upper()}_USERNAME",
@@ -96,11 +121,23 @@ def load_database_configuration(file: Path = _config_file) -> dict[str, Database
return database_configurations
-def load_configuration(
+def load_set_configuration(
dotenv_file: Path | None = None,
configuration_file: Path | None = None,
) -> None:
- """Load configuration from file and environment variables."""
+ """Load the configuration from provided paths and use it as default for future lookups."""
+ global _config # noqa: PLW0603
+ _config = parse_configuration(dotenv_file, configuration_file)
+
+
+def parse_configuration(
+ dotenv_file: Path | None = None,
+ configuration_file: Path | None = None,
+) -> Configuration:
+ """Load configuration from file and environment variables.
+
+ The parsed configuration is returned but not used by default for other calls in this module.
+ """
_config_directory = Path(os.getenv(CONFIG_DIRECTORY_ENV, Path(__file__).parent))
_config_directory = _config_directory.expanduser().absolute()
logger.info(
@@ -127,7 +164,15 @@ def load_configuration(
config_file=configuration_file,
)
- global _config_file
- _config_file = configuration_file
-
- return tomllib.loads(configuration_file.read_text())
+ config = tomllib.loads(configuration_file.read_text())
+ db_configurations = _load_database_configuration(config["databases"])
+ return Configuration(
+ routing=RoutingConfiguration(**config["routing"]),
+ logging=[
+ LoggingConfiguration(**sink_configuration)
+ for sink_configuration in config["logging"].values()
+ ],
+ openml_database=db_configurations["openml"],
+ expdb_database=db_configurations["expdb"],
+ development=DevelopmentConfiguration(**config["development"]),
+ )
diff --git a/src/core/formatting.py b/src/core/formatting.py
index 406659fa..faf6d423 100644
--- a/src/core/formatting.py
+++ b/src/core/formatting.py
@@ -1,7 +1,7 @@
import html
from typing import TYPE_CHECKING
-from config import load_routing_configuration
+from config import get_config
from schemas.datasets.openml import DatasetFileFormat
if TYPE_CHECKING:
@@ -21,14 +21,14 @@ def _format_parquet_url(dataset: Row) -> str | None:
if dataset.format.lower() != DatasetFileFormat.ARFF:
return None
- minio_base_url = load_routing_configuration()["minio_url"]
+ minio_base_url = get_config().routing.minio_url
ten_thousands_prefix = f"{dataset.did // 10_000:04d}"
padded_id = f"{dataset.did:04d}"
return f"{minio_base_url}datasets/{ten_thousands_prefix}/{padded_id}/dataset_{dataset.did}.pq"
def _format_dataset_url(dataset: Row) -> str:
- base_url = load_routing_configuration()["server_url"]
+ base_url = get_config().routing.server_url
filename = f"{html.escape(dataset.name)}.{dataset.format.lower()}"
    return f"{base_url}data/v1/download/{dataset.file_id}/{filename}"
diff --git a/src/core/logging.py b/src/core/logging.py
index 6546f714..7f01cb4e 100644
--- a/src/core/logging.py
+++ b/src/core/logging.py
@@ -4,31 +4,26 @@
import time
import uuid
from collections.abc import Awaitable, Callable
-from pathlib import Path
from typing import TYPE_CHECKING
from loguru import logger
-from config import load_configuration
+from config import LoggingConfiguration
if TYPE_CHECKING:
from starlette.requests import Request
from starlette.responses import Response
-def setup_log_sinks(configuration_file: Path | None = None) -> None:
+def setup_log_sinks(*configurations: LoggingConfiguration) -> None:
"""Configure loguru based on app configuration."""
- configuration = load_configuration(configuration_file)
- for nickname, sink_configuration in configuration.get("logging", {}).items():
- logger.info("Configuring sink", nickname=nickname, **sink_configuration)
- sink = sink_configuration.pop("sink")
+ for sink_configuration in configurations:
+ conf = sink_configuration.model_dump()
+ logger.info("Configuring sink", **conf)
+ sink = conf.pop("sink")
if sink == "sys.stderr":
sink = sys.stderr
- # Logs the additionally provided data as JSON.
- sink_configuration.setdefault("serialize", True)
- # Decouples log calls from I/O and makes it multiprocessing safe.
- sink_configuration.setdefault("enqueue", True)
- logger.add(sink, **sink_configuration)
+ logger.add(sink, **conf)
async def add_request_context_to_log(
diff --git a/src/database/setup.py b/src/database/setup.py
index decc6619..c136bf37 100644
--- a/src/database/setup.py
+++ b/src/database/setup.py
@@ -1,15 +1,13 @@
from sqlalchemy.engine import URL
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
-from config import load_database_configuration
+from config import DatabaseConfiguration, get_config
_user_engine = None
_expdb_engine = None
-def _create_engine(database_name: str) -> AsyncEngine:
- database_configuration = load_database_configuration()
- db_config = database_configuration[database_name]
+def _create_engine(db_config: DatabaseConfiguration) -> AsyncEngine:
db_url = URL.create(
drivername=db_config.drivername,
username=db_config.username,
@@ -27,14 +25,14 @@ def _create_engine(database_name: str) -> AsyncEngine:
def user_database() -> AsyncEngine:
global _user_engine # noqa: PLW0603
if _user_engine is None:
- _user_engine = _create_engine("openml")
+ _user_engine = _create_engine(get_config().openml_database)
return _user_engine
def expdb_database() -> AsyncEngine:
global _expdb_engine # noqa: PLW0603
if _expdb_engine is None:
- _expdb_engine = _create_engine("expdb")
+ _expdb_engine = _create_engine(get_config().expdb_database)
return _expdb_engine
diff --git a/src/database/users.py b/src/database/users.py
index 8a812d69..20e67748 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -5,7 +5,7 @@
from pydantic import StringConstraints
from sqlalchemy import text
-from config import load_configuration
+from config import get_config
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -13,7 +13,7 @@
# If `allow_test_api_keys` is set, the key may also be one of `normaluser`,
# `normaluser2`, or `abc` (admin).
api_key_pattern = r"^[0-9a-fA-F]{32}$"
-if load_configuration().get("development", {}).get("allow_test_api_keys"):
+if get_config().development.allow_test_api_keys:
api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"
APIKey = Annotated[
diff --git a/src/main.py b/src/main.py
index 8e8c40f1..9c0f18be 100644
--- a/src/main.py
+++ b/src/main.py
@@ -10,7 +10,7 @@
from fastapi.exceptions import RequestValidationError
from loguru import logger
-from config import load_configuration
+from config import get_config, load_set_configuration
from core.errors import (
ProblemDetailError,
problem_detail_exception_handler,
@@ -78,9 +78,10 @@ def create_api(configuration_file: Path | None = None) -> FastAPI:
# Default logging configuration so we have logs during setup
logger.remove()
setup_sink = logger.add(sys.stderr, serialize=True)
- setup_log_sinks(configuration_file)
+ load_set_configuration(configuration_file=configuration_file)
+ setup_log_sinks(*get_config().logging)
- root_path = load_configuration(configuration_file=configuration_file)["routing"]["root_path"]
+ root_path = get_config().routing.root_path
logger.info("Creating FastAPI App", lifespan=lifespan, root_path=root_path)
app = FastAPI(lifespan=lifespan, root_path=root_path)
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index 3dd72de2..5778fcd5 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -31,8 +31,10 @@
from sqlalchemy.ext.asyncio import AsyncConnection
router = APIRouter(prefix="/mldcat_ap", tags=["MLDCAT-AP"])
-_configuration = config.load_configuration()
-_server_url = f"{_configuration['routing']['server_url']}{_configuration['routing']['root_path']}{router.prefix}"
+_routing_configuration = config.get_config().routing
+_server_url = (
+ f"{_routing_configuration.server_url}{_routing_configuration.root_path}{router.prefix}"
+)
@router.get(
diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py
index 6627d79f..411ea5b5 100644
--- a/src/routers/openml/tasks.py
+++ b/src/routers/openml/tasks.py
@@ -8,9 +8,9 @@
from fastapi import APIRouter, Body, Depends
from sqlalchemy import bindparam, text
-import config
import database.datasets
import database.tasks
+from config import get_config
from core.errors import InternalError, NoResultsError, TaskNotFoundError
from routers.dependencies import Pagination, expdb_connection
from routers.types import (
@@ -165,7 +165,8 @@ async def _fill_json_template( # noqa: C901
# I believe that the operations below are always part of string output, so
# we don't need to be careful to avoid losing typedness
template = template.replace("[TASK:id]", str(task.task_id))
- server_url = config.load_routing_configuration()["server_url"]
+ url = get_config().routing.server_url
+ server_url = f"{url.scheme}://{url.host}:{url.port}/"
return template.replace("[CONSTANT:base_url]", server_url)
diff --git a/tests/config_test.py b/tests/config_test.py
index 8c01688e..8d3a336d 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -1,15 +1,19 @@
import os
-from pathlib import Path
from unittest import mock
-from config import load_database_configuration
+from config import _load_database_configuration
-def test_load_configuration_adds_environment_variables(default_configuration_file: Path) -> None:
- database_configuration = load_database_configuration(default_configuration_file)
- assert database_configuration["openml"].username == "root"
+def test_load_configuration_adds_environment_variables() -> None:
+ _db_alias = "openml"
- load_database_configuration.cache_clear()
- with mock.patch.dict(os.environ, {"OPENML_DATABASES_OPENML_USERNAME": "foo"}):
- database_configuration = load_database_configuration(default_configuration_file)
- assert database_configuration["openml"].username == "foo"
+ _fake_config = {
+ _db_alias: {"database": "openml"},
+ }
+ database_configuration = _load_database_configuration(_fake_config)
+ assert database_configuration[_db_alias].username == "root"
+
+ _env_var_name = f"OPENML_DATABASES_{_db_alias.upper()}_USERNAME"
+ with mock.patch.dict(os.environ, {_env_var_name: "foo"}):
+ database_configuration = _load_database_configuration(_fake_config)
+ assert database_configuration[_db_alias].username == "foo"
From 270a942dbf83bbe1219bb1b4c2f67060b1f94f80 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 11:41:13 +0200
Subject: [PATCH 05/13] Allow setting config directly without parsing
---
src/config.py | 42 ++++++++++++++++++++++++------------------
src/main.py | 13 +++++++++----
tests/config.test.toml | 17 -----------------
tests/conftest.py | 23 +++++++++++++++++------
4 files changed, 50 insertions(+), 45 deletions(-)
delete mode 100644 tests/config.test.toml
diff --git a/src/config.py b/src/config.py
index 8611a1c3..6af80fb8 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,10 +1,20 @@
"""Configuration logic and schema definitions.
-If the configuration should use a non-default configuration file path
-or environment variable file path, then `load_set_configuration` should be
-called explicitly to provide those.
-
-Otherwise, access the configuration with the `get_config` method.
+The `get_config` function provides access to the most recently loaded configuration.
+A default configuration is loaded if none is explicitly set.
+
+Use `set_config` to use a different configuration.
+To parse a configuration from a file and environment variables, use `parse_config`.
+Example of loading a configuration with a custom TOML and .env file:
+
+```
+config = parse_config(
+ dotenv_file=Path("path/to/.env"),
+ configuration_file=Path("path/to/config.toml")
+)
+set_config(config)
+```
+and then subsequent calls to `get_config` will return that configuration.
"""
import functools
@@ -31,11 +41,16 @@
@functools.cache
def get_config() -> Configuration:
if _config is None:
- load_set_configuration()
- # load_set_configuration sets the `_config` variable
+ config = parse_config()
+ set_config(config)
return cast("Configuration", _config)
+def set_config(configuration: Configuration) -> None:
+ global _config # noqa: PLW0603
+ _config = configuration
+
+
class Configuration(BaseModel, frozen=True):
openml_database: DatabaseConfiguration
expdb_database: DatabaseConfiguration
@@ -65,7 +80,7 @@ class DatabaseConfiguration(BaseModel, frozen=True):
class DevelopmentConfiguration(BaseModel, frozen=True):
"""Settings for development or test specific features."""
- allow_test_api_keys: bool = Field(frozen=True)
+ allow_test_api_keys: bool = Field(default=False)
class RoutingConfiguration(BaseModel, frozen=True):
@@ -121,16 +136,7 @@ def _load_database_configuration(
return database_configurations
-def load_set_configuration(
- dotenv_file: Path | None = None,
- configuration_file: Path | None = None,
-) -> None:
- """Load the configuration from provided paths and use it as default for future lookups."""
- global _config # noqa: PLW0603
- _config = parse_configuration(dotenv_file, configuration_file)
-
-
-def parse_configuration(
+def parse_config(
dotenv_file: Path | None = None,
configuration_file: Path | None = None,
) -> Configuration:
diff --git a/src/main.py b/src/main.py
index 9c0f18be..fa5ea54f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -3,14 +3,18 @@
import sys
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
-from pathlib import Path
import uvicorn
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError
from loguru import logger
-from config import get_config, load_set_configuration
+from config import (
+ Configuration,
+ get_config,
+ parse_config,
+ set_config,
+)
from core.errors import (
ProblemDetailError,
problem_detail_exception_handler,
@@ -74,11 +78,12 @@ def _parse_args() -> argparse.Namespace:
return parser.parse_args()
-def create_api(configuration_file: Path | None = None) -> FastAPI:
+def create_api(configuration: Configuration | None = None) -> FastAPI:
# Default logging configuration so we have logs during setup
logger.remove()
setup_sink = logger.add(sys.stderr, serialize=True)
- load_set_configuration(configuration_file=configuration_file)
+ config = configuration or parse_config()
+ set_config(config)
setup_log_sinks(*get_config().logging)
root_path = get_config().routing.root_path
diff --git a/tests/config.test.toml b/tests/config.test.toml
deleted file mode 100644
index f94d431b..00000000
--- a/tests/config.test.toml
+++ /dev/null
@@ -1,17 +0,0 @@
-[development]
-allow_test_api_keys=true
-
-[logging.develop]
-sink="sys.stderr"
-level="DEBUG"
-
-[databases.expdb]
-database="openml_expdb"
-
-[databases.openml]
-database="openml"
-
-[routing]
-root_path= ""
-minio_url="http://minio:9000/"
-server_url="http://php-api:80/"
diff --git a/tests/conftest.py b/tests/conftest.py
index 01ac8c82..23a52955 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -12,6 +12,13 @@
from asgi_lifespan import LifespanManager
from sqlalchemy import text
+from config import (
+ Configuration,
+ DatabaseConfiguration,
+ DevelopmentConfiguration,
+ LoggingConfiguration,
+ RoutingConfiguration,
+)
from database.setup import expdb_database, user_database
from main import create_api
from routers.dependencies import expdb_connection, userdb_connection
@@ -78,7 +85,16 @@ async def php_api() -> AsyncIterator[httpx.AsyncClient]:
@pytest.fixture(scope="session")
async def app() -> AsyncIterator[FastAPI]:
- _app = create_api(Path(__file__).parent / "config.test.toml")
+ config = Configuration(
+ openml_database=DatabaseConfiguration(database="openml"),
+ expdb_database=DatabaseConfiguration(database="openml_expdb"),
+ development=DevelopmentConfiguration(allow_test_api_keys=True),
+ routing=RoutingConfiguration(
+ minio_url="http://minio:9000", server_url="http://php-api:80/"
+ ),
+ logging=[LoggingConfiguration(sink="sys.stderr", level="DEBUG")],
+ )
+ _app = create_api(config)
async with LifespanManager(_app):
yield _app
@@ -123,11 +139,6 @@ def dataset_130() -> Iterator[dict[str, Any]]:
yield json.load(dataset_file)
-@pytest.fixture
-def default_configuration_file() -> Path:
- return Path().parent.parent / "src" / "config.toml"
-
-
class Flow(NamedTuple):
"""To be replaced by an actual ORM class."""
From 43f5661c7dfcff9b0f29890c3b152f83ad01b7fc Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 11:43:42 +0200
Subject: [PATCH 06/13] Do not cache get_config
---
src/config.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/config.py b/src/config.py
index 6af80fb8..45044d32 100644
--- a/src/config.py
+++ b/src/config.py
@@ -17,7 +17,6 @@
and then consequent calls to `get_config` will return that configuration.
"""
-import functools
import os
import tomllib
import typing
@@ -38,7 +37,6 @@
_config: Configuration | None = None
-@functools.cache
def get_config() -> Configuration:
if _config is None:
config = parse_config()
From 5425cef2e92176eca846d341ff0bbd7b4e61e1e8 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 11:44:18 +0200
Subject: [PATCH 07/13] Include port when creating engine
---
src/database/setup.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/database/setup.py b/src/database/setup.py
index c136bf37..f02f379d 100644
--- a/src/database/setup.py
+++ b/src/database/setup.py
@@ -13,6 +13,7 @@ def _create_engine(db_config: DatabaseConfiguration) -> AsyncEngine:
username=db_config.username,
password=db_config.password,
host=db_config.host,
+ port=db_config.port,
database=db_config.database,
)
return create_async_engine(
From 993667cbbc5b2f24e5561170ffc16632ff8d9609 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 11:52:39 +0200
Subject: [PATCH 08/13] Explain why this caching mechanism is used
---
src/config.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/config.py b/src/config.py
index 45044d32..84602ea2 100644
--- a/src/config.py
+++ b/src/config.py
@@ -37,6 +37,9 @@
_config: Configuration | None = None
+# The reason we use a module variable instead of functools.cache
+# is that this approach allows a custom configuration to be set
+# through `set_config` and subsequently loaded through `get_config`.
def get_config() -> Configuration:
if _config is None:
config = parse_config()
From b90981caca628e8f62900aaf99ff4c5a8bb59f55 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 11:58:36 +0200
Subject: [PATCH 09/13] restructure db configuration loading
---
src/config.py | 38 +++++++++++++++++---------------------
1 file changed, 17 insertions(+), 21 deletions(-)
diff --git a/src/config.py b/src/config.py
index 84602ea2..0d411161 100644
--- a/src/config.py
+++ b/src/config.py
@@ -117,24 +117,17 @@ class LoggingConfiguration(BaseModel, frozen=True):
enqueue: bool = Field(default=True)
-def _load_database_configuration(
- configurations: dict[str, dict[str, str]],
-) -> dict[str, DatabaseConfiguration]:
- database_configurations = {}
- for db_alias, db_configuration in configurations.items():
- credentials = {
- "username": os.environ.get(
- f"OPENML_DATABASES_{db_alias.upper()}_USERNAME",
- "root",
- ),
- "password": os.environ.get(
- f"OPENML_DATABASES_{db_alias.upper()}_PASSWORD",
- "ok",
- ),
- }
- database_configurations[db_alias] = DatabaseConfiguration(**db_configuration, **credentials)
-
- return database_configurations
+def _db_env_credentials(alias: str) -> dict[str, str]:
+ return {
+ "username": os.environ.get(
+ f"OPENML_DATABASES_{alias.upper()}_USERNAME",
+ "root",
+ ),
+ "password": os.environ.get(
+ f"OPENML_DATABASES_{alias.upper()}_PASSWORD",
+ "ok",
+ ),
+ }
def parse_config(
@@ -172,14 +165,17 @@ def parse_config(
)
config = tomllib.loads(configuration_file.read_text())
- db_configurations = _load_database_configuration(config["databases"])
+ db_section = config["databases"]
+ openml_db = DatabaseConfiguration(**db_section["openml"], **_db_env_credentials("openml"))
+ expdb_db = DatabaseConfiguration(**db_section["expdb"], **_db_env_credentials("expdb"))
+
return Configuration(
routing=RoutingConfiguration(**config["routing"]),
logging=[
LoggingConfiguration(**sink_configuration)
for sink_configuration in config["logging"].values()
],
- openml_database=db_configurations["openml"],
- expdb_database=db_configurations["expdb"],
+ openml_database=openml_db,
+ expdb_database=expdb_db,
development=DevelopmentConfiguration(**config["development"]),
)
From 52edb0ba20bbdc623b64f45bf330913960177a95 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 12:00:26 +0200
Subject: [PATCH 10/13] Determine server url only after configuration is loaded
---
src/routers/mldcat_ap/dataset.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index 5778fcd5..a0b2d758 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -5,6 +5,7 @@
"""
import asyncio
+import functools
from typing import TYPE_CHECKING, Annotated
from fastapi import APIRouter, Depends, HTTPException
@@ -31,10 +32,12 @@
from sqlalchemy.ext.asyncio import AsyncConnection
router = APIRouter(prefix="/mldcat_ap", tags=["MLDCAT-AP"])
-_routing_configuration = config.get_config().routing
-_server_url = (
- f"{_routing_configuration.server_url}{_routing_configuration.root_path}{router.prefix}"
-)
+
+
+@functools.cache
+def _server_url() -> str:
+ _routing_configuration = config.get_config().routing
+ return f"{_routing_configuration.server_url}{_routing_configuration.root_path}{router.prefix}"
@router.get(
From c1d58cfd50442b7813c3dd40bf1435ddbe213d14 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 12:29:57 +0200
Subject: [PATCH 11/13] define api key validation pattern at runtime
---
src/core/logging.py | 5 +++++
src/database/users.py | 28 +++++++++++++++++++++-------
tests/config_test.py | 24 ++++++++++++------------
3 files changed, 38 insertions(+), 19 deletions(-)
diff --git a/src/core/logging.py b/src/core/logging.py
index 7f01cb4e..36979733 100644
--- a/src/core/logging.py
+++ b/src/core/logging.py
@@ -23,6 +23,11 @@ def setup_log_sinks(*configurations: LoggingConfiguration) -> None:
sink = conf.pop("sink")
if sink == "sys.stderr":
sink = sys.stderr
+ # defaults may be provided for rotation, retention, and compression,
+ # but they are file-sink options that are not valid for stderr logging.
+ conf.pop("rotation", None)
+ conf.pop("retention", None)
+ conf.pop("compression", None)
logger.add(sink, **conf)
diff --git a/src/database/users.py b/src/database/users.py
index 20e67748..bc2d645b 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -1,8 +1,10 @@
import dataclasses
+import functools
+import re
from enum import IntEnum
from typing import TYPE_CHECKING, Annotated, Self
-from pydantic import StringConstraints
+from pydantic import AfterValidator
from sqlalchemy import text
from config import get_config
@@ -10,15 +12,27 @@
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncConnection
-# If `allow_test_api_keys` is set, the key may also be one of `normaluser`,
-# `normaluser2`, or `abc` (admin).
-api_key_pattern = r"^[0-9a-fA-F]{32}$"
-if get_config().development.allow_test_api_keys:
- api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"
+
+api_key_pattern = re.compile(r"^[0-9a-fA-F]{32}$")
+# The test database currently contains some non-standard API keys
+api_key_pattern_with_test = re.compile(r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$")
+
+
+@functools.cache
+def is_valid_api_key(key: str) -> str:
+ """Raise ValueError if key is not valid, return key otherwise."""
+ pattern = api_key_pattern
+ if get_config().development.allow_test_api_keys:
+ pattern = api_key_pattern_with_test
+ if not pattern.match(key):
+ msg = f"API key {key!r} format is not valid."
+ raise ValueError(msg)
+ return key
+
APIKey = Annotated[
str,
- StringConstraints(pattern=api_key_pattern),
+ AfterValidator(is_valid_api_key),
]
diff --git a/tests/config_test.py b/tests/config_test.py
index 8d3a336d..6935a3c3 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -1,19 +1,19 @@
import os
from unittest import mock
-from config import _load_database_configuration
+from config import _db_env_credentials
-def test_load_configuration_adds_environment_variables() -> None:
- _db_alias = "openml"
+def test__db_env_credentials() -> None:
+ db_alias = "openml"
+ credentials = _db_env_credentials(db_alias)
+ assert credentials["username"] == "root"
+ assert credentials["password"] == "ok" # noqa: S105
- _fake_config = {
- _db_alias: {"database": "openml"},
- }
- database_configuration = _load_database_configuration(_fake_config)
- assert database_configuration[_db_alias].username == "root"
+ env_var_name = f"OPENML_DATABASES_{db_alias.upper()}_USERNAME"
+ env_var_pass = f"OPENML_DATABASES_{db_alias.upper()}_PASSWORD"
+ with mock.patch.dict(os.environ, {env_var_name: "foo", env_var_pass: "bar"}):
+ credentials = _db_env_credentials(db_alias)
- _env_var_name = f"OPENML_DATABASES_{_db_alias.upper()}_USERNAME"
- with mock.patch.dict(os.environ, {_env_var_name: "foo"}):
- database_configuration = _load_database_configuration(_fake_config)
- assert database_configuration[_db_alias].username == "foo"
+ assert credentials["username"] == "foo"
+ assert credentials["password"] == "bar" # noqa: S105
From fbce8ef5dcfc7e93b18ef6f0837768340d46bdb0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 12:32:20 +0200
Subject: [PATCH 12/13] Remove duplicate comment
---
src/config.toml | 6 ------
1 file changed, 6 deletions(-)
diff --git a/src/config.toml b/src/config.toml
index 78f42be7..8a23dd87 100644
--- a/src/config.toml
+++ b/src/config.toml
@@ -1,12 +1,6 @@
[development]
allow_test_api_keys=true
-# Any number of logging.NAME configurations can be added.
-# NAME is for reference only, it has no meaning otherwise.
-# You can add any arguments to `loguru.logger.add`,
-# the `sink` variable will be used as first positional argument.
-# https://loguru.readthedocs.io/en/stable/api/logger.html
-
# Any number of logging.NAME configurations can be added.
# NAME is for reference only, it has no meaning otherwise.
[logging.develop]
From 6a50c086d283e8fada4b7535c613dea503ecb90f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 May 2026 13:56:29 +0200
Subject: [PATCH 13/13] call server_url()
---
src/routers/mldcat_ap/dataset.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index a0b2d758..277fe07e 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -35,7 +35,7 @@
@functools.cache
-def _server_url() -> str:
+def server_url() -> str:
_routing_configuration = config.get_config().routing
return f"{_routing_configuration.server_url}{_routing_configuration.root_path}{router.prefix}"
@@ -62,6 +62,7 @@ async def get_mldcat_ap_distribution(
get_dataset_features(distribution_id, user, expdb),
get_qualities(distribution_id, user, expdb),
)
+ _server_url = server_url()
features = [
Feature(
id_=f"{_server_url}/feature/{distribution_id}/{feature.index}",
@@ -134,6 +135,7 @@ def get_dataservice(service_id: int) -> JsonLDGraph:
if service_id != 1:
msg = f"Service with id {service_id} not found."
raise ServiceNotFoundError(msg)
+ _server_url = server_url()
return JsonLDGraph(
context="https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/context.jsonld",
graph=[
@@ -164,6 +166,7 @@ async def get_distribution_quality(
status_code=404,
detail=f"Quality '{quality_name}' not found for distribution {distribution_id}.",
)
+ _server_url = server_url()
example_quality = Quality(
id_=f"{_server_url}/quality/{quality_name}/{distribution_id}",
quality_type=f"{_server_url}/quality/{quality_name}",
@@ -195,6 +198,7 @@ async def get_distribution_feature(
expdb=expdb,
)
feature = features[feature_no]
+ _server_url = server_url()
mldcat_feature = Feature(
id_=f"{_server_url}/feature/{distribution_id}/{feature.index}",
name=feature.name,