diff --git a/changelog.d/update-us-bundle.changed b/changelog.d/update-us-bundle.changed new file mode 100644 index 00000000..b1929041 --- /dev/null +++ b/changelog.d/update-us-bundle.changed @@ -0,0 +1 @@ +Update the bundled US release to policyengine-us 1.690.7 and refresh data provenance metadata. diff --git a/pyproject.toml b/pyproject.toml index c7d0f6b9..22be6ff3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,12 +41,12 @@ graph = [ "networkx>=3.0", ] uk = [ - "policyengine_core>=3.26.0", + "policyengine_core>=3.26.1", "policyengine-uk==2.88.14", ] us = [ - "policyengine_core>=3.26.0", - "policyengine-us==1.687.0", + "policyengine_core>=3.26.1", + "policyengine-us==1.690.7", ] dev = [ "pytest", @@ -59,9 +59,9 @@ dev = [ "plotly>=5.0.0", "pytest-asyncio>=0.26.0", "ruff>=0.9.0", - "policyengine_core>=3.26.0", + "policyengine_core>=3.26.1", "policyengine-uk==2.88.14", - "policyengine-us==1.687.0", + "policyengine-us==1.690.7", "towncrier>=24.8.0", "mypy>=1.11.0", "pytest-cov>=5.0.0", diff --git a/scripts/refresh_release_bundle.py b/scripts/refresh_release_bundle.py index 06e64570..2bbdea75 100644 --- a/scripts/refresh_release_bundle.py +++ b/scripts/refresh_release_bundle.py @@ -26,8 +26,11 @@ from __future__ import annotations import argparse +import os import sys +os.environ.setdefault("POLICYENGINE_SKIP_COUNTRY_IMPORTS", "1") + from policyengine.provenance.bundle import ( refresh_release_bundle, regenerate_trace_tro, diff --git a/src/policyengine/__init__.py b/src/policyengine/__init__.py index a8de3971..1c4fb0ae 100644 --- a/src/policyengine/__init__.py +++ b/src/policyengine/__init__.py @@ -28,17 +28,20 @@ (the pinned ``TaxBenefitModelVersion``), and the microsim helpers. """ +import os from importlib.util import find_spec from policyengine import outputs as outputs from policyengine.core import Simulation as Simulation -if find_spec("policyengine_us") is not None: +_SKIP_COUNTRY_IMPORTS = os.environ.get("POLICYENGINE_SKIP_COUNTRY_IMPORTS") == "1" + +if not _SKIP_COUNTRY_IMPORTS and find_spec("policyengine_us") is not None: from policyengine.tax_benefit_models import us as us else: # pragma: no cover us = None # type: ignore[assignment] -if find_spec("policyengine_uk") is not None: +if not _SKIP_COUNTRY_IMPORTS and find_spec("policyengine_uk") is not None: from policyengine.tax_benefit_models import uk as uk else: # pragma: no cover uk = None # type: ignore[assignment] diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index df16c1ab..01775be8 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -5,33 +5,34 @@ "policyengine_version": "4.4.2", "model_package": { "name": "policyengine-us", - "version": "1.687.0", - "sha256": "cac7da3aa9ba4bf57009eee75d798217bbef7e1c5ca17646d472fad715ab634f", - "wheel_url": "https://files.pythonhosted.org/packages/c3/36/5633f5a3996c915494154ec3852011b1a239ea06d9f08cb6287ab709618c/policyengine_us-1.687.0-py3-none-any.whl" + "version": "1.690.7", + "sha256": "5a7a541efabac98fa069d6845902cf5924c81db67383234b55dcd2b8bfcfc3ca", + "wheel_url": "https://files.pythonhosted.org/packages/2a/02/52109bae5f4767237b43bd72ce0bc4edf7925650a788053b2bc168caa5ae/policyengine_us-1.690.7-py3-none-any.whl" }, "data_package": { "name": "policyengine-us-data", - "version": "1.78.2", + "version": "1.110.12", "repo_id": "policyengine/policyengine-us-data", - "release_manifest_path": "releases/1.78.2/release_manifest.json", - "release_manifest_revision": "9cb665df0a546f9c3d79b496f8eb2dd55859d38d" + "release_manifest_path": "releases/1.110.12/release_manifest.json", + "release_manifest_revision": "3aac4505ec10d31efc1b3799a1e6458a15853ecc" }, "certified_data_artifact": { "data_package": { "name": "policyengine-us-data", - "version": "1.78.2" + "version": "1.110.12" }, - "build_id": "policyengine-us-data-1.78.2", + "build_id": "policyengine-us-data-1.110.12", "dataset": "enhanced_cps_2024", - "uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2", - "sha256": "4e92b340c3ea3e200ed5d55edf752ee1a13baf787442956fb67d25242fed13b5" + "uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12", + "sha256": "58a6639f7511b8d804701417e2647f0c3a77f51a3d90441037eaf004b1f00761" }, "certification": { - "compatibility_basis": "matching_data_build_fingerprint", - "data_build_id": "policyengine-us-data-1.78.2", - "built_with_model_version": "1.647.0", - "certified_for_model_version": "1.687.0", - "certified_by": "policyengine.py bundled manifest" + "compatibility_basis": "exact_build_model_version", + "data_build_id": "policyengine-us-data-1.110.12", + "built_with_model_version": "1.690.7", + "certified_for_model_version": "1.690.7", + "certified_by": "policyengine.py bundled manifest", + "data_build_fingerprint": "sha256:9961ed1c5d00943a360724da560eee425eb9f99f91896f053dca74724c46e96e" }, "default_dataset": "enhanced_cps_2024", "datasets": { diff --git a/src/policyengine/data/release_manifests/us.trace.tro.jsonld b/src/policyengine/data/release_manifests/us.trace.tro.jsonld index a669d411..61fb84c9 100644 --- a/src/policyengine/data/release_manifests/us.trace.tro.jsonld +++ b/src/policyengine/data/release_manifests/us.trace.tro.jsonld @@ -17,7 +17,7 @@ "schema:name": "PolicyEngine", "schema:url": "https://policyengine.org" }, - "schema:dateCreated": "2026-05-09T09:38:11.356906Z", + "schema:dateCreated": "2026-05-11T18:53:05.508006Z", "schema:description": "TRACE TRO for certified runtime bundle us-4.4.2 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.", "schema:name": "policyengine us certified bundle TRO", "trov:createdWith": { @@ -45,7 +45,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/data_release_manifest" }, - "trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/9cb665df0a546f9c3d79b496f8eb2dd55859d38d/releases/1.78.2/release_manifest.json" + "trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/3aac4505ec10d31efc1b3799a1e6458a15853ecc/releases/1.110.12/release_manifest.json" }, { "@id": "arrangement/1/location/dataset", @@ -53,7 +53,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/dataset" }, - "trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/1.78.2/enhanced_cps_2024.h5" + "trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/1.110.12/enhanced_cps_2024.h5" }, { "@id": "arrangement/1/location/model_wheel", @@ -61,7 +61,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/model_wheel" }, - "trov:hasLocation": "https://files.pythonhosted.org/packages/c3/36/5633f5a3996c915494154ec3852011b1a239ea06d9f08cb6287ab709618c/policyengine_us-1.687.0-py3-none-any.whl" + "trov:hasLocation": "https://files.pythonhosted.org/packages/2a/02/52109bae5f4767237b43bd72ce0bc4edf7925650a788053b2bc168caa5ae/policyengine_us-1.690.7-py3-none-any.whl" } ] } @@ -75,53 +75,51 @@ "@type": "trov:ResearchArtifact", "schema:name": "policyengine.py bundle manifest for us", "trov:mimeType": "application/json", - "trov:sha256": "ee037378f3e655360894dc9fe66f1d0c335321ff48789e3ebb0ff38805933d72" + "trov:sha256": "f002ead95407fb118933f09f538412a033b6295f019741196e01fb6fc1cab32c" }, { "@id": "composition/1/artifact/data_release_manifest", "@type": "trov:ResearchArtifact", - "schema:name": "policyengine-us-data release manifest 1.78.2", + "schema:name": "policyengine-us-data release manifest 1.110.12", "trov:mimeType": "application/json", - "trov:sha256": "83aafd9fa3d33a444c0277aa4b31b2041a8785910bee30660773ab96e1b1c8b9" + "trov:sha256": "17cfd2fbb31064834ed82c0fd7d8ae5c272fe7f24b1e48b226a4acf97ff4c5dd" }, { "@id": "composition/1/artifact/dataset", "@type": "trov:ResearchArtifact", "schema:name": "enhanced_cps_2024", "trov:mimeType": "application/x-hdf5", - "trov:sha256": "4e92b340c3ea3e200ed5d55edf752ee1a13baf787442956fb67d25242fed13b5" + "trov:sha256": "58a6639f7511b8d804701417e2647f0c3a77f51a3d90441037eaf004b1f00761" }, { "@id": "composition/1/artifact/model_wheel", "@type": "trov:ResearchArtifact", - "schema:name": "policyengine-us==1.687.0 wheel", + "schema:name": "policyengine-us==1.690.7 wheel", "trov:mimeType": "application/zip", - "trov:sha256": "cac7da3aa9ba4bf57009eee75d798217bbef7e1c5ca17646d472fad715ab634f" + "trov:sha256": "5a7a541efabac98fa069d6845902cf5924c81db67383234b55dcd2b8bfcfc3ca" } ], "trov:hasFingerprint": { "@id": "composition/1/fingerprint", "@type": "trov:CompositionFingerprint", - "trov:sha256": "a26e89fc2cd91ee54bc7526aa4c1a32c5b8ad35caefa1e7f2572e2fb8bb303a0" + "trov:sha256": "524aef1282c79dc983d821f01c26c7fe17bf2787c73bd7530bd7e8fd4900d7d6" } }, "trov:hasPerformance": { "@id": "trp/1", "@type": "trov:TransparentResearchPerformance", - "pe:builtWithModelVersion": "1.647.0", + "pe:builtWithModelVersion": "1.690.7", "pe:certifiedBy": "policyengine.py bundled manifest", - "pe:certifiedForModelVersion": "1.687.0", - "pe:ciGitRef": "refs/heads/main", - "pe:ciGitSha": "eb7334698e9116cc06a5c6232ac4b4a91e2aef9a", - "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/25598363562", - "pe:compatibilityBasis": "matching_data_build_fingerprint", - "pe:dataBuildId": "policyengine-us-data-1.78.2", - "pe:emittedIn": "github-actions", - "rdfs:comment": "Certification of build policyengine-us-data-1.78.2 for policyengine-us 1.687.0.", + "pe:certifiedForModelVersion": "1.690.7", + "pe:compatibilityBasis": "exact_build_model_version", + "pe:dataBuildFingerprint": "sha256:9961ed1c5d00943a360724da560eee425eb9f99f91896f053dca74724c46e96e", + "pe:dataBuildId": "policyengine-us-data-1.110.12", + "pe:emittedIn": "local", + "rdfs:comment": "Certification of build policyengine-us-data-1.110.12 for policyengine-us 1.690.7.", "trov:accessedArrangement": { "@id": "arrangement/1" }, - "trov:startedAtTime": "2026-05-09T09:38:11.356906Z", + "trov:startedAtTime": "2026-05-11T18:53:05.508006Z", "trov:wasConductedBy": { "@id": "trs" } diff --git a/src/policyengine/provenance/bundle.py b/src/policyengine/provenance/bundle.py index 50ebf173..af11449c 100644 --- a/src/policyengine/provenance/bundle.py +++ b/src/policyengine/provenance/bundle.py @@ -174,6 +174,66 @@ def _hf_dataset_sha256(repo_id: str, path: str, revision: str) -> str: return hasher.hexdigest() +@dataclass(frozen=True) +class _DataReleaseManifestFetch: + payload: dict + repo_commit: Optional[str] + + +def _fetch_data_release_manifest( + repo_id: str, + release_manifest_path: str, + revision: str, +) -> Optional[_DataReleaseManifestFetch]: + """Fetch a data release manifest from HF if one is available. + + Older data releases may not have a machine-readable release manifest at the + inferred path. In that case the bundle refresh falls back to hashing the + dataset artifact directly. + + Data releases are stored under versioned paths, but the HF repository does + not necessarily create a matching git tag for each data version. Try the + version revision first for repositories that do publish tags, then fall + back to ``main`` and persist the immutable ``x-repo-commit`` header. + """ + headers = {"User-Agent": "policyengine.py"} + token = os.environ.get("HUGGING_FACE_TOKEN") or os.environ.get("HF_TOKEN") + if token: + headers["Authorization"] = f"Bearer {token}" + + revisions = [revision] + if revision != "main": + revisions.append("main") + + for candidate in revisions: + url = ( + f"https://huggingface.co/{repo_id}/resolve/" + f"{candidate}/{release_manifest_path}" + ) + try: + with urlopen(Request(url, headers=headers)) as f: + payload = json.load(f) + repo_commit = getattr(f, "headers", {}).get("x-repo-commit") + return _DataReleaseManifestFetch( + payload=payload, + repo_commit=repo_commit, + ) + except (OSError, ValueError): + continue + return None + + +def _updated_release_manifest_path( + current_path: str, + old_data: str, + new_data: str, +) -> str: + """Preserve country-specific release-manifest layout while bumping versions.""" + if old_data in current_path: + return current_path.replace(old_data, new_data) + return current_path + + # --------------------------------------------------------------------------- # Refresh result # --------------------------------------------------------------------------- @@ -276,19 +336,90 @@ def refresh_release_bundle( ) repo_id, dataset_path, _old_revision = repo_id_match.groups() + data_package_json = manifest_json["data_package"] + release_manifest_json = None + new_release_manifest_revision = None + new_release_manifest_path = data_package_json.get("release_manifest_path") + if new_data != old_data and new_release_manifest_path is not None: + new_release_manifest_path = _updated_release_manifest_path( + current_path=new_release_manifest_path, + old_data=old_data, + new_data=new_data, + ) + release_manifest_fetch = _fetch_data_release_manifest( + repo_id=repo_id, + release_manifest_path=new_release_manifest_path, + revision=new_data, + ) + if release_manifest_fetch is None: + raise ValueError( + "Could not fetch data release manifest " + f"{new_release_manifest_path!r} from {repo_id}@{new_data}. " + "Refusing to refresh a release-manifest-backed bundle with " + "partial certification metadata." + ) + if release_manifest_fetch.repo_commit is None: + raise ValueError( + "Could not resolve an immutable HF commit for data release " + f"manifest {new_release_manifest_path!r} from {repo_id}@{new_data}." + ) + release_manifest_json = release_manifest_fetch.payload + release_manifest_data_version = release_manifest_json.get( + "data_package", {} + ).get("version") + if release_manifest_data_version != new_data: + raise ValueError( + "Data release manifest " + f"{new_release_manifest_path!r} from {repo_id} declares " + f"version {release_manifest_data_version!r}, expected {new_data!r}." + ) + new_release_manifest_revision = release_manifest_fetch.repo_commit + + certified_dataset = ( + current.certified_data_artifact.dataset + if current.certified_data_artifact is not None + else current.default_dataset + ) + data_artifact_json = {} + if release_manifest_json is not None: + data_artifact_json = release_manifest_json.get("artifacts", {}).get( + certified_dataset, + {}, + ) + if not data_artifact_json: + raise ValueError( + "Data release manifest " + f"{new_release_manifest_path!r} from {repo_id}@{new_data} " + f"does not include certified dataset {certified_dataset!r}." + ) + dataset_repo_id = data_artifact_json.get("repo_id", repo_id) + dataset_path = data_artifact_json.get("path", dataset_path) + dataset_revision = data_artifact_json.get("revision", new_data) + # Only hit HF if the data version actually changed. if new_data != old_data: - new_dataset_sha256 = _hf_dataset_sha256(repo_id, dataset_path, new_data) + new_dataset_sha256 = data_artifact_json.get("sha256") or _hf_dataset_sha256( + dataset_repo_id, + dataset_path, + dataset_revision, + ) else: new_dataset_sha256 = old_dataset_sha256 - new_uri = f"hf://{repo_id}/{dataset_path}@{new_data}" + new_uri = f"hf://{dataset_repo_id}/{dataset_path}@{dataset_revision}" policyengine_version = _pyproject_version(pyproject_path) # Mutate the manifest JSON in place (keep unknown fields untouched). manifest_json["model_package"]["version"] = new_model manifest_json["model_package"]["sha256"] = new_wheel_sha256 manifest_json["model_package"]["wheel_url"] = new_wheel_url - manifest_json["data_package"]["version"] = new_data + data_package_json["version"] = new_data + if new_data != old_data: + if new_release_manifest_path is not None: + data_package_json["release_manifest_path"] = new_release_manifest_path + if new_release_manifest_revision is not None: + data_package_json["release_manifest_revision"] = ( + new_release_manifest_revision + ) manifest_json["certified_data_artifact"]["data_package"]["version"] = new_data manifest_json["certified_data_artifact"]["build_id"] = ( f"{current.data_package.name}-{new_data}" @@ -299,10 +430,43 @@ def refresh_release_bundle( f"{current.data_package.name}-{new_data}" ) manifest_json["certification"]["certified_for_model_version"] = new_model + if release_manifest_json is not None: + build = release_manifest_json.get("build") or {} + built_with_model = build.get("built_with_model_package") or {} + data_build_id = ( + build.get("build_id") or f"{current.data_package.name}-{new_data}" + ) + manifest_json["certified_data_artifact"]["build_id"] = data_build_id + certification_json = manifest_json["certification"] + certification_json["data_build_id"] = data_build_id + certification_json["certified_for_model_version"] = new_model + built_with_model_version = built_with_model.get("version") + if built_with_model_version is not None: + certification_json["built_with_model_version"] = built_with_model_version + if built_with_model.get("git_sha") is not None: + certification_json["built_with_model_git_sha"] = built_with_model["git_sha"] + else: + certification_json.pop("built_with_model_git_sha", None) + data_build_fingerprint = built_with_model.get("data_build_fingerprint") + if data_build_fingerprint is not None: + certification_json["data_build_fingerprint"] = data_build_fingerprint + else: + certification_json.pop("data_build_fingerprint", None) + if built_with_model_version == new_model: + certification_json["compatibility_basis"] = "exact_build_model_version" + elif data_build_fingerprint is not None: + certification_json["compatibility_basis"] = ( + "matching_data_build_fingerprint" + ) + else: + certification_json["compatibility_basis"] = ( + "legacy_compatible_model_package" + ) manifest_path.write_text( json.dumps(manifest_json, indent=2, sort_keys=False) + "\n" ) + get_release_manifest.cache_clear() sync_release_manifest_policyengine_version( policyengine_version=policyengine_version, manifest_dir=manifest_dir, diff --git a/tests/fixtures/household_calculator_snapshots/us_model_surface.json b/tests/fixtures/household_calculator_snapshots/us_model_surface.json index 26226b4e..076b79b1 100644 --- a/tests/fixtures/household_calculator_snapshots/us_model_surface.json +++ b/tests/fixtures/household_calculator_snapshots/us_model_surface.json @@ -5,7 +5,7 @@ "has_income_tax": true, "has_region_registry": true, "model_package_name": "policyengine-us", - "num_parameters_bucketed_100s": 850, + "num_parameters_bucketed_100s": 851, "num_variables_bucketed_100s": 48, "region_registry_country": "us" } diff --git a/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json b/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json index b77b54f4..ce33c7a1 100644 --- a/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json +++ b/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json @@ -32,8 +32,8 @@ "person[0].unemployment_compensation": 0.0, "spm_unit.snap": 3596.04, "spm_unit.spm_unit_id": 0.0, - "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0, - "spm_unit.spm_unit_is_in_spm_poverty": 0.0, + "spm_unit.spm_unit_is_in_deep_spm_poverty": 1.0, + "spm_unit.spm_unit_is_in_spm_poverty": 1.0, "spm_unit.spm_unit_net_income": 3596.04, "spm_unit.spm_unit_weight": 1.0, "spm_unit.tanf": 0.0, diff --git a/tests/test_bundle_refresh.py b/tests/test_bundle_refresh.py index 85a12b1c..7a9c8581 100644 --- a/tests/test_bundle_refresh.py +++ b/tests/test_bundle_refresh.py @@ -24,6 +24,7 @@ import json from pathlib import Path from unittest.mock import patch +from urllib.error import HTTPError import pytest @@ -65,8 +66,9 @@ def _pypi_response(package: str, version: str): class _FakeHFResponse: """Streams a deterministic byte sequence so sha256 is predictable.""" - def __init__(self, content: bytes) -> None: + def __init__(self, content: bytes, headers: dict | None = None) -> None: self._buffer = io.BytesIO(content) + self.headers = headers or {} def read(self, size: int = -1) -> bytes: return self._buffer.read(size) @@ -78,6 +80,48 @@ def __exit__(self, *args): self._buffer.close() +def _data_release_manifest_response( + *, + model_version: str = "1.653.3", + data_version: str = "1.83.4", + dataset_sha256: str = "e" * 64, + headers: dict | None = None, +): + payload = { + "schema_version": 1, + "data_package": { + "name": "policyengine-us-data", + "version": data_version, + }, + "build": { + "build_id": f"policyengine-us-data-{data_version}", + "built_with_model_package": { + "name": "policyengine-us", + "version": model_version, + "git_sha": "deadbeef", + "data_build_fingerprint": "sha256:fingerprint", + }, + }, + "artifacts": { + "enhanced_cps_2024": { + "kind": "microdata", + "path": "enhanced_cps_2024.h5", + "repo_id": "policyengine/policyengine-us-data", + "revision": data_version, + "sha256": dataset_sha256, + } + }, + } + return _FakeHFResponse( + json.dumps(payload).encode(), + headers=( + {"x-repo-commit": "release-manifest-commit-sha"} + if headers is None + else headers + ), + ) + + @pytest.fixture def sandbox(tmp_path: Path) -> dict: """A writable scratch copy of the US release manifest + a stub @@ -101,6 +145,8 @@ def sandbox(tmp_path: Path) -> dict: "name": "policyengine-us-data", "version": "1.70.0", "repo_id": "policyengine/policyengine-us-data", + "release_manifest_path": "releases/1.70.0/release_manifest.json", + "release_manifest_revision": "old-release-manifest-commit", }, "certified_data_artifact": { "data_package": { @@ -173,12 +219,26 @@ def fake_urlopen(request, *args, **kwargs): assert written["policyengine_version"] == "4.2.0" # Dataset pins untouched. assert written["data_package"]["version"] == "1.70.0" + assert ( + written["data_package"]["release_manifest_path"] + == "releases/1.70.0/release_manifest.json" + ) + assert ( + written["data_package"]["release_manifest_revision"] + == "old-release-manifest-commit" + ) assert written["certified_data_artifact"]["sha256"] == "d" * 64 def test__bump_data_only_streams_hf_and_updates_uri(sandbox) -> None: """Bumping only the data version streams the HF file, recomputes its sha256, and rewrites the URI revision.""" + manifest_path = sandbox["manifest_dir"] / "us.json" + manifest = json.loads(manifest_path.read_text()) + manifest["data_package"].pop("release_manifest_path") + manifest["data_package"].pop("release_manifest_revision") + manifest_path.write_text(json.dumps(manifest, indent=2)) + hf_bytes = b"synthetic dataset payload" expected_sha256 = hashlib.sha256(hf_bytes).hexdigest() @@ -206,6 +266,8 @@ def fake_urlopen(request, *args, **kwargs): written = json.loads((sandbox["manifest_dir"] / "us.json").read_text()) assert written["data_package"]["version"] == "1.83.4" + assert "release_manifest_path" not in written["data_package"] + assert "release_manifest_revision" not in written["data_package"] assert written["certified_data_artifact"]["data_package"]["version"] == "1.83.4" assert written["certified_data_artifact"]["build_id"] == ( "policyengine-us-data-1.83.4" @@ -217,15 +279,93 @@ def fake_urlopen(request, *args, **kwargs): ) -def test__bump_both_updates_everything(sandbox) -> None: - hf_bytes = b"another payload" +def test__bump_data_only_writes_release_manifest_revision_when_absent( + sandbox, +) -> None: + manifest_path = sandbox["manifest_dir"] / "us.json" + manifest = json.loads(manifest_path.read_text()) + manifest["data_package"].pop("release_manifest_revision") + manifest_path.write_text(json.dumps(manifest, indent=2)) + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if url.endswith("releases/1.83.4/release_manifest.json"): + return _data_release_manifest_response() + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + refresh_release_bundle( + country="us", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + written = json.loads((sandbox["manifest_dir"] / "us.json").read_text()) + assert ( + written["data_package"]["release_manifest_revision"] + == "release-manifest-commit-sha" + ) + + +def test__bump_data_only_falls_back_to_main_for_release_manifest( + sandbox, +) -> None: + seen_urls = [] + + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + seen_urls.append(url) + if "/resolve/1.83.4/releases/1.83.4/release_manifest.json" in url: + raise HTTPError(url, 404, "Not Found", hdrs=None, fp=None) + if "/resolve/main/releases/1.83.4/release_manifest.json" in url: + return _data_release_manifest_response() + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + refresh_release_bundle( + country="us", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + assert any("/resolve/1.83.4/" in url for url in seen_urls) + assert any("/resolve/main/" in url for url in seen_urls) + written = json.loads((sandbox["manifest_dir"] / "us.json").read_text()) + assert ( + written["data_package"]["release_manifest_revision"] + == "release-manifest-commit-sha" + ) + + +def test__release_manifest_version_mismatch_raises(sandbox) -> None: + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if url.endswith("releases/1.83.4/release_manifest.json"): + return _data_release_manifest_response(data_version="1.83.3") + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + with pytest.raises( + ValueError, + match="declares version '1.83.3', expected '1.83.4'", + ): + refresh_release_bundle( + country="us", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + +def test__bump_both_updates_everything(sandbox) -> None: def fake_urlopen(request, *args, **kwargs): url = request.full_url if "pypi.org" in url: return _pypi_response("policyengine-us", "1.653.3") - if "huggingface.co" in url: - return _FakeHFResponse(hf_bytes) + if url.endswith("releases/1.83.4/release_manifest.json"): + return _data_release_manifest_response() raise AssertionError(url) with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): @@ -242,6 +382,122 @@ def fake_urlopen(request, *args, **kwargs): assert result.new_data == "1.83.4" +def test__bump_both_uses_data_release_manifest_metadata(sandbox) -> None: + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if "pypi.org" in url: + return _pypi_response("policyengine-us", "1.653.3") + if url.endswith("releases/1.83.4/release_manifest.json"): + return _data_release_manifest_response() + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + result = refresh_release_bundle( + country="us", + model_version="1.653.3", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + assert result.new_dataset_sha256 == "e" * 64 + + written = json.loads((sandbox["manifest_dir"] / "us.json").read_text()) + assert written["certified_data_artifact"]["build_id"] == ( + "policyengine-us-data-1.83.4" + ) + assert written["certified_data_artifact"]["sha256"] == "e" * 64 + assert written["certification"]["compatibility_basis"] == ( + "exact_build_model_version" + ) + assert written["certification"]["built_with_model_version"] == "1.653.3" + assert written["certification"]["built_with_model_git_sha"] == "deadbeef" + assert written["certification"]["data_build_fingerprint"] == ("sha256:fingerprint") + assert ( + written["data_package"]["release_manifest_revision"] + == "release-manifest-commit-sha" + ) + + +def test__missing_release_manifest_metadata_raises(sandbox) -> None: + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if url.endswith("releases/1.83.4/release_manifest.json"): + return _FakeHFResponse(b"not json") + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + with pytest.raises( + ValueError, + match="Could not fetch data release manifest", + ): + refresh_release_bundle( + country="us", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + +def test__missing_release_manifest_commit_raises(sandbox) -> None: + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if url.endswith("releases/1.83.4/release_manifest.json"): + return _data_release_manifest_response(headers={}) + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + with pytest.raises( + ValueError, + match="Could not resolve an immutable HF commit", + ): + refresh_release_bundle( + country="us", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + +def test__release_manifest_missing_certified_artifact_raises(sandbox) -> None: + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if url.endswith("releases/1.83.4/release_manifest.json"): + payload = { + "schema_version": 1, + "data_package": { + "name": "policyengine-us-data", + "version": "1.83.4", + }, + "artifacts": { + "other_dataset": { + "kind": "microdata", + "path": "other_dataset.h5", + "repo_id": "policyengine/policyengine-us-data", + "revision": "1.83.4", + "sha256": "e" * 64, + } + }, + } + return _FakeHFResponse( + json.dumps(payload).encode(), + headers={"x-repo-commit": "release-manifest-commit-sha"}, + ) + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + with pytest.raises( + ValueError, + match="does not include certified dataset", + ): + refresh_release_bundle( + country="us", + data_version="1.83.4", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + def test__update_pyproject_false_leaves_pins_alone(sandbox) -> None: def fake_urlopen(*args, **kwargs): return _pypi_response("policyengine-us", "1.653.3") diff --git a/tests/test_models.py b/tests/test_models.py index 72597ad8..5b4c2e7b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -113,12 +113,12 @@ def test_has_release_manifest_metadata(self): assert us_latest.release_manifest is not None assert us_latest.release_manifest.country_id == "us" assert us_latest.model_package.name == "policyengine-us" - assert us_latest.model_package.version == "1.687.0" + assert us_latest.model_package.version == "1.690.7" assert us_latest.data_package.name == "policyengine-us-data" - assert us_latest.data_package.version == "1.78.2" + assert us_latest.data_package.version == "1.110.12" assert ( us_latest.default_dataset_uri - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" ) def test_has_hundreds_of_parameters(self): diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index ba945cc4..480ee41b 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -62,27 +62,27 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self): assert manifest.country_id == "us" assert manifest.policyengine_version == POLICYENGINE_VERSION assert manifest.model_package.name == "policyengine-us" - assert manifest.model_package.version == "1.687.0" + assert manifest.model_package.version == "1.690.7" assert manifest.data_package.name == "policyengine-us-data" - assert manifest.data_package.version == "1.78.2" + assert manifest.data_package.version == "1.110.12" assert manifest.data_package.repo_id == "policyengine/policyengine-us-data" assert ( manifest.data_package.release_manifest_path - == "releases/1.78.2/release_manifest.json" + == "releases/1.110.12/release_manifest.json" ) assert ( manifest.data_package.release_manifest_revision - == "9cb665df0a546f9c3d79b496f8eb2dd55859d38d" + == "3aac4505ec10d31efc1b3799a1e6458a15853ecc" ) assert manifest.certified_data_artifact is not None assert ( - manifest.certified_data_artifact.build_id == "policyengine-us-data-1.78.2" + manifest.certified_data_artifact.build_id == "policyengine-us-data-1.110.12" ) assert manifest.certified_data_artifact.dataset == "enhanced_cps_2024" assert manifest.certification is not None - assert manifest.certification.data_build_id == "policyengine-us-data-1.78.2" - assert manifest.certification.built_with_model_version == "1.647.0" - assert manifest.certification.certified_for_model_version == "1.687.0" + assert manifest.certification.data_build_id == "policyengine-us-data-1.110.12" + assert manifest.certification.built_with_model_version == "1.690.7" + assert manifest.certification.certified_for_model_version == "1.690.7" def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self): manifest = get_release_manifest("uk") @@ -117,7 +117,7 @@ def test__given_us_dataset_name__then_resolves_to_versioned_hf_url(self): assert ( resolved - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" ) def test__given_uk_dataset_name__then_resolves_to_versioned_hf_url(self): @@ -238,8 +238,8 @@ def test__given_country__then_can_fetch_data_release_manifest(self): mock_get.assert_called_once() assert mock_get.call_args.args[0] == ( "https://huggingface.co/policyengine/policyengine-us-data/resolve/" - "9cb665df0a546f9c3d79b496f8eb2dd55859d38d/" - "releases/1.78.2/release_manifest.json" + "3aac4505ec10d31efc1b3799a1e6458a15853ecc/" + "releases/1.110.12/release_manifest.json" ) def test__given_explicit_manifest_revision__then_builds_manifest_url(self): @@ -247,8 +247,8 @@ def test__given_explicit_manifest_revision__then_builds_manifest_url(self): assert https_release_manifest_uri(manifest.data_package) == ( "https://huggingface.co/policyengine/policyengine-us-data/resolve/" - "9cb665df0a546f9c3d79b496f8eb2dd55859d38d/" - "releases/1.78.2/release_manifest.json" + "3aac4505ec10d31efc1b3799a1e6458a15853ecc/" + "releases/1.110.12/release_manifest.json" ) def test__given_missing_data_release_manifest__then_fetch_raises_unavailable(self): @@ -355,7 +355,7 @@ def test__given_private_manifest_unavailable__then_bundled_certification_is_used ): certification = certify_data_release_compatibility( "us", - runtime_model_version="1.687.0", + runtime_model_version="1.690.7", ) assert certification == get_release_manifest("us").certification @@ -371,7 +371,7 @@ def test__given_manifest_request_timeout__then_bundled_certification_is_used( ): certification = certify_data_release_compatibility( "us", - runtime_model_version="1.687.0", + runtime_model_version="1.690.7", ) assert certification == get_release_manifest("us").certification diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py index a730faf7..bd57e83e 100644 --- a/tests/test_us_regions.py +++ b/tests/test_us_regions.py @@ -105,7 +105,7 @@ def test__given_us_registry__then_has_national_region(self): assert national.region_type == "national" assert ( national.dataset_path - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" ) def test__given_us_registry__then_has_51_states(self): @@ -134,7 +134,7 @@ def test__given_california_region__then_has_correct_format(self): assert ca.parent_code == "us" assert ( ca.dataset_path - == "hf://policyengine/policyengine-us-data/states/CA.h5@1.78.2" + == "hf://policyengine/policyengine-us-data/states/CA.h5@1.110.12" ) assert ca.state_code == "CA" assert ca.state_name == "California" @@ -167,7 +167,7 @@ def test__given_ca_first_district__then_has_correct_format(self): assert ca01.parent_code == "state/ca" assert ( ca01.dataset_path - == "hf://policyengine/policyengine-us-data/districts/CA-01.h5@1.78.2" + == "hf://policyengine/policyengine-us-data/districts/CA-01.h5@1.110.12" ) assert ca01.state_code == "CA" assert not ca01.requires_filter diff --git a/uv.lock b/uv.lock index 2babaddf..22f8b0d1 100644 --- a/uv.lock +++ b/uv.lock @@ -2411,7 +2411,7 @@ wheels = [ [[package]] name = "policyengine" -version = "4.3.1" +version = "4.4.2" source = { editable = "." } dependencies = [ { name = "jsonschema" }, @@ -2475,13 +2475,13 @@ requires-dist = [ { name = "pandas", specifier = ">=2.0.0" }, { name = "plotly", marker = "extra == 'dev'", specifier = ">=5.0.0" }, { name = "plotly", marker = "extra == 'plotting'", specifier = ">=5.0.0" }, - { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.26.0" }, - { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.26.0" }, - { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.26.0" }, + { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.26.1" }, + { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.26.1" }, + { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.26.1" }, { name = "policyengine-uk", marker = "extra == 'dev'", specifier = "==2.88.14" }, { name = "policyengine-uk", marker = "extra == 'uk'", specifier = "==2.88.14" }, - { name = "policyengine-us", marker = "extra == 'dev'", specifier = "==1.687.0" }, - { name = "policyengine-us", marker = "extra == 'us'", specifier = "==1.687.0" }, + { name = "policyengine-us", marker = "extra == 'dev'", specifier = "==1.690.7" }, + { name = "policyengine-us", marker = "extra == 'us'", specifier = "==1.690.7" }, { name = "psutil", specifier = ">=5.9.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pytest", marker = "extra == 'dev'" }, @@ -2496,7 +2496,7 @@ provides-extras = ["plotting", "graph", "uk", "us", "dev"] [[package]] name = "policyengine-core" -version = "3.26.0" +version = "3.26.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dpath" }, @@ -2520,9 +2520,9 @@ dependencies = [ { name = "standard-imghdr" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4e/69/adb6407c97de5260a938344f9eafa9979bf8f97aec8c628538d906ecdec2/policyengine_core-3.26.0.tar.gz", hash = "sha256:a571026ef418653ec18f087463cf37e9be730e90ad4376cb10997f0ddf9f8eda", size = 468190, upload-time = "2026-05-04T19:26:27.707Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/56/666e1e708cbd61078989edc943d5389d45123beb7124a5e3180171656ff6/policyengine_core-3.26.1.tar.gz", hash = "sha256:dc4e3007bcd137cbe608042d067cedb889a9b8671db3d08c8e237f1ac3e324b4", size = 472159, upload-time = "2026-05-07T23:46:43.228Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f3/0e98b30d4eb7b309c3f1f1d8c2354595f78319ce2442eda069f02a47f4d1/policyengine_core-3.26.0-py3-none-any.whl", hash = "sha256:d63a4622233b61c4c5fc64d4f65030d65b2564ac63ac87b17d545d63cdf17194", size = 232135, upload-time = "2026-05-04T19:26:25.693Z" }, + { url = "https://files.pythonhosted.org/packages/15/2f/9be635fe4dfb2fe65d200c33695f5a96ef98a2921f06ff6d465384b0e551/policyengine_core-3.26.1-py3-none-any.whl", hash = "sha256:185374b3c1fe13dc951637c49a9853211ca61a8a9971eb9cc4c4b07b1477240a", size = 232190, upload-time = "2026-05-07T23:46:41.797Z" }, ] [[package]] @@ -2544,7 +2544,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.687.0" +version = "1.690.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2556,9 +2556,9 @@ dependencies = [ { name = "tables", version = "3.11.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/26/49/d34803002b058e7ad91861975d26aa7d2bac3b4988756290e1588e00a776/policyengine_us-1.687.0.tar.gz", hash = "sha256:eb73607fcc54d72429af3830456f83b9ec9dd84161b6cfa86402637dfeb19fa7", size = 9464817, upload-time = "2026-05-05T17:14:34.513Z" } +sdist = { url = "https://files.pythonhosted.org/packages/81/df/60548e64a5ccf5f961a45608c2c6744833daf756c1c82d1e59e5bca14850/policyengine_us-1.690.7.tar.gz", hash = "sha256:3dbb1f54824902fcd6ae64d5879f36ce6b2372a42321c838c20c430fd1507a2e", size = 9479020, upload-time = "2026-05-10T22:27:01.776Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/36/5633f5a3996c915494154ec3852011b1a239ea06d9f08cb6287ab709618c/policyengine_us-1.687.0-py3-none-any.whl", hash = "sha256:cac7da3aa9ba4bf57009eee75d798217bbef7e1c5ca17646d472fad715ab634f", size = 9954513, upload-time = "2026-05-05T17:14:30.958Z" }, + { url = "https://files.pythonhosted.org/packages/2a/02/52109bae5f4767237b43bd72ce0bc4edf7925650a788053b2bc168caa5ae/policyengine_us-1.690.7-py3-none-any.whl", hash = "sha256:5a7a541efabac98fa069d6845902cf5924c81db67383234b55dcd2b8bfcfc3ca", size = 9985671, upload-time = "2026-05-10T22:26:58.843Z" }, ] [[package]]