From 64ac7b1cbea29d3b0e12ac3ca5c27df1cc540c1a Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Thu, 7 May 2026 13:32:54 +0100 Subject: [PATCH 1/3] Derive US program-statistics entity from variable metadata Replace the duplicated entity field in the ``US_PROGRAMS`` dict with a metadata-driven lookup via ``model_version.get_variable(name).entity``, so the program list cannot silently drift when policyengine-us moves a variable between entities. The "missing outputs" check from #327 is preserved. Refs #326. Closes #334 (smaller follow-up on top of merged #327). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../derive-us-program-entities.changed.md | 1 + .../tax_benefit_models/us/analysis.py | 38 +++++++++++-------- tests/test_us_program_statistics.py | 15 ++++++-- 3 files changed, 35 insertions(+), 19 deletions(-) create mode 100644 changelog.d/derive-us-program-entities.changed.md diff --git a/changelog.d/derive-us-program-entities.changed.md b/changelog.d/derive-us-program-entities.changed.md new file mode 100644 index 00000000..53ba0269 --- /dev/null +++ b/changelog.d/derive-us-program-entities.changed.md @@ -0,0 +1 @@ +Derive US program-statistics entity from variable metadata instead of duplicating it in the program list. diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index 7bc1cd52..fd154673 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -28,18 +28,23 @@ ) from policyengine.utils.errors import format_conditional_error_detail -US_PROGRAMS = { - "income_tax": {"entity": "tax_unit", "is_tax": True}, - "employee_payroll_tax": {"entity": "tax_unit", "is_tax": True}, - "state_income_tax": {"entity": "tax_unit", "is_tax": True}, - "snap": {"entity": "spm_unit", "is_tax": False}, - "tanf": {"entity": "spm_unit", "is_tax": False}, - "ssi": {"entity": "person", "is_tax": False}, - "social_security": {"entity": "person", "is_tax": False}, - "medicare_cost": {"entity": "person", "is_tax": False}, - "medicaid": {"entity": "person", "is_tax": False}, - "eitc": {"entity": "tax_unit", "is_tax": False}, - "ctc": {"entity": "tax_unit", "is_tax": False}, +# Map of US program-statistics variable name -> is_tax flag. The +# entity for each program is derived from the variable's own metadata +# at runtime (see ``_validate_program_statistics_config`` and +# ``economic_impact_analysis``), so this list cannot silently drift +# when policyengine-us moves a variable between entities. +US_PROGRAMS: dict[str, bool] = { + "income_tax": True, + "employee_payroll_tax": True, + "state_income_tax": True, + "snap": False, + "tanf": False, + "ssi": False, + "social_security": False, + "medicare_cost": False, + "medicaid": False, + "eitc": False, + "ctc": False, } @@ -95,7 +100,7 @@ def _validate_program_statistics_config( missing_outputs: set[tuple[str, str]] = set() simulations = (baseline_simulation, reform_simulation) - for program_name, program_info in US_PROGRAMS.items(): + for program_name in US_PROGRAMS: for simulation in simulations: model_version = simulation.tax_benefit_model_version try: @@ -153,14 +158,15 @@ def economic_impact_analysis( income_variable="household_net_income", ) + model_version = baseline_simulation.tax_benefit_model_version program_statistics = [] - for program_name, program_info in US_PROGRAMS.items(): + for program_name, is_tax in US_PROGRAMS.items(): stats = ProgramStatistics( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, program_name=program_name, - entity=program_info["entity"], - is_tax=program_info["is_tax"], + entity=model_version.get_variable(program_name).entity, + is_tax=is_tax, ) stats.run() program_statistics.append(stats) diff --git a/tests/test_us_program_statistics.py b/tests/test_us_program_statistics.py index 2c5044f8..29d1ef01 100644 --- a/tests/test_us_program_statistics.py +++ b/tests/test_us_program_statistics.py @@ -103,14 +103,15 @@ def test_us_program_statistics_config_runs_against_mocked_outputs(tmp_path): _validate_program_statistics_config(baseline, reform) + model_version = baseline.tax_benefit_model_version results = {} - for program_name, program_info in US_PROGRAMS.items(): + for program_name, is_tax in US_PROGRAMS.items(): stats = ProgramStatistics( baseline_simulation=baseline, reform_simulation=reform, program_name=program_name, - entity=program_info["entity"], - is_tax=program_info["is_tax"], + entity=model_version.get_variable(program_name).entity, + is_tax=is_tax, ) stats.run() results[program_name] = stats @@ -144,3 +145,11 @@ def test_us_program_statistics_config_fails_before_simulation_run( _validate_program_statistics_config(baseline, reform) assert "medicare_cost" in str(exc_info.value) + + +def test_us_programs_entities_match_model_metadata(): + for program_name in US_PROGRAMS: + assert program_name in us_latest.variables_by_name, ( + f"{program_name} is not defined in the US model" + ) + From 344bfcd1ee5e9d7569bd3f843da19f7b7f973823 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Thu, 7 May 2026 13:33:56 +0100 Subject: [PATCH 2/3] Strip trailing blank line in test_us_programs_entities_match_model_metadata Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_us_program_statistics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_us_program_statistics.py b/tests/test_us_program_statistics.py index 29d1ef01..c03d4542 100644 --- a/tests/test_us_program_statistics.py +++ b/tests/test_us_program_statistics.py @@ -152,4 +152,3 @@ def test_us_programs_entities_match_model_metadata(): assert program_name in us_latest.variables_by_name, ( f"{program_name} is not defined in the US model" ) - From 11d115399993476fcc5238f6d027e691c002fa62 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Mon, 11 May 2026 14:39:37 +0100 Subject: [PATCH 3/3] Keep is_tax as a named key in US_PROGRAMS Addresses review feedback on #342: restore is_tax as a named key inside each program's metadata dict so US_PROGRAMS remains self-documenting. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tax_benefit_models/us/analysis.py | 30 +++++++++---------- tests/test_us_program_statistics.py | 4 +-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index fd154673..a285020d 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -28,23 +28,23 @@ ) from policyengine.utils.errors import format_conditional_error_detail -# Map of US program-statistics variable name -> is_tax flag. The +# Map of US program-statistics variable name -> program metadata. The # entity for each program is derived from the variable's own metadata # at runtime (see ``_validate_program_statistics_config`` and # ``economic_impact_analysis``), so this list cannot silently drift # when policyengine-us moves a variable between entities. -US_PROGRAMS: dict[str, bool] = { - "income_tax": True, - "employee_payroll_tax": True, - "state_income_tax": True, - "snap": False, - "tanf": False, - "ssi": False, - "social_security": False, - "medicare_cost": False, - "medicaid": False, - "eitc": False, - "ctc": False, +US_PROGRAMS: dict[str, dict] = { + "income_tax": {"is_tax": True}, + "employee_payroll_tax": {"is_tax": True}, + "state_income_tax": {"is_tax": True}, + "snap": {"is_tax": False}, + "tanf": {"is_tax": False}, + "ssi": {"is_tax": False}, + "social_security": {"is_tax": False}, + "medicare_cost": {"is_tax": False}, + "medicaid": {"is_tax": False}, + "eitc": {"is_tax": False}, + "ctc": {"is_tax": False}, } @@ -160,13 +160,13 @@ def economic_impact_analysis( model_version = baseline_simulation.tax_benefit_model_version program_statistics = [] - for program_name, is_tax in US_PROGRAMS.items(): + for program_name, program_info in US_PROGRAMS.items(): stats = ProgramStatistics( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, program_name=program_name, entity=model_version.get_variable(program_name).entity, - is_tax=is_tax, + is_tax=program_info["is_tax"], ) stats.run() program_statistics.append(stats) diff --git a/tests/test_us_program_statistics.py b/tests/test_us_program_statistics.py index c03d4542..cef71cdf 100644 --- a/tests/test_us_program_statistics.py +++ b/tests/test_us_program_statistics.py @@ -105,13 +105,13 @@ def test_us_program_statistics_config_runs_against_mocked_outputs(tmp_path): model_version = baseline.tax_benefit_model_version results = {} - for program_name, is_tax in US_PROGRAMS.items(): + for program_name, program_info in US_PROGRAMS.items(): stats = ProgramStatistics( baseline_simulation=baseline, reform_simulation=reform, program_name=program_name, entity=model_version.get_variable(program_name).entity, - is_tax=is_tax, + is_tax=program_info["is_tax"], ) stats.run() results[program_name] = stats