From 64ac7b1cbea29d3b0e12ac3ca5c27df1cc540c1a Mon Sep 17 00:00:00 2001
From: Vahid Ahmadi <va.vahidahmadi@gmail.com>
Date: Thu, 7 May 2026 13:32:54 +0100
Subject: [PATCH 1/3] Derive US program-statistics entity from variable
 metadata

Replace the duplicated entity field in the ``US_PROGRAMS`` dict with a
metadata-driven lookup via ``model_version.get_variable(name).entity``
on the baseline simulation's model version, so the program list cannot
silently drift when policyengine-us moves a variable between entities.
The "missing outputs" check from #327 is preserved, and a new test
asserts that every ``US_PROGRAMS`` name is defined in the US model.

Refs #326. Closes #334 (smaller follow-up on top of merged #327).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../derive-us-program-entities.changed.md     |  1 +
 .../tax_benefit_models/us/analysis.py         | 38 +++++++++++--------
 tests/test_us_program_statistics.py           | 15 ++++++--
 3 files changed, 35 insertions(+), 19 deletions(-)
 create mode 100644 changelog.d/derive-us-program-entities.changed.md

diff --git a/changelog.d/derive-us-program-entities.changed.md b/changelog.d/derive-us-program-entities.changed.md
new file mode 100644
index 00000000..53ba0269
--- /dev/null
+++ b/changelog.d/derive-us-program-entities.changed.md
@@ -0,0 +1 @@
+Derive US program-statistics entity from variable metadata instead of duplicating it in the program list.
diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py
index 7bc1cd52..fd154673 100644
--- a/src/policyengine/tax_benefit_models/us/analysis.py
+++ b/src/policyengine/tax_benefit_models/us/analysis.py
@@ -28,18 +28,23 @@
 )
 from policyengine.utils.errors import format_conditional_error_detail
 
-US_PROGRAMS = {
-    "income_tax": {"entity": "tax_unit", "is_tax": True},
-    "employee_payroll_tax": {"entity": "tax_unit", "is_tax": True},
-    "state_income_tax": {"entity": "tax_unit", "is_tax": True},
-    "snap": {"entity": "spm_unit", "is_tax": False},
-    "tanf": {"entity": "spm_unit", "is_tax": False},
-    "ssi": {"entity": "person", "is_tax": False},
-    "social_security": {"entity": "person", "is_tax": False},
-    "medicare_cost": {"entity": "person", "is_tax": False},
-    "medicaid": {"entity": "person", "is_tax": False},
-    "eitc": {"entity": "tax_unit", "is_tax": False},
-    "ctc": {"entity": "tax_unit", "is_tax": False},
+# Map of US program-statistics variable name -> is_tax flag. The
+# entity for each program is derived from the variable's own metadata
+# at runtime (see ``_validate_program_statistics_config`` and
+# ``economic_impact_analysis``), so this list cannot silently drift
+# when policyengine-us moves a variable between entities.
+US_PROGRAMS: dict[str, bool] = {
+    "income_tax": True,
+    "employee_payroll_tax": True,
+    "state_income_tax": True,
+    "snap": False,
+    "tanf": False,
+    "ssi": False,
+    "social_security": False,
+    "medicare_cost": False,
+    "medicaid": False,
+    "eitc": False,
+    "ctc": False,
 }
 
 
@@ -95,7 +100,7 @@ def _validate_program_statistics_config(
     missing_outputs: set[tuple[str, str]] = set()
 
     simulations = (baseline_simulation, reform_simulation)
-    for program_name, program_info in US_PROGRAMS.items():
+    for program_name in US_PROGRAMS:
         for simulation in simulations:
             model_version = simulation.tax_benefit_model_version
             try:
@@ -153,14 +158,15 @@ def economic_impact_analysis(
         income_variable="household_net_income",
     )
 
+    model_version = baseline_simulation.tax_benefit_model_version
     program_statistics = []
-    for program_name, program_info in US_PROGRAMS.items():
+    for program_name, is_tax in US_PROGRAMS.items():
         stats = ProgramStatistics(
             baseline_simulation=baseline_simulation,
             reform_simulation=reform_simulation,
             program_name=program_name,
-            entity=program_info["entity"],
-            is_tax=program_info["is_tax"],
+            entity=model_version.get_variable(program_name).entity,
+            is_tax=is_tax,
         )
         stats.run()
         program_statistics.append(stats)
diff --git a/tests/test_us_program_statistics.py b/tests/test_us_program_statistics.py
index 2c5044f8..29d1ef01 100644
--- a/tests/test_us_program_statistics.py
+++ b/tests/test_us_program_statistics.py
@@ -103,14 +103,15 @@ def test_us_program_statistics_config_runs_against_mocked_outputs(tmp_path):
 
     _validate_program_statistics_config(baseline, reform)
 
+    model_version = baseline.tax_benefit_model_version
     results = {}
-    for program_name, program_info in US_PROGRAMS.items():
+    for program_name, is_tax in US_PROGRAMS.items():
         stats = ProgramStatistics(
             baseline_simulation=baseline,
             reform_simulation=reform,
             program_name=program_name,
-            entity=program_info["entity"],
-            is_tax=program_info["is_tax"],
+            entity=model_version.get_variable(program_name).entity,
+            is_tax=is_tax,
         )
         stats.run()
         results[program_name] = stats
@@ -144,3 +145,11 @@ def test_us_program_statistics_config_fails_before_simulation_run(
         _validate_program_statistics_config(baseline, reform)
 
     assert "medicare_cost" in str(exc_info.value)
+
+
+def test_us_programs_entities_match_model_metadata():
+    for program_name in US_PROGRAMS:
+        assert program_name in us_latest.variables_by_name, (
+            f"{program_name} is not defined in the US model"
+        )
+

From 344bfcd1ee5e9d7569bd3f843da19f7b7f973823 Mon Sep 17 00:00:00 2001
From: Vahid Ahmadi <va.vahidahmadi@gmail.com>
Date: Thu, 7 May 2026 13:33:56 +0100
Subject: [PATCH 2/3] Strip trailing blank line in
 test_us_programs_entities_match_model_metadata

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/test_us_program_statistics.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_us_program_statistics.py b/tests/test_us_program_statistics.py
index 29d1ef01..c03d4542 100644
--- a/tests/test_us_program_statistics.py
+++ b/tests/test_us_program_statistics.py
@@ -152,4 +152,3 @@ def test_us_programs_entities_match_model_metadata():
         assert program_name in us_latest.variables_by_name, (
             f"{program_name} is not defined in the US model"
         )
-

From 11d115399993476fcc5238f6d027e691c002fa62 Mon Sep 17 00:00:00 2001
From: Vahid Ahmadi <va.vahidahmadi@gmail.com>
Date: Mon, 11 May 2026 14:39:37 +0100
Subject: [PATCH 3/3] Keep is_tax as a named key in US_PROGRAMS

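Addresses review feedback on #342: restore is_tax as a named key inside
each program's metadata dict so US_PROGRAMS remains self-documenting.
The entity is not restored to the dict; it is still derived from the
variable's metadata at runtime.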

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../tax_benefit_models/us/analysis.py         | 30 +++++++++----------
 tests/test_us_program_statistics.py           |  4 +--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py
index fd154673..a285020d 100644
--- a/src/policyengine/tax_benefit_models/us/analysis.py
+++ b/src/policyengine/tax_benefit_models/us/analysis.py
@@ -28,23 +28,23 @@
 )
 from policyengine.utils.errors import format_conditional_error_detail
 
-# Map of US program-statistics variable name -> is_tax flag. The
+# Map of US program-statistics variable name -> program metadata. The
 # entity for each program is derived from the variable's own metadata
 # at runtime (see ``_validate_program_statistics_config`` and
 # ``economic_impact_analysis``), so this list cannot silently drift
 # when policyengine-us moves a variable between entities.
-US_PROGRAMS: dict[str, bool] = {
-    "income_tax": True,
-    "employee_payroll_tax": True,
-    "state_income_tax": True,
-    "snap": False,
-    "tanf": False,
-    "ssi": False,
-    "social_security": False,
-    "medicare_cost": False,
-    "medicaid": False,
-    "eitc": False,
-    "ctc": False,
+US_PROGRAMS: dict[str, dict] = {
+    "income_tax": {"is_tax": True},
+    "employee_payroll_tax": {"is_tax": True},
+    "state_income_tax": {"is_tax": True},
+    "snap": {"is_tax": False},
+    "tanf": {"is_tax": False},
+    "ssi": {"is_tax": False},
+    "social_security": {"is_tax": False},
+    "medicare_cost": {"is_tax": False},
+    "medicaid": {"is_tax": False},
+    "eitc": {"is_tax": False},
+    "ctc": {"is_tax": False},
 }
 
 
@@ -160,13 +160,13 @@ def economic_impact_analysis(
 
     model_version = baseline_simulation.tax_benefit_model_version
     program_statistics = []
-    for program_name, is_tax in US_PROGRAMS.items():
+    for program_name, program_info in US_PROGRAMS.items():
         stats = ProgramStatistics(
             baseline_simulation=baseline_simulation,
             reform_simulation=reform_simulation,
             program_name=program_name,
             entity=model_version.get_variable(program_name).entity,
-            is_tax=is_tax,
+            is_tax=program_info["is_tax"],
         )
         stats.run()
         program_statistics.append(stats)
diff --git a/tests/test_us_program_statistics.py b/tests/test_us_program_statistics.py
index c03d4542..cef71cdf 100644
--- a/tests/test_us_program_statistics.py
+++ b/tests/test_us_program_statistics.py
@@ -105,13 +105,13 @@ def test_us_program_statistics_config_runs_against_mocked_outputs(tmp_path):
 
     model_version = baseline.tax_benefit_model_version
     results = {}
-    for program_name, is_tax in US_PROGRAMS.items():
+    for program_name, program_info in US_PROGRAMS.items():
         stats = ProgramStatistics(
             baseline_simulation=baseline,
             reform_simulation=reform,
             program_name=program_name,
             entity=model_version.get_variable(program_name).entity,
-            is_tax=is_tax,
+            is_tax=program_info["is_tax"],
         )
         stats.run()
         results[program_name] = stats