diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 2c05c7c..0459c04 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -4,8 +4,8 @@ ## Verification -- [ ] `python -m unittest discover -s tests -v` -- [ ] `python plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py` +- [ ] `python3 -m unittest discover -s tests -v` +- [ ] `python3 plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py` - [ ] Documentation updated, if user-facing behavior changed ## Boundary Check @@ -13,4 +13,3 @@ - [ ] This preserves the "workflow adaptation, not model replacement" boundary. - [ ] This does not add credentials, private prompt text, or hidden provider assumptions. - [ ] Current product/provider/API claims are backed by official sources when applicable. - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2628cc9..bfeed5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,15 +27,15 @@ jobs: python-version: ${{ matrix.python-version }} - name: Run tests - run: python -m unittest discover -s tests -v + run: python3 -m unittest discover -s tests -v - name: Compile scripts run: | - python -m py_compile \ + python3 -m py_compile \ plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py \ plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py \ plugins/codex-fable5/skills/codex-fable5/scripts/make_litellm_config.py \ tests/test_scripts.py - name: Validate coverage matrix - run: python plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py + run: python3 plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py diff --git a/README.md b/README.md index 37256b6..5331c4f 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ Use $codex-fable5 to convert this Claude/Fable prompt into Codex AGENTS.md guida Create a simple multi-goal ledger: ```bash -python plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create --brief "Migration" \ +python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create --brief "Migration" \ --goal "inspect::Find current behavior and tests" \ --goal "change::Implement the migration" \ --goal "verify::Run tests and inspect output" @@ -133,7 +133,7 @@ python plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create -- If you have a local copy of `CLAUDE-FABLE-5.md`, run: ```bash -python plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py \ +python3 plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py \ --source /path/to/CLAUDE-FABLE-5.md ``` @@ -146,7 +146,7 @@ The target is 100% source-heading accounting. That means every named source sect Run the stdlib-only test suite: ```bash -python -m unittest discover -s tests -v +python3 -m unittest discover -s tests -v ``` --- diff --git a/docs/RELEASING.md b/docs/RELEASING.md index 8d53ddc..8b0fb9c 100644 --- a/docs/RELEASING.md +++ b/docs/RELEASING.md @@ -11,13 +11,13 @@ This project uses a lightweight release process because it is a small Codex plug 5. Run local verification: ```bash -python -m unittest discover -s tests -v -python -m py_compile \ +python3 -m unittest discover -s tests -v +python3 -m py_compile \ plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py \ plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py \ plugins/codex-fable5/skills/codex-fable5/scripts/make_litellm_config.py \ tests/test_scripts.py -python plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py +python3 plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py ``` 6. Verify no secrets, `.codex-fable5/` ledgers, or local cache files are staged. @@ -42,4 +42,3 @@ Use semantic versioning for the plugin package: ## Release Boundaries Do not release changes that claim unavailable model access, include credentials, or copy protected prompt text into the repository. - diff --git a/plugins/codex-fable5/skills/codex-fable5/references/provider-bridge.md b/plugins/codex-fable5/skills/codex-fable5/references/provider-bridge.md index 275c163..271d071 100644 --- a/plugins/codex-fable5/skills/codex-fable5/references/provider-bridge.md +++ b/plugins/codex-fable5/skills/codex-fable5/references/provider-bridge.md @@ -16,7 +16,7 @@ Use LiteLLM Proxy when you need a practical OpenAI-compatible gateway for Anthro Typical flow: ```bash -python -m pip install "litellm[proxy]" +python3 -m pip install "litellm[proxy]" export ANTHROPIC_API_KEY="sk-ant-..." litellm --config litellm-fable5.yaml --host 127.0.0.1 --port 4000 ``` @@ -24,7 +24,7 @@ litellm --config litellm-fable5.yaml --host 127.0.0.1 --port 4000 Generate a starter LiteLLM config: ```bash -python plugins/codex-fable5/skills/codex-fable5/scripts/make_litellm_config.py \ +python3 plugins/codex-fable5/skills/codex-fable5/scripts/make_litellm_config.py \ --model claude-fable-5 \ --alias claude-fable-5 \ --output litellm-fable5.yaml diff --git a/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md b/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md index 839bfb1..4379a82 100644 --- a/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md +++ b/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md @@ -20,13 +20,13 @@ Use `scripts/codex_goals.py` when there are multiple dependent stories and the t Example: ```bash -python plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create --brief "Add CSV import" \ +python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create --brief "Add CSV import" \ --goal "inspect::Find current import flow and tests" \ --goal "implement::Add CSV parser and UI path" \ --goal "verify::Run tests and a sample import" -python plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py next -python plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py checkpoint --id G001 --status complete --evidence "Read importer.ts and import.test.ts" -python plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py next +python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py next +python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py checkpoint --id G001 --status complete --evidence "Read importer.ts and import.test.ts" +python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py next ``` Rules: diff --git a/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py b/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py index ccce9b1..aeb223d 100755 --- a/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py +++ b/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py @@ -13,6 +13,8 @@ STATE_DIR = Path(".codex-fable5") GOALS_FILE = STATE_DIR / "goals.json" LEDGER_FILE = STATE_DIR / "ledger.jsonl" +OPEN_STATUSES = {"pending", "in_progress"} +INCOMPLETE_TERMINAL_STATUSES = {"failed", "blocked"} def now() -> str: @@ -56,6 +58,18 @@ def parse_goal(raw: str, index: int) -> dict[str, Any]: } +def incomplete_terminal_summary(goals: list[dict[str, Any]]) -> str: + counts = { + status: sum(1 for goal in goals if goal["status"] == status) + for status in sorted(INCOMPLETE_TERMINAL_STATUSES) + } + return ", ".join(f"{count} {status}" for status, count in counts.items() if count) + + +def terminal_incomplete_goals(goals: list[dict[str, Any]]) -> list[dict[str, Any]]: + return [goal for goal in goals if goal["status"] in INCOMPLETE_TERMINAL_STATUSES] + + def cmd_create(args: argparse.Namespace) -> None: if GOALS_FILE.exists() and not args.force: sys.exit("codex-fable5: plan already exists. Use `status` or replace it with --force.") @@ -76,14 +90,28 @@ def cmd_next(_: argparse.Namespace) -> None: if active: goal = active[0] else: - pending = [goal for goal in plan["goals"] if goal["status"] == "pending"] - if not pending: - print("codex-fable5: all stories complete") - return - goal = pending[0] - goal["status"] = "in_progress" - write_json(GOALS_FILE, plan) - append_event("story_started", id=goal["id"], title=goal["title"]) + incomplete = terminal_incomplete_goals(plan["goals"]) + if incomplete: + goal = incomplete[0] + previous_status = goal["status"] + goal["status"] = "in_progress" + write_json(GOALS_FILE, plan) + append_event( + "story_reopened", + id=goal["id"], + title=goal["title"], + previous_status=previous_status, + ) + print(f"Reopened {goal['id']} from {previous_status}.") + else: + pending = [goal for goal in plan["goals"] if goal["status"] == "pending"] + if not pending: + print("codex-fable5: all stories complete") + return + goal = pending[0] + goal["status"] = "in_progress" + write_json(GOALS_FILE, plan) + append_event("story_started", id=goal["id"], title=goal["title"]) is_final = goal["id"] == plan["goals"][-1]["id"] print(f"=== codex-fable5 handoff: {goal['id']} {goal['title']}") @@ -129,9 +157,17 @@ def cmd_checkpoint(args: argparse.Namespace) -> None: verify_cmd=verify_cmd, verify_evidence=verify_evidence, ) - remaining = [item for item in plan["goals"] if item["status"] in {"pending", "in_progress"}] + remaining = [item for item in plan["goals"] if item["status"] in OPEN_STATUSES] print(f"codex-fable5: {goal['id']} -> {args.status}") - print("codex-fable5: all stories complete" if not remaining else f"codex-fable5: {len(remaining)} stories left") + if terminal_incomplete_goals(plan["goals"]): + summary = incomplete_terminal_summary(plan["goals"]) + print(f"codex-fable5: plan is not complete; {summary}.") + if remaining: + print(f"codex-fable5: {len(remaining)} open stories remain blocked.") + elif remaining: + print(f"codex-fable5: {len(remaining)} stories left") + else: + print("codex-fable5: all stories complete") def cmd_status(_: argparse.Namespace) -> None: diff --git a/tests/test_scripts.py b/tests/test_scripts.py index f910fee..7642f84 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -42,6 +42,26 @@ def test_manifest_json_files_are_valid(self) -> None: parsed = json.loads(path.read_text(encoding="utf-8")) self.assertIsInstance(parsed, dict) + def test_marketplace_plugin_paths_resolve_to_skill(self) -> None: + marketplace_path = ROOT / ".agents" / "plugins" / "marketplace.json" + marketplace = json.loads(marketplace_path.read_text(encoding="utf-8")) + root = ROOT.resolve() + for plugin_entry in marketplace["plugins"]: + with self.subTest(plugin=plugin_entry["name"]): + plugin_root = (ROOT / plugin_entry["source"]["path"]).resolve() + self.assertTrue(plugin_root.is_relative_to(root)) + + plugin_manifest_path = plugin_root / ".codex-plugin" / "plugin.json" + plugin_manifest = json.loads(plugin_manifest_path.read_text(encoding="utf-8")) + skills_root = (plugin_root / plugin_manifest["skills"]).resolve() + skill_path = skills_root / plugin_manifest["name"] / "SKILL.md" + + self.assertTrue(plugin_root.is_dir()) + self.assertTrue(plugin_manifest_path.is_file()) + self.assertTrue(skills_root.is_relative_to(plugin_root)) + self.assertTrue(skills_root.is_dir()) + self.assertTrue(skill_path.is_file()) + def test_license_contains_full_agpl_text(self) -> None: text = (ROOT / "LICENSE").read_text(encoding="utf-8") @@ -94,8 +114,8 @@ def test_ci_workflow_runs_project_verification(self) -> None: self.assertIn("actions/checkout@v6", workflow) self.assertIn("actions/setup-python@v6", workflow) - self.assertIn("python -m unittest discover -s tests -v", workflow) - self.assertIn("python -m py_compile", workflow) + self.assertIn("python3 -m unittest discover -s tests -v", workflow) + self.assertIn("python3 -m py_compile", workflow) self.assertIn("fable_coverage.py", workflow) self.assertIn('python-version: ["3.11", "3.12", "3.13"]', workflow) @@ -235,6 +255,129 @@ def run(*args: str) -> subprocess.CompletedProcess[str]: self.assertEqual(status.returncode, 0, status.stderr) self.assertIn("2/2 complete", status.stdout) + def test_goal_ledger_failed_story_is_not_reported_complete(self) -> None: + script = SCRIPTS / "codex_goals.py" + with tempfile.TemporaryDirectory() as tmp: + cwd = Path(tmp) + + def run(*args: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, str(script), *args], + cwd=cwd, + text=True, + capture_output=True, + check=False, + ) + + created = run("create", "--brief", "Smoke", "--goal", "inspect::Check state") + self.assertEqual(created.returncode, 0, created.stderr) + + first = run("next") + self.assertEqual(first.returncode, 0, first.stderr) + + failed = run("checkpoint", "--id", "G001", "--status", "failed") + self.assertEqual(failed.returncode, 0, failed.stderr) + self.assertIn("plan is not complete", failed.stdout) + self.assertNotIn("all stories complete", failed.stdout) + + next_story = run("next") + self.assertEqual(next_story.returncode, 0, next_story.stderr) + self.assertIn("Reopened G001 from failed", next_story.stdout) + self.assertNotIn("all stories complete", next_story.stdout + next_story.stderr) + + recovered = run( + "checkpoint", + "--id", + "G001", + "--status", + "complete", + "--evidence", + "retry evidence", + "--verify-cmd", + "smoke", + "--verify-evidence", + "accepted", + ) + self.assertEqual(recovered.returncode, 0, recovered.stderr) + self.assertIn("all stories complete", recovered.stdout) + + def test_goal_ledger_failed_story_blocks_later_pending_stories(self) -> None: + script = SCRIPTS / "codex_goals.py" + with tempfile.TemporaryDirectory() as tmp: + cwd = Path(tmp) + + def run(*args: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, str(script), *args], + cwd=cwd, + text=True, + capture_output=True, + check=False, + ) + + created = run( + "create", + "--brief", + "Smoke", + "--goal", + "inspect::Check state", + "--goal", + "verify::Confirm final state", + ) + self.assertEqual(created.returncode, 0, created.stderr) + self.assertEqual(run("next").returncode, 0) + + failed = run("checkpoint", "--id", "G001", "--status", "failed") + self.assertEqual(failed.returncode, 0, failed.stderr) + self.assertIn("open stories remain blocked", failed.stdout) + + next_story = run("next") + self.assertEqual(next_story.returncode, 0, next_story.stderr) + self.assertIn("Reopened G001 from failed", next_story.stdout) + self.assertIn("G001 inspect", next_story.stdout) + self.assertNotIn("G002", next_story.stdout + next_story.stderr) + + recovered = run( + "checkpoint", + "--id", + "G001", + "--status", + "complete", + "--evidence", + "retry evidence", + ) + self.assertEqual(recovered.returncode, 0, recovered.stderr) + + next_after_recovery = run("next") + self.assertEqual(next_after_recovery.returncode, 0, next_after_recovery.stderr) + self.assertIn("G002 verify", next_after_recovery.stdout) + + def test_goal_ledger_blocked_story_can_be_reopened(self) -> None: + script = SCRIPTS / "codex_goals.py" + with tempfile.TemporaryDirectory() as tmp: + cwd = Path(tmp) + + def run(*args: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, str(script), *args], + cwd=cwd, + text=True, + capture_output=True, + check=False, + ) + + created = run("create", "--brief", "Smoke", "--goal", "inspect::Check state") + self.assertEqual(created.returncode, 0, created.stderr) + self.assertEqual(run("next").returncode, 0) + + blocked = run("checkpoint", "--id", "G001", "--status", "blocked") + self.assertEqual(blocked.returncode, 0, blocked.stderr) + self.assertIn("plan is not complete", blocked.stdout) + + reopened = run("next") + self.assertEqual(reopened.returncode, 0, reopened.stderr) + self.assertIn("Reopened G001 from blocked", reopened.stdout) + def test_litellm_config_generation(self) -> None: plain = self.make_litellm_config.build_config("claude-test", "test-alias") prefixed = self.make_litellm_config.build_config("anthropic/claude-test", "test-alias")