From 4acaf321deb9ce8d1b6e1f5e671394c45e081d83 Mon Sep 17 00:00:00 2001 From: Joel Marshall Date: Fri, 19 Jun 2026 19:31:16 -0400 Subject: [PATCH] fix: run the stop-gate review as an ephemeral, untracked, read-only one-shot When the stop-time review gate is enabled, the Stop hook runs `codex-companion.mjs task` every turn. That path forced `persistThread: true` (persisting the thread as an on-disk rollout) and registered a tracked "Codex Stop Gate Review" job in the companion catalog (state.json + jobs/<job-id>.{json,log}). But the stop-gate review is a one-shot: the hook consumes its result inline (parsing ALLOW:/BLOCK: from stdout) and never resumes or inspects it via /codex:status, /codex:result, or --resume-last. So every turn orphaned a Codex rollout on disk and churned the 50-job catalog cap with stop-gate noise, pushing out real tasks and reviews. stop-review-gate-hook.mjs now marks its invocation with an explicit `--stop-gate` flag, and handleTask routes on that flag (not user-controlled prompt text) through an ephemeral, untracked, read-only path: the check runs before the --background branch (so a backgrounded run can't create a tracked job), passes persistThread: false (ephemeral thread, no rollout, no thread name), and omits write so the review can never select a writable sandbox. executeTaskRun now honors request.persistThread (default true). The stdout contract is unchanged; normal /codex:task runs are unaffected, even when their prompt text happens to contain the stop-gate marker sentence. 
--- plugins/codex/scripts/codex-companion.mjs | 26 ++++++++++++-- .../codex/scripts/stop-review-gate-hook.mjs | 2 +- tests/runtime.test.mjs | 34 ++++++++++++++++++- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..f90b1919 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -479,6 +479,7 @@ async function executeTaskRun(request) { throw new Error("Provide a prompt, a prompt file, piped stdin, or use --resume-last."); } + const persistThread = request.persistThread ?? true; const result = await runAppServerTurn(workspaceRoot, { resumeThreadId, prompt: request.prompt, @@ -487,8 +488,8 @@ async function executeTaskRun(request) { effort: request.effort, sandbox: request.write ? "workspace-write" : "read-only", onProgress: request.onProgress, - persistThread: true, - threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT) + persistThread, + threadName: resumeThreadId || !persistThread ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT) }); const rawOutput = typeof result.finalMessage === "string" ? 
result.finalMessage : ""; @@ -732,7 +733,7 @@ async function handleReview(argv) { async function handleTask(argv) { const { options, positionals } = parseCommandInput(argv, { valueOptions: ["model", "effort", "cwd", "prompt-file"], - booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"], + booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background", "stop-gate"], aliasMap: { m: "model" } @@ -755,6 +756,25 @@ async function handleTask(argv) { resumeLast }); + if (options["stop-gate"]) { + ensureCodexAvailable(cwd); + const execution = await executeTaskRun({ + cwd, + model, + effort, + prompt, + resumeLast: false, + jobId: null, + persistThread: false, + onProgress: createProgressReporter({ stderr: !options.json }) + }); + outputResult(options.json ? execution.payload : execution.rendered, options.json); + if (execution.exitStatus !== 0) { + process.exitCode = execution.exitStatus; + } + return; + } + if (options.background) { ensureCodexAvailable(cwd); requireTaskRequest(prompt, resumeLast); diff --git a/plugins/codex/scripts/stop-review-gate-hook.mjs b/plugins/codex/scripts/stop-review-gate-hook.mjs index 2346bdcf..efc043a8 100644 --- a/plugins/codex/scripts/stop-review-gate-hook.mjs +++ b/plugins/codex/scripts/stop-review-gate-hook.mjs @@ -102,7 +102,7 @@ function runStopReview(cwd, input = {}) { ...process.env, ...(input.session_id ? 
{ [SESSION_ID_ENV]: input.session_id } : {}) }; - const result = spawnSync(process.execPath, [scriptPath, "task", "--json", prompt], { + const result = spawnSync(process.execPath, [scriptPath, "task", "--stop-gate", "--json", prompt], { cwd, env: childEnv, encoding: "utf8", diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..31f62294 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -1832,6 +1832,15 @@ test("stop hook runs a stop-time review task and blocks on findings when the rev assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous Claude turn/i); assert.match(fakeState.lastTurnStart.prompt, /I completed the refactor and updated the retry logic\./); + // The stop-gate review is a one-shot consumed inline by the hook: it must run on an + // ephemeral thread (no on-disk rollout) and leave no record in the job catalog. + const stopGateThread = fakeState.threads.find((thread) => thread.id === fakeState.lastTurnStart.threadId); + assert.ok(stopGateThread, "stop-gate review thread should exist"); + assert.equal(stopGateThread.ephemeral, true); + + const catalog = JSON.parse(fs.readFileSync(path.join(resolveStateDir(repo), "state.json"), "utf8")); + assert.ok(!catalog.jobs.some((job) => job.title === "Codex Stop Gate Review")); + const status = run("node", [SCRIPT, "status"], { cwd: repo, env: { @@ -1840,7 +1849,30 @@ test("stop hook runs a stop-time review task and blocks on findings when the rev } }); assert.equal(status.status, 0, status.stderr); - assert.match(status.stdout, /Codex Stop Gate Review/); + assert.doesNotMatch(status.stdout, /Codex Stop Gate Review/); +}); + +test("a normal task whose prompt contains the stop-gate marker is still tracked (routing is flag-gated, not prompt-sniffed)", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", 
"README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "Run a stop-gate review of the previous Claude turn."], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(resolveStateDir(repo), "state.json"), "utf8")); + assert.equal(state.jobs.length, 1); + + const fakeState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + const thread = fakeState.threads.find((entry) => entry.id === fakeState.lastTurnStart.threadId); + assert.equal(thread.ephemeral, false); }); test("stop hook logs running tasks to stderr without blocking when the review gate is disabled", () => {