diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 83df468a..5c07b9fc 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -413,6 +413,10 @@ async function executeReviewRun(request) { model: request.model, sandbox: "read-only", outputSchema: readOutputSchema(REVIEW_SCHEMA), + /* LOCAL PATCH (two-turn-review): if the investigation turn ends without the + * forced JSON, resume the thread once to elicit the verdict. */ + structuredRetryPrompt: + "Output ONLY the JSON review object matching the required schema, based on the investigation you just completed. Do not run any commands or include any prose outside the JSON.", onProgress: request.onProgress }); const parsed = parseStructuredOutput(result.finalMessage, { diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index fead00cc..4cc4736a 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -87,6 +87,34 @@ function buildTurnInput(prompt) { return [{ type: "text", text: prompt, text_elements: [] }]; } +/* LOCAL PATCH (two-turn-review): codex-cli omits the forced-schema final_answer + * when a turn also runs tools, leaving only a prose preamble. These helpers let + * runAppServerTurn detect that and parseStructuredOutput tolerate fenced JSON. + * Auto-re-applied by ~/Documents/claude-setup-review-loop-with-codex/hooks/commit-review-loop.mjs. */ +function stripJsonFences(text) { + const trimmed = String(text ?? "").trim(); + if (!trimmed.startsWith("```")) { + return trimmed; + } + return trimmed + .replace(/^```(?:json)?\s*/i, "") + .replace(/\s*```$/, "") + .trim(); +} + +function looksLikeJson(text) { + const candidate = stripJsonFences(text); + if (!candidate || (candidate[0] !== "{" && candidate[0] !== "[")) { + return false; + } + try { + JSON.parse(candidate); + return true; + } catch { + return false; + } +} + function shorten(text, limit = 72) { const normalized = String(text ?? "").trim().replace(/\s+/g, " "); if (!normalized) { @@ -1143,13 +1171,54 @@ export async function runAppServerTurn(cwd, options = {}) { { onProgress: options.onProgress } ); + let finalMessage = turnState.lastAgentMessage; + let reasoningSummary = turnState.reasoningSummary; + let finalTurn = turnState.finalTurn; + let turnId = turnState.turnId; + + /* LOCAL PATCH (two-turn-review): when a schema is requested but the first + * turn ran tools, codex-cli ends the turn without emitting the forced JSON + * final_answer, so finalMessage holds only a prose preamble (or nothing). + * Resume the same thread with a JSON-only prompt; the investigation context + * is retained, so the model emits the structured verdict it skipped. The + * JSON-only turn is itself flaky on codex-cli 0.142.3, so retry a few times. */ + if (options.structuredRetryPrompt && options.outputSchema && !looksLikeJson(finalMessage)) { + const maxRetries = options.structuredRetryAttempts ?? 3; + for (let attempt = 1; attempt <= maxRetries && !looksLikeJson(finalMessage); attempt += 1) { + emitProgress( + options.onProgress, + `Structured output missing after investigation; requesting JSON verdict (attempt ${attempt}/${maxRetries}).`, + "finalizing" + ); + const retryState = await captureTurn( + client, + threadId, + () => + client.request("turn/start", { + threadId, + input: buildTurnInput(options.structuredRetryPrompt), + model: options.model ?? null, + effort: options.effort ?? null, + outputSchema: options.outputSchema ?? null + }), + { onProgress: options.onProgress } + ); + reasoningSummary = mergeReasoningSections(reasoningSummary, retryState.reasoningSummary); + finalTurn = retryState.finalTurn ?? finalTurn; + turnId = retryState.turnId ?? turnId; + if (looksLikeJson(retryState.lastAgentMessage)) { + finalMessage = retryState.lastAgentMessage; + } + } + } + return { - status: buildResultStatus(turnState), + status: finalTurn?.status === "completed" ? 0 : 1, threadId, - turnId: turnState.turnId, - finalMessage: turnState.lastAgentMessage, - reasoningSummary: turnState.reasoningSummary, - turn: turnState.finalTurn, + turnId, + finalMessage, + reasoningSummary, + turn: finalTurn, error: turnState.error, stderr: cleanCodexStderr(client.stderr), fileChanges: turnState.fileChanges, @@ -1196,8 +1265,10 @@ export function parseStructuredOutput(rawOutput, fallback = {}) { } try { + /* LOCAL PATCH (two-turn-review): tolerate a ```json fenced block so a verdict + * wrapped in markdown still parses instead of failing at position 0. */ return { - parsed: JSON.parse(rawOutput), + parsed: JSON.parse(stripJsonFences(rawOutput)), parseError: null, rawOutput, ...fallback diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs index f83c96a0..4a645b1e 100644 --- a/tests/fake-codex-fixture.mjs +++ b/tests/fake-codex-fixture.mjs @@ -567,6 +567,43 @@ rl.on("line", (line) => { break; } + // Reproduces codex-cli 0.142.3: the first outputSchema turn investigates + // (commentary + a command) and completes WITHOUT a final_answer; only the + // follow-up JSON-only turn emits the structured verdict. + if ( + BEHAVIOR === "adversarial-tooluse-then-json" && + message.params.outputSchema && + message.params.outputSchema.properties && + message.params.outputSchema.properties.verdict + ) { + const isRetry = prompt.includes("Output ONLY the JSON"); + if (isRetry) { + emitTurnCompleted(thread.id, turnId, [ + { completed: { type: "agentMessage", id: "msg_" + turnId, text: payload, phase: "final_answer" } } + ]); + } else { + send({ method: "turn/started", params: { threadId: thread.id, turn: buildTurn(turnId) } }); + send({ + method: "item/completed", + params: { + threadId: thread.id, + turnId, + item: { type: "agentMessage", id: "msg_" + turnId, text: "I'll inspect the diff before answering.", phase: "commentary" } + } + }); + send({ + method: "item/completed", + params: { + threadId: thread.id, + turnId, + item: { type: "commandExecution", id: "cmd_" + turnId, command: "git diff HEAD~1", status: "completed", exitCode: 0 } + } + }); + send({ method: "turn/completed", params: { threadId: thread.id, turn: buildTurn(turnId, "completed") } }); + } + break; + } + const items = [ ...(BEHAVIOR === "with-reasoning" ? [ diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 8f276835..3e547ea4 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -386,6 +386,27 @@ test("adversarial review renders structured findings over app-server turn/start" assert.match(result.stdout, /Missing empty-state guard/); }); +test("adversarial review recovers structured output when the first turn only used tools", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "adversarial-tooluse-then-json"); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n"); + run("git", ["add", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n"); + + const result = run("node", [SCRIPT, "adversarial-review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.doesNotMatch(result.stdout, /did not return valid structured JSON/); + assert.match(result.stdout, /Verdict:/); +}); + test("adversarial review accepts the same base-branch targeting as review", () => { const repo = makeTempDir(); const binDir = makeTempDir();