Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions plugins/codex/scripts/codex-companion.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,10 @@ async function executeReviewRun(request) {
model: request.model,
sandbox: "read-only",
outputSchema: readOutputSchema(REVIEW_SCHEMA),
/* LOCAL PATCH (two-turn-review): if the investigation turn ends without the
* forced JSON, resume the thread once to elicit the verdict. */
structuredRetryPrompt:
"Output ONLY the JSON review object matching the required schema, based on the investigation you just completed. Do not run any commands or include any prose outside the JSON.",
onProgress: request.onProgress
});
const parsed = parseStructuredOutput(result.finalMessage, {
Expand Down
83 changes: 77 additions & 6 deletions plugins/codex/scripts/lib/codex.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,34 @@ function buildTurnInput(prompt) {
return [{ type: "text", text: prompt, text_elements: [] }];
}

/* LOCAL PATCH (two-turn-review): codex-cli omits the forced-schema final_answer
* when a turn also runs tools, leaving only a prose preamble. These helpers let
* runAppServerTurn detect that and parseStructuredOutput tolerate fenced JSON.
* Auto-re-applied by ~/Documents/claude-setup-review-loop-with-codex/hooks/commit-review-loop.mjs. */
function stripJsonFences(text) {
const trimmed = String(text ?? "").trim();
if (!trimmed.startsWith("```")) {
return trimmed;
}
return trimmed
.replace(/^```(?:json)?\s*/i, "")
.replace(/\s*```$/, "")
.trim();
}

function looksLikeJson(text) {
const candidate = stripJsonFences(text);
if (!candidate || (candidate[0] !== "{" && candidate[0] !== "[")) {
return false;
}
try {
JSON.parse(candidate);
return true;
} catch {
return false;
}
}

function shorten(text, limit = 72) {
const normalized = String(text ?? "").trim().replace(/\s+/g, " ");
if (!normalized) {
Expand Down Expand Up @@ -1143,13 +1171,54 @@ export async function runAppServerTurn(cwd, options = {}) {
{ onProgress: options.onProgress }
);

let finalMessage = turnState.lastAgentMessage;
let reasoningSummary = turnState.reasoningSummary;
let finalTurn = turnState.finalTurn;
let turnId = turnState.turnId;

/* LOCAL PATCH (two-turn-review): when a schema is requested but the first
* turn ran tools, codex-cli ends the turn without emitting the forced JSON
* final_answer, so finalMessage holds only a prose preamble (or nothing).
* Resume the same thread with a JSON-only prompt; the investigation context
* is retained, so the model emits the structured verdict it skipped. The
* JSON-only turn is itself flaky on codex-cli 0.142.3, so retry a few times. */
if (options.structuredRetryPrompt && options.outputSchema && !looksLikeJson(finalMessage)) {
const maxRetries = options.structuredRetryAttempts ?? 3;
for (let attempt = 1; attempt <= maxRetries && !looksLikeJson(finalMessage); attempt += 1) {
emitProgress(
options.onProgress,
`Structured output missing after investigation; requesting JSON verdict (attempt ${attempt}/${maxRetries}).`,
"finalizing"
);
const retryState = await captureTurn(
client,
threadId,
() =>
client.request("turn/start", {
threadId,
input: buildTurnInput(options.structuredRetryPrompt),
model: options.model ?? null,
effort: options.effort ?? null,
outputSchema: options.outputSchema ?? null
}),
{ onProgress: options.onProgress }
);
reasoningSummary = mergeReasoningSections(reasoningSummary, retryState.reasoningSummary);
finalTurn = retryState.finalTurn ?? finalTurn;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve failures from the investigation turn

When the first schema turn ends as failed or interrupted after producing only a commentary message, this retry path still runs and then replaces the original failed finalTurn with the retry turn. If that JSON-only retry completes, the later status calculation reports success, so /codex:adversarial-review can render a normal verdict and exit 0 even though the investigation turn actually failed. Please only retry after a completed first turn, or keep the overall status non-zero whenever turnState.finalTurn.status was not completed.

Useful? React with 👍 / 👎.

turnId = retryState.turnId ?? turnId;
if (looksLikeJson(retryState.lastAgentMessage)) {
finalMessage = retryState.lastAgentMessage;
}
}
}

return {
status: buildResultStatus(turnState),
status: finalTurn?.status === "completed" ? 0 : 1,
threadId,
turnId: turnState.turnId,
finalMessage: turnState.lastAgentMessage,
reasoningSummary: turnState.reasoningSummary,
turn: turnState.finalTurn,
turnId,
finalMessage,
reasoningSummary,
turn: finalTurn,
error: turnState.error,
stderr: cleanCodexStderr(client.stderr),
fileChanges: turnState.fileChanges,
Expand Down Expand Up @@ -1196,8 +1265,10 @@ export function parseStructuredOutput(rawOutput, fallback = {}) {
}

try {
/* LOCAL PATCH (two-turn-review): tolerate a ```json fenced block so a verdict
* wrapped in markdown still parses instead of failing at position 0. */
return {
parsed: JSON.parse(rawOutput),
parsed: JSON.parse(stripJsonFences(rawOutput)),
parseError: null,
rawOutput,
...fallback
Expand Down
37 changes: 37 additions & 0 deletions tests/fake-codex-fixture.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,43 @@ rl.on("line", (line) => {
break;
}

// Reproduces codex-cli 0.142.3: the first outputSchema turn investigates
// (commentary + a command) and completes WITHOUT a final_answer; only the
// follow-up JSON-only turn emits the structured verdict.
if (
BEHAVIOR === "adversarial-tooluse-then-json" &&
message.params.outputSchema &&
message.params.outputSchema.properties &&
message.params.outputSchema.properties.verdict
) {
const isRetry = prompt.includes("Output ONLY the JSON");
if (isRetry) {
emitTurnCompleted(thread.id, turnId, [
{ completed: { type: "agentMessage", id: "msg_" + turnId, text: payload, phase: "final_answer" } }
]);
} else {
send({ method: "turn/started", params: { threadId: thread.id, turn: buildTurn(turnId) } });
send({
method: "item/completed",
params: {
threadId: thread.id,
turnId,
item: { type: "agentMessage", id: "msg_" + turnId, text: "I'll inspect the diff before answering.", phase: "commentary" }
}
});
send({
method: "item/completed",
params: {
threadId: thread.id,
turnId,
item: { type: "commandExecution", id: "cmd_" + turnId, command: "git diff HEAD~1", status: "completed", exitCode: 0 }
}
});
send({ method: "turn/completed", params: { threadId: thread.id, turn: buildTurn(turnId, "completed") } });
}
break;
}

const items = [
...(BEHAVIOR === "with-reasoning"
? [
Expand Down
21 changes: 21 additions & 0 deletions tests/runtime.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,27 @@ test("adversarial review renders structured findings over app-server turn/start"
assert.match(result.stdout, /Missing empty-state guard/);
});

test("adversarial review recovers structured output when the first turn only used tools", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
installFakeCodex(binDir, "adversarial-tooluse-then-json");
initGitRepo(repo);
fs.mkdirSync(path.join(repo, "src"));
fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n");
run("git", ["add", "src/app.js"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });
fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n");

const result = run("node", [SCRIPT, "adversarial-review"], {
cwd: repo,
env: buildEnv(binDir)
});

assert.equal(result.status, 0, result.stderr);
assert.doesNotMatch(result.stdout, /did not return valid structured JSON/);
assert.match(result.stdout, /Verdict:/);
});

test("adversarial review accepts the same base-branch targeting as review", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
Expand Down