From dc09fb663ca8c0fa8069a0d71bb819e29f8d0905 Mon Sep 17 00:00:00 2001
From: Ragnar Rova <ragnar.rova@gmail.com>
Date: Mon, 15 Jun 2026 19:49:10 +0200
Subject: [PATCH] feat: add resumable review threads to /codex:review and
 /codex:adversarial-review

Add opt-in `--resume` / `--fresh` / `--within-hours` flags so an iterative
review -> fix -> re-review loop can reuse a Codex review thread instead of
re-exploring the repo from a cold start every time, which sharply cuts token
burn. Reuse is scoped to the git worktree and bounded by recency.

- `--resume` reuses the worktree's recent review thread if one exists and
  otherwise starts a fresh resumable thread; `--fresh` forces a new thread;
  `--within-hours <n>` sets the reuse window (default 3h).
- Native one-shot `review/start` cannot carry context between runs, so the
  resumable path uses a turn-based reviewer. Only persisted session-mode
  review jobs are eligible for resume, scoped to the same review kind
  (`/codex:review` vs `/codex:adversarial-review`, which share jobClass but
  differ by kind); native one-shot review threads are ephemeral and never
  selected. The newest resumable review run is reused only if it completed --
  a cancelled, failed, or in-progress run (including a concurrent run still
  starting, before it records a thread id) is never reused, even if an older
  run on the same thread completed. If a persisted thread was pruned/expired
  the run falls back to a fresh thread instead of failing, and the reuse-
  continuation note is injected only when the resume actually succeeds.
- The resumable plain-review prompt (prompts/review.md) is adapted from
  Codex's native reviewer system prompt (codex-rs/core/review_prompt.md) so
  that `/codex:review --resume` reviews like native `/codex:review`; only the
  output section is remapped to this plugin's review-output schema.
- Default (no-flag) `/codex:review` and `/codex:adversarial-review` behavior
  is byte-identical to before -- the change is purely additive.

Tests cover the new flags in tests/commands.test.mjs and tests/runtime.test.mjs
(reuse, recency fallback, native-thread exclusion, kind scoping, non-completed/
newest-not-completed/still-starting exclusion, and pruned-thread fallback).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 plugins/codex/commands/adversarial-review.md |   9 +-
 plugins/codex/commands/review.md             |  10 +-
 plugins/codex/prompts/review.md              |  75 ++++++
 plugins/codex/scripts/codex-companion.mjs    | 132 ++++++++--
 plugins/codex/scripts/lib/codex.mjs          |  58 ++++-
 tests/commands.test.mjs                      |  10 +-
 tests/runtime.test.mjs                       | 248 +++++++++++++++++++
 7 files changed, 512 insertions(+), 30 deletions(-)
 create mode 100644 plugins/codex/prompts/review.md
diff --git a/plugins/codex/commands/adversarial-review.md b/plugins/codex/commands/adversarial-review.md
index da440ab4..47aedc0e 100644
--- a/plugins/codex/commands/adversarial-review.md
+++ b/plugins/codex/commands/adversarial-review.md
@@ -1,6 +1,6 @@
 ---
 description: Run a Codex review that challenges the implementation approach and design choices
-argument-hint: '[--wait|--background] [--base <ref>] [--scope auto|working-tree|branch] [focus ...]'
+argument-hint: '[--wait|--background] [--base <ref>] [--scope auto|working-tree|branch] [--resume|--fresh] [--within-hours <n>] [focus ...]'
 disable-model-invocation: true
 allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), AskUserQuestion
 ---
@@ -44,6 +44,13 @@ Argument handling:
 - It does not support `--scope staged` or `--scope unstaged`.
 - Unlike `/codex:review`, it can still take extra focus text after the flags.
 
+Context reuse (lower token burn):
+- `--resume` reuses-or-creates: it reuses this git worktree's recent review thread if one exists, and otherwise starts a new resumable thread. It is safe to use on every review; you never need to seed it with `--fresh` first, and it never fails just because no prior thread exists.
+- Reusing the prior thread keeps the exploration Codex already did instead of re-reading the codebase each pass, lowering token/quota burn on iterative review loops.
+- Reuse is scoped to the current git worktree and only happens when the prior review thread was used within the last few hours (default 3, override with `--within-hours <n>`); otherwise a fresh thread is started automatically.
+- `--fresh` forces a new review thread for this worktree even when a recent one exists — use it only to deliberately discard the previous review context.
+- Pass `--resume`/`--fresh` through verbatim; do not treat them as focus text.
+
 Foreground flow:
 - Run:
 ```bash
diff --git a/plugins/codex/commands/review.md b/plugins/codex/commands/review.md
index fb70a487..1b1f4f87 100644
--- a/plugins/codex/commands/review.md
+++ b/plugins/codex/commands/review.md
@@ -1,6 +1,6 @@
 ---
 description: Run a Codex code review against local git state
-argument-hint: '[--wait|--background] [--base <ref>] [--scope auto|working-tree|branch]'
+argument-hint: '[--wait|--background] [--base <ref>] [--scope auto|working-tree|branch] [--resume|--fresh] [--within-hours <n>]'
 disable-model-invocation: true
 allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), AskUserQuestion
 ---
@@ -39,6 +39,14 @@ Argument handling:
 - `/codex:review` is native-review only. It does not support staged-only review, unstaged-only review, or extra focus text.
 - If the user needs custom review instructions or more adversarial framing, they should use `/codex:adversarial-review`.
 
+Context reuse (lower token burn):
+- `--resume` reuses-or-creates: it reuses this git worktree's recent review thread if one exists, and otherwise starts a new resumable thread. It is safe to use on every review; you never need to seed it with `--fresh` first, and it never fails just because no prior thread exists.
+- Reusing the prior thread lets Codex keep the exploration it already did instead of re-reading the codebase from scratch, which sharply cuts token/quota burn during a review → fix → re-review loop.
+- Reuse is scoped to the current git worktree and only happens when the prior review thread was used within the last few hours (default 3, override with `--within-hours <n>`); otherwise a fresh thread is started automatically.
+- `--fresh` forces a brand-new review thread for this worktree even when a recent one exists — use it only to deliberately discard the previous review context (a new task, or a stale thread).
+- `--resume`/`--fresh` run a turn-based reviewer (so the thread can be resumed) instead of Codex's one-shot native review mode, which cannot carry context between runs. Plain `/codex:review` with no flag is still the best choice for a single cold pass.
+- Pass `--resume`/`--fresh` through verbatim; the companion script handles them.
+
 Foreground flow:
 - Run:
 ```bash
diff --git a/plugins/codex/prompts/review.md b/plugins/codex/prompts/review.md
new file mode 100644
index 00000000..d8b86946
--- /dev/null
+++ b/plugins/codex/prompts/review.md
@@ -0,0 +1,75 @@
+<!-- Adapted from Codex's native reviewer system prompt
+     (openai/codex: codex-rs/core/review_prompt.md) so that the resumable
+     turn-based `/codex:review` reviews like native `/codex:review`. The rubric
+     is kept faithful; only the output section is remapped to this plugin's
+     review-output schema (see schemas/review-output.schema.json). -->
+
+# Review guidelines
+
+You are acting as a reviewer for a proposed code change made by another engineer.
+
+Below are default guidelines for determining whether the original author would appreciate an issue being flagged. They are not the final word: more specific guidelines you encounter (in the repository context below, a developer message, or a user message) override these.
+
+Flag something as a bug only when:
+1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code.
+2. The bug is discrete and actionable (not a general issue with the codebase, and not a combination of multiple issues).
+3. Fixing it does not demand a level of rigor absent from the rest of the codebase (e.g. one-off scripts do not need detailed comments and input validation).
+4. The bug was introduced by the change under review — do not flag pre-existing bugs.
+5. The original author would likely fix it if they were made aware of it.
+6. It does not rely on unstated assumptions about the codebase or the author's intent.
+7. You can identify the specific other code that is provably affected — it is not enough to speculate that a change may disrupt another part of the codebase.
+8. It is clearly not an intentional change by the original author.
+
+When you flag a bug, the accompanying explanation should:
+1. Be clear about why the issue is a bug.
+2. Communicate severity accurately — do not claim an issue is more severe than it is.
+3. Be brief: at most one paragraph, with no line breaks in the prose unless needed for a code fragment.
+4. Avoid code chunks longer than 3 lines; wrap any code in inline code tags or a short code block.
+5. Explicitly state the scenarios, environments, or inputs required for the bug to arise, and make clear that severity depends on those factors.
+6. Be matter-of-fact, not accusatory or flattering (avoid "Great job…", "Thanks for…").
+7. Let the author grasp the issue without close reading.
+
+HOW MANY FINDINGS TO RETURN:
+
+Output every finding the original author would fix if they knew about it. If there is no finding a person would clearly want fixed, prefer returning none. Do not stop at the first qualifying finding; continue until you have listed every qualifying one.
+
+GUIDELINES:
+
+- Ignore trivial style unless it obscures meaning or violates a documented standard.
+- Use one finding per distinct issue.
+- Keep the line range as short as possible — avoid ranges longer than 5–10 lines; pick the subrange that pinpoints the problem. The location should overlap the change under review.
+- Do not generate a full patch; describe the fix rather than writing the replacement code.
+
+PRIORITY:
+
+Assess each finding's priority and map it to `severity`:
+- P0 → `critical`: drop everything; a universal issue that does not depend on assumptions about the inputs.
+- P1 → `high`: urgent; should be addressed in the next cycle.
+- P2 → `medium`: normal; to be fixed eventually.
+- P3 → `low`: nice to have.
+
+OVERALL CORRECTNESS:
+
+Decide whether the change is correct — existing code and tests will not break and it is free of blocking bugs. Ignore non-blocking issues (style, formatting, typos, documentation, nits) for this verdict. Map a correct patch to `approve` and an incorrect one to `needs-attention`.
+
+## What to review
+
+Target: {{TARGET_LABEL}}
+
+{{REVIEW_COLLECTION_GUIDANCE}}
+
+## Output format
+
+Return only valid JSON matching the provided schema — no markdown fences and no extra prose. For each finding provide:
+- `title`: ≤ 80 chars, imperative.
+- `body`: one paragraph of Markdown explaining why it is a bug, citing the affected file/lines/function.
+- `severity`: mapped from the priority above.
+- `confidence`: a float from 0 to 1.
+- `file`, `line_start`, `line_end`: the affected location, kept as short as possible and overlapping the change.
+- `recommendation`: describe the fix concisely; do not provide a code patch.
+
+Set `verdict` from the overall-correctness decision, write `summary` as a terse 1–3 sentence justification of that verdict, and use `next_steps` for any follow-ups.
+
+## Repository context
+
+{{REVIEW_INPUT}}
diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 35222fd5..c0c3603a 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -8,6 +8,7 @@ import { fileURLToPath } from "node:url";
 
 import { parseArgs, splitRawArgumentString } from "./lib/args.mjs";
 import {
+    buildPersistentReviewThreadName,
     buildPersistentTaskThreadName,
     DEFAULT_CONTINUE_PROMPT,
     findLatestTaskThread,
@@ -69,14 +70,22 @@ const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000;
 const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
 const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]);
 const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn.";
+const DEFAULT_REVIEW_REUSE_WINDOW_HOURS = 3;
+const REVIEW_RESUME_CONTINUATION_NOTE = [
+  "<continuation>",
+  "You already reviewed this worktree earlier in this same thread.",
+  "Reuse what you already learned about the codebase; do not re-read files you have already seen unless they changed.",
+  "The working tree may have moved on since then. Concentrate on what changed, and re-check whether your earlier findings are now resolved or still open.",
+  "</continuation>"
+].join("\n");
 
 function printUsage() {
   console.log(
     [
       "Usage:",
       "  node scripts/codex-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]",
-      "  node scripts/codex-companion.mjs review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>]",
-      "  node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [focus text]",
+      "  node scripts/codex-companion.mjs review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [--resume|--fresh] [--within-hours <n>]",
+      "  node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [--resume|--fresh] [--within-hours <n>] [focus text]",
       "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [prompt]",
       "  node scripts/codex-companion.mjs status [job-id] [--all] [--json]",
       "  node scripts/codex-companion.mjs result [job-id] [--json]",
@@ -235,15 +244,19 @@ async function handleSetup(argv) {
   outputResult(options.json ? finalReport : renderSetupReport(finalReport), options.json);
 }
 
-function buildAdversarialReviewPrompt(context, focusText) {
-  const template = loadPromptTemplate(ROOT_DIR, "adversarial-review");
-  return interpolateTemplate(template, {
-    REVIEW_KIND: "Adversarial Review",
+function buildReviewTurnPrompt(context, { reviewName, focusText = "", resuming = false } = {}) {
+  const templateName = reviewName === "Adversarial Review" ? "adversarial-review" : "review";
+  const template = loadPromptTemplate(ROOT_DIR, templateName);
+  const body = interpolateTemplate(template, {
+    REVIEW_KIND: reviewName,
     TARGET_LABEL: context.target.label,
     USER_FOCUS: focusText || "No extra focus provided.",
     REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
     REVIEW_INPUT: context.content
   });
+  // Inject the reuse note only when actually resuming, so the no-flag path is
+  // byte-identical to the original one-shot prompt.
+  return resuming ? `${REVIEW_RESUME_CONTINUATION_NOTE}\n\n${body}` : body;
 }
 
 function ensureCodexAvailable(cwd) {
@@ -352,6 +365,54 @@ async function resolveLatestTrackedTaskThread(cwd, options = {}) {
   return findLatestTaskThread(workspaceRoot);
 }
 
+function resolveReviewReuseWindowMs(hours) {
+  const parsed = Number(hours);
+  if (!Number.isFinite(parsed) || parsed <= 0) {
+    return DEFAULT_REVIEW_REUSE_WINDOW_HOURS * 60 * 60 * 1000;
+  }
+  return parsed * 60 * 60 * 1000;
+}
+
+// Reuse is keyed by the git worktree (the per-worktree state dir already scopes
+// jobs) and bounded by recency, so stale exploration is not resumed.
+// Only `resumable` jobs are eligible: plain native `/codex:review` runs persist
+// a `threadId` too, but those threads are ephemeral and cannot be resumed.
+function resolveLatestReviewThread(workspaceRoot, { withinMs = null, excludeJobId = null, kind = null } = {}) {
+  // Take the newest resumable review run and reuse its thread only if that run
+  // completed. The newest run is considered even before it records a thread id
+  // (a concurrent run that has not yet emitted "Thread ready"); otherwise we
+  // could fall back past it to an older completed run on the same thread and
+  // let two runs resume the same thread and interleave turns. A cancelled,
+  // failed, or still-running newest run therefore means: start fresh.
+  const candidate = sortJobsNewestFirst(listJobs(workspaceRoot)).find(
+    (job) =>
+      job.id !== excludeJobId &&
+      job.jobClass === "review" &&
+      // Scope to the same review kind. `/codex:review` and
+      // `/codex:adversarial-review` share jobClass "review" but differ by
+      // `kind`; reusing across kinds would inject the other reviewer's prompt
+      // (e.g. the adversarial framing) into a plain review thread.
+      (kind === null || job.kind === kind) &&
+      job.resumable === true
+  );
+  if (!candidate || candidate.status !== "completed" || !candidate.threadId) {
+    return null;
+  }
+
+  if (Number.isFinite(withinMs)) {
+    const lastUsed = Date.parse(candidate.completedAt ?? candidate.createdAt ?? "");
+    if (!Number.isFinite(lastUsed) || Date.now() - lastUsed > withinMs) {
+      return null;
+    }
+  }
+
+  return {
+    id: candidate.threadId,
+    jobId: candidate.id,
+    lastUsedAt: candidate.completedAt ?? candidate.createdAt ?? null
+  };
+}
+
 async function executeReviewRun(request) {
   ensureCodexAvailable(request.cwd);
   ensureGitRepository(request.cwd);
@@ -362,7 +423,8 @@ async function executeReviewRun(request) {
   });
   const focusText = request.focusText?.trim() ?? "";
   const reviewName = request.reviewName ?? "Review";
-  if (reviewName === "Review") {
+  const sessionMode = Boolean(request.resume || request.fresh);
+  if (reviewName === "Review" && !sessionMode) {
     const reviewTarget = validateNativeReviewRequest(target, focusText);
     const result = await runAppServerReview(request.cwd, {
       target: reviewTarget,
@@ -404,14 +466,39 @@ async function executeReviewRun(request) {
   }
 
   const context = collectReviewContext(request.cwd, target);
-  const prompt = buildAdversarialReviewPrompt(context, focusText);
+
+  let resumeThreadId = null;
+  if (request.resume) {
+    const prior = resolveLatestReviewThread(resolveWorkspaceRoot(context.repoRoot), {
+      withinMs: request.reuseWindowMs ?? resolveReviewReuseWindowMs(),
+      excludeJobId: request.jobId ?? null,
+      kind: reviewName === "Adversarial Review" ? "adversarial-review" : "review"
+    });
+    if (prior) {
+      resumeThreadId = prior.id;
+    }
+  }
+
+  const persistThread = sessionMode;
   const result = await runAppServerTurn(context.repoRoot, {
-    prompt,
+    // Build the prompt from the ACTUAL outcome: if a pruned/expired thread
+    // forces a fresh-thread fallback, the continuation note must not claim
+    // prior context that the fresh thread does not have.
+    buildPrompt: ({ resumed }) => buildReviewTurnPrompt(context, { reviewName, focusText, resuming: resumed }),
     model: request.model,
     sandbox: "read-only",
     outputSchema: readOutputSchema(REVIEW_SCHEMA),
-    onProgress: request.onProgress
+    onProgress: request.onProgress,
+    resumeThreadId,
+    // If a persisted review thread was pruned/expired by Codex, don't fail the
+    // review — fall back to a fresh persisted thread.
+    resumeFallback: persistThread,
+    persistThread,
+    threadName: persistThread ? buildPersistentReviewThreadName(`${reviewName} ${target.label}`) : null
   });
+  // The resume may have fallen back to a fresh thread (pruned/expired); only
+  // report reuse when the returned thread is actually the one we resumed.
+  const actuallyResumed = Boolean(resumeThreadId) && result.threadId === resumeThreadId;
   const parsed = parseStructuredOutput(result.finalMessage, {
     status: result.status,
     failureMessage: result.error?.message ?? result.stderr
@@ -420,6 +507,9 @@ async function executeReviewRun(request) {
     review: reviewName,
     target,
     threadId: result.threadId,
+    sessionMode,
+    reused: actuallyResumed,
+    resumedThreadId: actuallyResumed ? resumeThreadId : null,
     context: {
       repoRoot: context.repoRoot,
       branch: context.branch,
@@ -561,7 +651,7 @@ function getJobKindLabel(kind, jobClass) {
   return jobClass === "review" ? "review" : "rescue";
 }
 
-function createCompanionJob({ prefix, kind, title, workspaceRoot, jobClass, summary, write = false }) {
+function createCompanionJob({ prefix, kind, title, workspaceRoot, jobClass, summary, write = false, resumable = false }) {
   return createJobRecord({
     id: generateJobId(prefix),
     kind,
@@ -570,7 +660,8 @@ function createCompanionJob({ prefix, kind, title, workspaceRoot, jobClass, summ
     workspaceRoot,
     jobClass,
     summary,
-    write
+    write,
+    resumable
   });
 }
 
@@ -681,13 +772,19 @@ function enqueueBackgroundTask(cwd, job, request) {
 
 async function handleReviewCommand(argv, config) {
   const { options, positionals } = parseCommandInput(argv, {
-    valueOptions: ["base", "scope", "model", "cwd"],
-    booleanOptions: ["json", "background", "wait"],
+    valueOptions: ["base", "scope", "model", "cwd", "within-hours"],
+    booleanOptions: ["json", "background", "wait", "resume", "fresh"],
     aliasMap: {
       m: "model"
     }
   });
 
+  const resume = Boolean(options.resume);
+  const fresh = Boolean(options.fresh);
+  if (resume && fresh) {
+    throw new Error("Choose either --resume or --fresh.");
+  }
+
   const cwd = resolveCommandCwd(options);
   const workspaceRoot = resolveCommandWorkspace(options);
   const focusText = positionals.join(" ").trim();
@@ -704,7 +801,8 @@ async function handleReviewCommand(argv, config) {
     title: metadata.title,
     workspaceRoot,
     jobClass: "review",
-    summary: metadata.summary
+    summary: metadata.summary,
+    resumable: resume || fresh
   });
   await runForegroundCommand(
     job,
@@ -716,6 +814,10 @@ async function handleReviewCommand(argv, config) {
         model: options.model,
         focusText,
         reviewName: config.reviewName,
+        resume,
+        fresh,
+        reuseWindowMs: resolveReviewReuseWindowMs(options["within-hours"]),
+        jobId: job.id,
         onProgress: progress
       }),
     { json: options.json }
diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
index f2fe88bd..daa53458 100644
--- a/plugins/codex/scripts/lib/codex.mjs
+++ b/plugins/codex/scripts/lib/codex.mjs
@@ -41,6 +41,7 @@ import { binaryAvailable } from "./process.mjs";
 
 const SERVICE_NAME = "claude_code_codex_plugin";
 const TASK_THREAD_PREFIX = "Codex Companion Task";
+const REVIEW_THREAD_PREFIX = "Codex Companion Review";
 const DEFAULT_CONTINUE_PROMPT =
   "Continue from the current thread state. Pick the next highest-value step and follow through until the task is resolved.";
 
@@ -103,6 +104,11 @@ function buildTaskThreadName(prompt) {
   return excerpt ? `${TASK_THREAD_PREFIX}: ${excerpt}` : TASK_THREAD_PREFIX;
 }
 
+function buildReviewThreadName(label) {
+  const excerpt = shorten(label, 56);
+  return excerpt ? `${REVIEW_THREAD_PREFIX}: ${excerpt}` : REVIEW_THREAD_PREFIX;
+}
+
 function extractThreadId(message) {
   return message?.params?.threadId ?? null;
 }
@@ -969,16 +975,9 @@ export async function runAppServerTurn(cwd, options = {}) {
 
   return withAppServer(cwd, async (client) => {
     let threadId;
+    let resumedThread = false;
 
-    if (options.resumeThreadId) {
-      emitProgress(options.onProgress, `Resuming thread ${options.resumeThreadId}.`, "starting");
-      const response = await resumeThread(client, options.resumeThreadId, cwd, {
-        model: options.model,
-        sandbox: options.sandbox,
-        ephemeral: false
-      });
-      threadId = response.thread.id;
-    } else {
+    const startFreshThread = async () => {
       emitProgress(options.onProgress, "Starting Codex task thread.", "starting");
       const response = await startThread(client, cwd, {
         model: options.model,
@@ -986,14 +985,45 @@ export async function runAppServerTurn(cwd, options = {}) {
         ephemeral: options.persistThread ? false : true,
         threadName: options.persistThread ? options.threadName : options.threadName ?? null
       });
-      threadId = response.thread.id;
+      return response.thread.id;
+    };
+
+    if (options.resumeThreadId) {
+      emitProgress(options.onProgress, `Resuming thread ${options.resumeThreadId}.`, "starting");
+      try {
+        const response = await resumeThread(client, options.resumeThreadId, cwd, {
+          model: options.model,
+          sandbox: options.sandbox,
+          ephemeral: false
+        });
+        threadId = response.thread.id;
+        resumedThread = true;
+      } catch (error) {
+        // A persisted thread can be pruned/expired by Codex. When the caller
+        // opts into fallback, start a fresh thread instead of failing the run.
+        // Resume fails before any turn starts, so no tokens are wasted.
+        if (!options.resumeFallback) {
+          throw error;
+        }
+        emitProgress(
+          options.onProgress,
+          `Could not resume thread ${options.resumeThreadId}; starting a fresh thread.`,
+          "starting"
+        );
+        threadId = await startFreshThread();
+      }
+    } else {
+      threadId = await startFreshThread();
     }
 
     emitProgress(options.onProgress, `Thread ready (${threadId}).`, "starting", {
       threadId
     });
 
-    const prompt = options.prompt?.trim() || options.defaultPrompt || "";
+    const prompt =
+      typeof options.buildPrompt === "function"
+        ? options.buildPrompt({ resumed: resumedThread })
+        : options.prompt?.trim() || options.defaultPrompt || "";
     if (!prompt) {
       throw new Error("A prompt is required for this Codex run.");
     }
@@ -1054,6 +1084,10 @@ export function buildPersistentTaskThreadName(prompt) {
   return buildTaskThreadName(prompt);
 }
 
+export function buildPersistentReviewThreadName(label) {
+  return buildReviewThreadName(label);
+}
+
 export function parseStructuredOutput(rawOutput, fallback = {}) {
   if (!rawOutput) {
     return {
@@ -1085,4 +1119,4 @@ export function readOutputSchema(schemaPath) {
   return readJsonFile(schemaPath);
 }
 
-export { DEFAULT_CONTINUE_PROMPT, TASK_THREAD_PREFIX };
+export { DEFAULT_CONTINUE_PROMPT, TASK_THREAD_PREFIX, REVIEW_THREAD_PREFIX };
diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs
index 3724ffa4..0e0f9296 100644
--- a/tests/commands.test.mjs
+++ b/tests/commands.test.mjs
@@ -37,6 +37,10 @@ test("review command uses AskUserQuestion and background Bash while staying revi
   assert.match(source, /When in doubt, run the review/i);
   assert.match(source, /\(Recommended\)/);
   assert.match(source, /does not support staged-only review, unstaged-only review, or extra focus text/i);
+  assert.match(source, /\[--resume\|--fresh\]/);
+  assert.match(source, /--within-hours/);
+  assert.match(source, /reuses this git worktree's recent review thread/i);
+  assert.match(source, /reuses-or-creates/i);
 });
 
 test("adversarial review command uses AskUserQuestion and background Bash while staying review-only", () => {
@@ -49,7 +53,11 @@ test("adversarial review command uses AskUserQuestion and background Bash while
   assert.match(source, /```bash/);
   assert.match(source, /```typescript/);
   assert.match(source, /adversarial-review "\$ARGUMENTS"/);
-  assert.match(source, /\[--scope auto\|working-tree\|branch\] \[focus \.\.\.\]/);
+  assert.match(source, /\[--scope auto\|working-tree\|branch\]/);
+  assert.match(source, /\[focus \.\.\.\]/);
+  assert.match(source, /\[--resume\|--fresh\]/);
+  assert.match(source, /--within-hours/);
+  assert.match(source, /reuses-or-creates/i);
   assert.match(source, /run_in_background:\s*true/);
   assert.match(source, /command:\s*`node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" adversarial-review "\$ARGUMENTS"`/);
   assert.match(source, /description:\s*"Codex adversarial review"/);
diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs
index 90408372..1278b4fb 100644
--- a/tests/runtime.test.mjs
+++ b/tests/runtime.test.mjs
@@ -300,6 +300,254 @@ test("adversarial review asks Codex to inspect larger diffs itself", () => {
   assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/);
 });
 
+test("review --fresh starts a resumable thread and --resume reuses it for the worktree", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  const fresh = run("node", [SCRIPT, "review", "--fresh"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(fresh.status, 0, fresh.stderr);
+
+  let state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsAfterFresh = state.threads.length;
+  assert.equal(
+    state.threads.some((thread) => !thread.ephemeral && String(thread.name || "").startsWith("Codex Companion Review")),
+    true,
+    "fresh review should persist a named, non-ephemeral review thread"
+  );
+  assert.doesNotMatch(state.lastTurnStart.prompt, /<continuation>/);
+
+  const resume = run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(resume.status, 0, resume.stderr);
+
+  state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.threads.length, threadsAfterFresh, "resume should reuse the thread, not create a new one");
+  assert.match(state.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume falls back to a fresh thread when the prior thread is too old", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  const fresh = run("node", [SCRIPT, "review", "--fresh"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(fresh.status, 0, fresh.stderr);
+  let state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsAfterFresh = state.threads.length;
+
+  // A ~36ms recency window is far shorter than the time to spawn the resume
+  // process, so the prior thread is always "too old" to reuse here.
+  const resume = run("node", [SCRIPT, "review", "--resume", "--within-hours", "0.00001"], {
+    cwd: repo,
+    env: buildEnv(binDir)
+  });
+  assert.equal(resume.status, 0, resume.stderr);
+
+  state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.threads.length, threadsAfterFresh + 1, "a stale prior thread should not be reused");
+  assert.doesNotMatch(state.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume does not reuse a plain native review thread", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  // Plain native review (no flag) persists a threadId but the thread is
+  // ephemeral and cannot be resumed.
+  const native = run("node", [SCRIPT, "review"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(native.status, 0, native.stderr);
+  let state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsAfterNative = state.threads.length;
+
+  const resume = run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(resume.status, 0, resume.stderr);
+
+  state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.threads.length, threadsAfterNative + 1, "resume must start a fresh thread, not reuse the native one");
+  assert.doesNotMatch(state.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume falls back to a fresh thread when the persisted thread is gone", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  const fresh = run("node", [SCRIPT, "review", "--fresh"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(fresh.status, 0, fresh.stderr);
+
+  // Simulate Codex pruning/expiring the persisted thread.
+  const statePath = path.join(binDir, "fake-codex-state.json");
+  const state = JSON.parse(fs.readFileSync(statePath, "utf8"));
+  state.threads = [];
+  fs.writeFileSync(statePath, JSON.stringify(state, null, 2));
+
+  // resume should recover (start a fresh thread) rather than fail.
+  const resume = run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(resume.status, 0, resume.stderr);
+
+  const after = JSON.parse(fs.readFileSync(statePath, "utf8"));
+  assert.equal(after.threads.length, 1, "fallback should create exactly one fresh thread");
+  // The fresh fallback thread has no prior context, so the prompt must NOT
+  // claim continuation ("reuse what you already saw").
+  assert.doesNotMatch(after.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume does not reuse a thread whose review did not complete", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  const fresh = run("node", [SCRIPT, "review", "--fresh"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(fresh.status, 0, fresh.stderr);
+  let state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsAfterFresh = state.threads.length;
+
+  // Simulate the resumable review having failed: the thread id was recorded
+  // mid-run, but the review never completed.
+  const stateFile = path.join(resolveStateDir(repo), "state.json");
+  const companionState = JSON.parse(fs.readFileSync(stateFile, "utf8"));
+  let mutated = false;
+  for (const job of companionState.jobs) {
+    if (job.jobClass === "review" && job.resumable && job.threadId) {
+      job.status = "failed";
+      mutated = true;
+    }
+  }
+  assert.ok(mutated, "expected a resumable review job with a thread id to mutate");
+  fs.writeFileSync(stateFile, JSON.stringify(companionState, null, 2));
+
+  // --resume must not reuse the non-completed thread; it starts a fresh one.
+  const resume = run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) });
+  assert.equal(resume.status, 0, resume.stderr);
+  state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.threads.length, threadsAfterFresh + 1, "a non-completed review must not be reused");
+  assert.doesNotMatch(state.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume starts fresh when the newest run did not complete, even if an older run did", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  // J1 (fresh) completes on a new thread; J2 (resume) completes on the same thread.
+  assert.equal(run("node", [SCRIPT, "review", "--fresh"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+  assert.equal(run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+
+  let codexState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsBefore = codexState.threads.length;
+
+  // Mark the NEWEST review run (the resume) as failed — it shares the thread id
+  // with the older completed run, which must NOT be selected as a fallback.
+  const stateFile = path.join(resolveStateDir(repo), "state.json");
+  const companionState = JSON.parse(fs.readFileSync(stateFile, "utf8"));
+  const reviewJobs = companionState.jobs
+    .filter((j) => j.jobClass === "review" && j.resumable && j.threadId)
+    .sort((a, b) => String(b.updatedAt ?? "").localeCompare(String(a.updatedAt ?? "")));
+  assert.ok(reviewJobs.length >= 2, "expected two resumable review jobs");
+  assert.equal(reviewJobs[0].threadId, reviewJobs[1].threadId, "both runs should share the resumed thread id");
+  for (const job of companionState.jobs) {
+    if (job.id === reviewJobs[0].id) job.status = "failed";
+  }
+  fs.writeFileSync(stateFile, JSON.stringify(companionState, null, 2));
+
+  // --resume must start a fresh thread, not fall back to the older completed run.
+  assert.equal(run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+  codexState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(codexState.threads.length, threadsBefore + 1, "must start a fresh thread, not reuse the interrupted one");
+  assert.doesNotMatch(codexState.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume starts fresh when a newer resumable run is still starting (no thread id yet)", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  // J0: completed resumable review on a thread.
+  assert.equal(run("node", [SCRIPT, "review", "--fresh"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+  let codexState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsBefore = codexState.threads.length;
+
+  // Inject a newer resumable review that is still running with no thread id yet
+  // (a concurrent --background run that has not emitted "Thread ready").
+  const stateFile = path.join(resolveStateDir(repo), "state.json");
+  const companionState = JSON.parse(fs.readFileSync(stateFile, "utf8"));
+  companionState.jobs.unshift({
+    id: "review-starting-0001",
+    kind: "review",
+    jobClass: "review",
+    resumable: true,
+    status: "running",
+    createdAt: "2099-01-01T00:00:00.000Z",
+    updatedAt: "2099-01-01T00:00:00.000Z"
+  });
+  fs.writeFileSync(stateFile, JSON.stringify(companionState, null, 2));
+
+  // --resume must not fall back past the still-starting run to the older thread.
+  assert.equal(run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+  codexState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(codexState.threads.length, threadsBefore + 1, "must start a fresh thread while a newer run is still starting");
+  assert.doesNotMatch(codexState.lastTurnStart.prompt, /<continuation>/);
+});
+
+test("review --resume does not reuse an adversarial-review thread (kind-scoped)", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.mkdirSync(path.join(repo, "src"));
+  fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n");
+  run("git", ["add", "src/app.js"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n");
+
+  // An adversarial review starts a resumable adversarial thread.
+  assert.equal(run("node", [SCRIPT, "adversarial-review", "--fresh"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+  let codexState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  const threadsBefore = codexState.threads.length;
+
+  // Plain `/codex:review --resume` must not reuse the adversarial thread.
+  assert.equal(run("node", [SCRIPT, "review", "--resume"], { cwd: repo, env: buildEnv(binDir) }).status, 0);
+  codexState = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(codexState.threads.length, threadsBefore + 1, "plain review must not reuse an adversarial-review thread");
+  assert.doesNotMatch(codexState.lastTurnStart.prompt, /<continuation>/);
+});
+
 test("review includes reasoning output when the app server returns it", () => {
   const repo = makeTempDir();
   const binDir = makeTempDir();