diff --git a/src/common/reasoning-effort-manager.ts b/src/common/reasoning-effort-manager.ts
new file mode 100644
index 00000000..93fc48ef
--- /dev/null
+++ b/src/common/reasoning-effort-manager.ts
@@ -0,0 +1,188 @@
+import type { ReasoningEffort } from "../settings";
+import type { ToolCall, ToolExecutionResult } from "../tools/executor";
+
+// ── Public types ──────────────────────────────────────────────
+
+export type TurnInput = { // Data for one RuntimeReasoningEffortManager.evaluate() call.
+  /** Raw tool calls from the assistant response (before execution); evaluate() fingerprints them. */
+  toolCalls: ToolCall[];
+  /** Execution results after tool calls completed (same order as toolCalls). An empty list is never treated as all-ok. */
+  toolExecutions: ToolExecutionResult[];
+};
+
+// ── Internal state ───────────────────────────────────────────
+
+type ManagerState = { // Mutable bookkeeping held by RuntimeReasoningEffortManager.
+  currentEffort: ReasoningEffort; // Effort currently in force; the manager itself only assigns "high" or "max".
+  consecutiveFailures: number; // Not-all-ok turns in a row as counted by evaluateEscalation; zeroed on every switch.
+  consecutiveIdenticalCalls: number; // Length of the current run of turns with an unchanged fingerprint (tracked at "high").
+  lastFingerprint: string | null; // Fingerprint stored by the latest turn that recorded one; null before that.
+  turnsAtCurrentEffort: number; // evaluate() calls since the last switch (or since construction/reset).
+  cleanTurnStreak: number; // All-ok turns with a changed fingerprint counted toward a downgrade; a not-all-ok turn zeroes it.
+  downgradeCooldownRemaining: number; // Turns left after an escalation during which downgrade is not evaluated.
+  downgradeThreshold: number; // cleanTurnStreak value that triggers a downgrade; raised by downgrade().
+  escalateCooldownRemaining: number; // Turns left after a downgrade during which escalation is not evaluated.
+};
+
+// ── Constants ─────────────────────────────────────────────────
+
+const FAILURE_ESCALATION_THRESHOLD = 2; // Consecutive not-all-ok turns at "high" that trigger escalation.
+/**
+ * Number of consecutive turns with an identical tool-call fingerprint required to trigger escalation.
+ * Per spec: "≥3 consecutive tool calls with identical (name, arguments) pairs".
+ */
+const REPETITION_ESCALATION_THRESHOLD = 3;
+const DEFAULT_DOWNGRADE_THRESHOLD = 5; // Initial clean-turn streak required to downgrade from "max".
+const DOWNGRADE_COOLDOWN_TURNS = 3; // Turns after an escalation during which downgrade is not evaluated.
+const ESCALATE_COOLDOWN_TURNS = 2; // Turns after a downgrade during which escalation is not evaluated.
+
+// ── Manager ───────────────────────────────────────────────────
+
+/** Switches reasoning effort between "high" and "max" from per-turn tool outcomes; cooldowns damp flapping. */
+export class RuntimeReasoningEffortManager {
+  private state: ManagerState = RuntimeReasoningEffortManager.createInitialState();
+
+  /**
+   * Equality key for one turn, built from each call's name and arguments (call ids are ignored). Arguments that
+   * parse as JSON are re-serialized, so whitespace between JSON tokens is ignored while whitespace inside string
+   * values stays significant; arguments that are not valid JSON are compared verbatim after trimming.
+   */
+  static computeFingerprint(toolCalls: ToolCall[]): string {
+    const normalized = toolCalls.map((tc) => ({
+      name: tc.function.name,
+      args: RuntimeReasoningEffortManager.canonicalizeArguments(tc.function.arguments),
+    }));
+    return JSON.stringify(normalized);
+  }
+
+  /** Records one completed turn; returns the new effort if this turn caused a switch, otherwise `null`. */
+  evaluate(input: TurnInput): ReasoningEffort | null {
+    const fingerprint = RuntimeReasoningEffortManager.computeFingerprint(input.toolCalls);
+    // NOTE(review): an empty `toolExecutions` list gives `allOk === false`, i.e. it counts as a failed turn.
+    const allOk = input.toolExecutions.length > 0 && input.toolExecutions.every((e) => e.ok);
+    this.state.turnsAtCurrentEffort += 1;
+
+    const atHigh = this.state.currentEffort === "high";
+    const result = atHigh ? this.evaluateEscalation(fingerprint, allOk) : this.evaluateDowngrade(allOk, fingerprint);
+
+    // Cooldowns tick only on turns without a switch, so a cooldown armed this turn keeps its full length.
+    if (result === null) {
+      this.state.downgradeCooldownRemaining = Math.max(0, this.state.downgradeCooldownRemaining - 1);
+      this.state.escalateCooldownRemaining = Math.max(0, this.state.escalateCooldownRemaining - 1);
+    }
+
+    return result;
+  }
+
+  /** Returns the effort level currently selected by the manager. */
+  getCurrentEffort(): ReasoningEffort {
+    return this.state.currentEffort;
+  }
+
+  /** Restores the initial state: effort "high", zeroed counters and cooldowns, default downgrade threshold. */
+  reset(): void {
+    this.state = RuntimeReasoningEffortManager.createInitialState();
+  }
+
+  /** Returns a shallow copy of the internal state; changing the copy does not affect the manager. */
+  getState(): Readonly<ManagerState> {
+    return { ...this.state };
+  }
+
+  // ── Private helpers ─────────────────────────────────────────
+
+  /** Single definition of the starting state, shared by construction and `reset()`. */
+  private static createInitialState(): ManagerState {
+    return {
+      currentEffort: "high",
+      consecutiveFailures: 0,
+      consecutiveIdenticalCalls: 0,
+      lastFingerprint: null,
+      turnsAtCurrentEffort: 0,
+      cleanTurnStreak: 0,
+      downgradeCooldownRemaining: 0,
+      downgradeThreshold: DEFAULT_DOWNGRADE_THRESHOLD,
+      escalateCooldownRemaining: 0,
+    };
+  }
+
+  /** Re-serializes arguments that parse as JSON (key order is kept); otherwise returns the raw text trimmed. */
+  private static canonicalizeArguments(raw: string): string {
+    try {
+      return JSON.stringify(JSON.parse(raw));
+    } catch {
+      return raw.trim();
+    }
+  }
+
+  /** Evaluation while at "high": escalates on repeated failures or repeated identical calls; skipped during cooldown. */
+  private evaluateEscalation(fingerprint: string, allOk: boolean): ReasoningEffort | null {
+    if (this.state.escalateCooldownRemaining > 0) {
+      return null;
+    }
+
+    if (!allOk) {
+      this.state.consecutiveFailures += 1;
+      // Restart the identical-call streak; the repetition check below then leaves it at 1 for this turn.
+      this.state.consecutiveIdenticalCalls = 0;
+      if (this.state.consecutiveFailures >= FAILURE_ESCALATION_THRESHOLD) {
+        return this.escalate();
+      }
+    } else {
+      this.state.consecutiveFailures = 0;
+    }
+
+    const repeated = fingerprint === this.state.lastFingerprint;
+    // A changed fingerprint starts a new streak at 1; only a repeated one can reach the threshold.
+    this.state.consecutiveIdenticalCalls = repeated ? this.state.consecutiveIdenticalCalls + 1 : 1;
+    if (repeated && this.state.consecutiveIdenticalCalls >= REPETITION_ESCALATION_THRESHOLD) {
+      return this.escalate();
+    }
+
+    this.state.lastFingerprint = fingerprint;
+    return null;
+  }
+
+  /** Evaluation while at "max": downgrades after enough clean, non-repeating turns once the cooldown is over. */
+  private evaluateDowngrade(allOk: boolean, fingerprint: string): ReasoningEffort | null {
+    if (this.state.downgradeCooldownRemaining > 0) {
+      this.state.lastFingerprint = fingerprint;
+      return null;
+    }
+
+    if (allOk && fingerprint !== this.state.lastFingerprint) {
+      this.state.cleanTurnStreak += 1;
+      if (this.state.cleanTurnStreak >= this.state.downgradeThreshold) {
+        return this.downgrade();
+      }
+    } else if (!allOk) {
+      this.state.cleanTurnStreak = 0;
+    }
+
+    this.state.lastFingerprint = fingerprint;
+    return null;
+  }
+
+  /** Switches to "max", clears the streak counters and arms the downgrade cooldown. */
+  private escalate(): ReasoningEffort {
+    this.state.currentEffort = "max";
+    this.state.consecutiveFailures = 0;
+    this.state.consecutiveIdenticalCalls = 0;
+    this.state.cleanTurnStreak = 0;
+    this.state.downgradeCooldownRemaining = DOWNGRADE_COOLDOWN_TURNS;
+    this.state.turnsAtCurrentEffort = 0;
+    return "max";
+  }
+
+  /** Switches to "high", arms the escalate cooldown and doubles the downgrade threshold (capped at 4x the default). */
+  private downgrade(): ReasoningEffort {
+    this.state.currentEffort = "high";
+    this.state.cleanTurnStreak = 0;
+    this.state.escalateCooldownRemaining = ESCALATE_COOLDOWN_TURNS;
+    this.state.consecutiveFailures = 0;
+    this.state.consecutiveIdenticalCalls = 0;
+    this.state.turnsAtCurrentEffort = 0;
+    this.state.downgradeThreshold = Math.min(this.state.downgradeThreshold * 2, DEFAULT_DOWNGRADE_THRESHOLD * 4);
+    return "high";
+  }
+}
diff --git a/src/session.ts b/src/session.ts
index 9432a74d..d9266fb0 100644
--- a/src/session.ts
+++ b/src/session.ts
@@ -24,11 +24,13 @@ import {
   type CreateOpenAIClient,
   type ProcessTimeoutControl,
   type ProcessTimeoutInfo,
+  type ToolCall,
   type ToolCallExecution,
   type ToolExecutionHooks,
 } from "./tools/executor";
 import { McpManager } from "./mcp/mcp-manager";
-import type { McpServerConfig, PermissionScope, PermissionSettings } from "./settings";
+import { RuntimeReasoningEffortManager } from "./common/reasoning-effort-manager";
+import type { McpServerConfig, PermissionScope, PermissionSettings, ReasoningEffort } from "./settings";
 import { logApiError } from "./common/error-logger";
 import { logOpenAIChatCompletionDebug, normalizeDebugError } from "./common/debug-logger";
 import { killProcessTree } from "./common/process-tree";
@@ -171,14 +173,6 @@ function accumulateUsagePerModel(
   return usagePerModel;
 }
 
-function getTotalTokens(usage: ModelUsage | null | undefined): number {
-  if (!isUsageRecord(usage)) {
-    return 0;
-  }
-  const totalTokens = usage.total_tokens;
-  return typeof totalTokens === "number" ? totalTokens : 0;
-}
-
 export type SessionStatus =
   | "failed"
   | "pending"
@@ -339,6 +333,7 @@ export class SessionManager {
   private readonly mcpManager = new McpManager();
   private mcpToolDefinitions: ToolDefinition[] = [];
   private readonly messageConverter: OpenAIMessageConverter;
+  private static systemPromptCache = new Map<string, string>();
 
   constructor(options: SessionManagerOptions) {
     this.projectRoot = options.projectRoot;
@@ -414,6 +409,18 @@ export class SessionManager {
     return tokens;
   }
 
+  private estimateContextTokens(messages: SessionMessage[]): number {
+    // Approximates tokens as characters / 4 (rounded up): every non-compacted message contributes
+    // its `content` length plus the length of its JSON-serialized `messageParams`, when present.
+    const charCount = messages.reduce((sum, message) => {
+      if (message.compacted) return sum;
+      const contentLength = message.content?.length ?? 0;
+      const paramsLength = message.messageParams ? JSON.stringify(message.messageParams).length : 0;
+      return sum + contentLength + paramsLength;
+    }, 0);
+    return Math.ceil(charCount / 4);
+  }
+
   private formatEstimatedTokens(tokens: number): string {
     if (tokens <= 0) {
       return "0";
@@ -1072,7 +1079,12 @@ ${agentInstructions}
     }
 
     const promptToolOptions = this.getPromptToolOptions();
-    const systemPrompt = getSystemPrompt(this.projectRoot, promptToolOptions);
+    const cacheKey = `${promptToolOptions.model}`;
+    let systemPrompt = SessionManager.systemPromptCache.get(cacheKey);
+    if (!systemPrompt) {
+      systemPrompt = getSystemPrompt(this.projectRoot, promptToolOptions);
+      SessionManager.systemPromptCache.set(cacheKey, systemPrompt);
+    }
     const systemMessage = this.buildSystemMessage(sessionId, systemPrompt);
     this.appendSessionMessage(sessionId, systemMessage);
 
@@ -1220,6 +1232,8 @@ ${agentInstructions}
     const startedAt = Date.now();
     const { client, model, baseURL, temperature, thinkingEnabled, reasoningEffort, debugLogEnabled, notify, env } =
       this.createOpenAIClient();
+    const effortManager = new RuntimeReasoningEffortManager();
+    let currentReasoningEffort: ReasoningEffort = reasoningEffort ?? "high";
     const now = new Date().toISOString();
     rebuildSessionStateFromHistory(sessionId, this.listSessionMessages(sessionId));
 
@@ -1265,6 +1279,7 @@ ${agentInstructions}
     try {
       const maxIterations = 80000; // about 1K RMB cost
       let toolCalls: unknown[] | null = null;
+      const cachedTools = getTools(this.getPromptToolOptions(), this.mcpToolDefinitions);
 
       for (let iteration = 0; iteration < maxIterations; iteration++) {
         if (this.isInterrupted(sessionId)) {
@@ -1318,14 +1333,14 @@ ${agentInstructions}
           thinkingEnabled,
           model
         );
-        const thinkingOptions = buildThinkingRequestOptions(thinkingEnabled, baseURL, reasoningEffort);
+        const thinkingOptions = buildThinkingRequestOptions(thinkingEnabled, baseURL, currentReasoningEffort);
         const response = await this.createChatCompletionStream(
           client,
           {
             model,
             ...(temperature !== undefined ? { temperature } : {}),
             messages,
-            tools: getTools(this.getPromptToolOptions(), this.mcpToolDefinitions),
+            tools: cachedTools,
             ...thinkingOptions,
           },
           { signal: sessionController.signal },
@@ -1383,7 +1398,7 @@ ${agentInstructions}
               toolCalls,
               usage: accumulateUsage(entry.usage, responseUsage),
               usagePerModel: accumulateUsagePerModel(entry.usagePerModel, model, responseUsage),
-              activeTokens: getTotalTokens(responseUsage),
+              activeTokens: this.estimateContextTokens(this.listSessionMessages(sessionId)),
               status: "ask_permission",
               failReason: null,
               askPermissions: permissionPlan.askPermissions,
@@ -1395,6 +1410,17 @@ ${agentInstructions}
             messagePermissions: permissionPlan?.permissions,
           });
           waitingForUser = toolAppendResult.waitingForUser;
+
+          if (toolCalls && toolCalls.length > 0 && toolAppendResult.executions.length > 0) {
+            const turnInput = {
+              toolCalls: toolCalls as ToolCall[],
+              toolExecutions: toolAppendResult.executions.map((e) => e.result),
+            };
+            const nextEffort = effortManager.evaluate(turnInput);
+            if (nextEffort !== null && nextEffort !== currentReasoningEffort) {
+              currentReasoningEffort = nextEffort;
+            }
+          }
         }
 
         if (this.isInterrupted(sessionId)) {
@@ -1409,7 +1435,7 @@ ${agentInstructions}
           toolCalls,
           usage: accumulateUsage(entry.usage, responseUsage),
           usagePerModel: accumulateUsagePerModel(entry.usagePerModel, model, responseUsage),
-          activeTokens: getTotalTokens(responseUsage),
+          activeTokens: this.estimateContextTokens(this.listSessionMessages(sessionId)),
           status: refusal ? "failed" : waitingForUser ? "waiting_for_user" : toolCalls ? "processing" : "completed",
           failReason: refusal ? refusal : entry.failReason,
           askPermissions: undefined,
@@ -1417,14 +1443,17 @@ ${agentInstructions}
         }));
 
         if (refusal) {
+          effortManager.reset();
           return;
         }
 
         if (waitingForUser) {
+          effortManager.reset();
           return;
         }
 
         if (!toolCalls) {
+          effortManager.reset();
           return;
         }
       }
@@ -1514,7 +1543,16 @@ ${agentInstructions}
     this.throwIfAborted(signal);
     const rawLlmResponse = response.choices?.[0]?.message?.content;
     const llmResponse = typeof rawLlmResponse === "string" ? rawLlmResponse : "";
-    const compactedSummary = llmResponse.replace(/<analysis>[\s\S]*?<\/analysis>/gi, "").trim();
+    let compactedSummary: string;
+    try {
+      const parsed = JSON.parse(llmResponse);
+      compactedSummary =
+        typeof parsed.summary === "string" && parsed.summary.trim()
+          ? `Summary: ${parsed.summary.trim()}\nKey files: ${Array.isArray(parsed.keyFiles) ? parsed.keyFiles.join(", ") : "none"}\nPending: ${Array.isArray(parsed.pendingActions) ? parsed.pendingActions.join("; ") : "none"}`
+          : llmResponse.trim();
+    } catch {
+      compactedSummary = llmResponse.trim();
+    }
 
     const now = new Date().toISOString();
     const responseUsage = response.usage ?? null;
@@ -1522,7 +1560,7 @@ ${agentInstructions}
       ...entry,
       usage: accumulateUsage(entry.usage, responseUsage),
       usagePerModel: accumulateUsagePerModel(entry.usagePerModel, model, responseUsage),
-      activeTokens: getTotalTokens(responseUsage),
+      activeTokens: this.estimateContextTokens(sessionMessages),
       updateTime: now,
     }));
 
@@ -2233,7 +2271,7 @@ ${agentInstructions}
       permissionOverrides?: UserToolPermission[];
       messagePermissions?: MessageToolPermission[];
     } = {}
-  ): Promise<{ waitingForUser: boolean }> {
+  ): Promise<{ waitingForUser: boolean; executions: ToolCallExecution[] }> {
     const hooks: ToolExecutionHooks = {
       onProcessStart: (pid, command) => this.addSessionProcess(sessionId, pid, command),
       onProcessExit: (pid) => this.removeSessionProcess(sessionId, pid),
@@ -2261,7 +2299,7 @@ ${agentInstructions}
       toolExecutions.push(...executions);
     }
     if (this.isInterrupted(sessionId)) {
-      return { waitingForUser: false };
+      return { waitingForUser: false, executions: toolExecutions };
     }
     let waitingForUser = false;
     const followUpMessages: SessionMessage[] = [];
@@ -2287,7 +2325,7 @@ ${agentInstructions}
     for (const followUpMessage of followUpMessages) {
       this.appendSessionMessage(sessionId, followUpMessage);
     }
-    return { waitingForUser };
+    return { waitingForUser, executions: toolExecutions };
   }
 
   private cloneUserPromptForMeta(prompt: UserPromptContent): UserPromptContent {
diff --git a/src/tests/reasoning-effort-manager.test.ts b/src/tests/reasoning-effort-manager.test.ts
new file mode 100644
index 00000000..ac7cb37f
--- /dev/null
+++ b/src/tests/reasoning-effort-manager.test.ts
@@ -0,0 +1,155 @@
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
+import { RuntimeReasoningEffortManager, type TurnInput } from "../common/reasoning-effort-manager";
+
+function mkExec(ok: boolean, name = "bash"): TurnInput["toolExecutions"][number] { // Test helper: result with output on success, error on failure.
+  return ok ? { ok, name, output: "success", error: undefined } : { ok, name, output: undefined, error: "fail" };
+}
+
+function mkCall(name: string, args: string): TurnInput["toolCalls"][number] { // Test helper: function-type tool call with the fixed id "call-1".
+  return { id: "call-1", type: "function", function: { name, arguments: args } };
+}
+
+describe("RuntimeReasoningEffortManager", () => {
+  test("starts at high", () => {
+    const m = new RuntimeReasoningEffortManager();
+    assert.equal(m.getCurrentEffort(), "high");
+  });
+
+  test("escalates on 2 consecutive failures", () => {
+    const m = new RuntimeReasoningEffortManager();
+    assert.equal(
+      m.evaluate({
+        toolCalls: [mkCall("bash", '{"cmd":"x"}')],
+        toolExecutions: [mkExec(false)],
+      }),
+      null
+    );
+    assert.equal(m.getCurrentEffort(), "high");
+    assert.equal(
+      m.evaluate({
+        toolCalls: [mkCall("bash", '{"cmd":"y"}')], // different arguments, so escalation comes from the failure counter alone
+        toolExecutions: [mkExec(false)],
+      }),
+      "max"
+    );
+    assert.equal(m.getCurrentEffort(), "max");
+  });
+
+  test("resets failure counter on success", () => {
+    const m = new RuntimeReasoningEffortManager();
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(true)] });
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    assert.equal(m.getCurrentEffort(), "high"); // failure counter went 1, 0, 1 — never 2 in a row
+  });
+
+  test("escalates on 3 identical tool calls", () => {
+    const m = new RuntimeReasoningEffortManager();
+    const call = mkCall("read", '{"file_path":"/x"}');
+    assert.equal(m.evaluate({ toolCalls: [call], toolExecutions: [mkExec(true)] }), null);
+    assert.equal(m.evaluate({ toolCalls: [call], toolExecutions: [mkExec(true)] }), null);
+    assert.equal(m.evaluate({ toolCalls: [call], toolExecutions: [mkExec(true)] }), "max");
+  });
+
+  test("downgrades after 5 clean turns (default threshold)", () => {
+    const m = new RuntimeReasoningEffortManager();
+    // Escalate first
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    assert.equal(m.getCurrentEffort(), "max");
+
+    // Cooldown: first 3 turns at "max" cannot downgrade
+    for (let i = 0; i < 3; i++) {
+      const call = mkCall("bash", `{"cmd":"cooldown${i}"}`);
+      assert.equal(m.evaluate({ toolCalls: [call], toolExecutions: [mkExec(true)] }), null);
+    }
+    assert.equal(m.getCurrentEffort(), "max");
+
+    // Now 5 clean turns with different fingerprints
+    for (let i = 0; i < 5; i++) {
+      const call = mkCall("bash", `{"cmd":"unique${i}"}`);
+      m.evaluate({ toolCalls: [call], toolExecutions: [mkExec(true)] });
+    }
+    assert.equal(m.getCurrentEffort(), "high");
+  });
+
+  test("fingerprint is independent of argument whitespace", () => {
+    const fp1 = RuntimeReasoningEffortManager.computeFingerprint([
+      { id: "a", type: "function", function: { name: "bash", arguments: '{"cmd":  "x"}' } },
+    ]);
+    const fp2 = RuntimeReasoningEffortManager.computeFingerprint([
+      { id: "b", type: "function", function: { name: "bash", arguments: '{"cmd":"x"}' } },
+    ]);
+    assert.equal(fp1, fp2);
+  });
+
+  test("reset clears all state", () => {
+    const m = new RuntimeReasoningEffortManager();
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    assert.equal(m.getCurrentEffort(), "max");
+    m.reset();
+    assert.equal(m.getCurrentEffort(), "high");
+    assert.equal(m.getState().consecutiveFailures, 0);
+    assert.equal(m.getState().cleanTurnStreak, 0);
+  });
+
+  test("cooldown prevents immediate re-escalation after downgrade", () => {
+    const m = new RuntimeReasoningEffortManager();
+    // Escalate
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    assert.equal(m.getCurrentEffort(), "max");
+    // Downgrade via 8 clean turns (3 cooldown + 5 threshold)
+    for (let i = 0; i < 3; i++) {
+      m.evaluate({ toolCalls: [mkCall("bash", `{"c":"a${i}"}`)], toolExecutions: [mkExec(true)] });
+    }
+    for (let i = 0; i < 5; i++) {
+      m.evaluate({ toolCalls: [mkCall("bash", `{"c":"b${i}"}`)], toolExecutions: [mkExec(true)] });
+    }
+    assert.equal(m.getCurrentEffort(), "high");
+    // Immediate failure should NOT re-escalate (cooldown active). NOTE(review): one failure is below the threshold of 2 anyway, so this also passes without a cooldown.
+    assert.equal(m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] }), null);
+    assert.equal(m.getCurrentEffort(), "high");
+  });
+
+  test("anti-flapping doubles downgrade threshold on repeated cycles", () => {
+    const m = new RuntimeReasoningEffortManager();
+    // First cycle: escalate
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [mkExec(false)] });
+    assert.equal(m.getCurrentEffort(), "max");
+    // Downgrade: 3 cooldown + 5 clean
+    for (let i = 0; i < 3; i++) {
+      m.evaluate({ toolCalls: [mkCall("b", `${i}`)], toolExecutions: [mkExec(true)] });
+    }
+    for (let i = 0; i < 5; i++) {
+      m.evaluate({ toolCalls: [mkCall("b", `d${i}`)], toolExecutions: [mkExec(true)] });
+    }
+    assert.equal(m.getCurrentEffort(), "high");
+    // Second cycle: escalate again (cooldown absorbs first 2 failures)
+    for (let i = 0; i < 2; i++) {
+      m.evaluate({ toolCalls: [mkCall("b", `e${i}`)], toolExecutions: [mkExec(false)] });
+    }
+    m.evaluate({ toolCalls: [mkCall("b", "e2")], toolExecutions: [mkExec(false)] });
+    m.evaluate({ toolCalls: [mkCall("b", "e3")], toolExecutions: [mkExec(false)] });
+    assert.equal(m.getCurrentEffort(), "max");
+    // Downgrade: 3 cooldown + now 10 clean turns needed (threshold doubled)
+    for (let i = 0; i < 3; i++) {
+      m.evaluate({ toolCalls: [mkCall("b", `f${i}`)], toolExecutions: [mkExec(true)] });
+    }
+    for (let i = 0; i < 9; i++) {
+      m.evaluate({ toolCalls: [mkCall("b", `g${i}`)], toolExecutions: [mkExec(true)] });
+    }
+    assert.equal(m.getCurrentEffort(), "max"); // still max, threshold not met
+    m.evaluate({ toolCalls: [mkCall("b", "g9")], toolExecutions: [mkExec(true)] }); // 10th clean
+    assert.equal(m.getCurrentEffort(), "high"); // now downgraded
+  });
+
+  test("no escalation on first turn (empty executions)", () => {
+    const m = new RuntimeReasoningEffortManager();
+    assert.equal(m.evaluate({ toolCalls: [mkCall("bash", "{}")], toolExecutions: [] }), null); // counted as 1 failed turn (< threshold 2)
+    assert.equal(m.getCurrentEffort(), "high");
+  });
+});