diff --git a/src/utils/swarm/backends/WindowsTerminalBackend.ts b/src/utils/swarm/backends/WindowsTerminalBackend.ts index b0e45e3123..2b9671b65c 100644 --- a/src/utils/swarm/backends/WindowsTerminalBackend.ts +++ b/src/utils/swarm/backends/WindowsTerminalBackend.ts @@ -1,5 +1,5 @@ import { randomUUID } from 'crypto' -import { readFile } from 'fs/promises' +import { readFile, unlink } from 'fs/promises' import { join } from 'path' import { tmpdir } from 'os' import type { AgentColorName } from '@claude-code-best/builtin-tools/tools/AgentTool/agentColorManager.js' @@ -13,10 +13,15 @@ import type { CreatePaneResult, PaneBackend, PaneId } from './types.js' type CommandResult = { stdout: string; stderr: string; code: number } type CommandRunner = (command: string, args: string[]) => Promise +type PaneStatus = 'registered' | 'spawning' | 'ready' | 'killing' | 'dead' + type WindowsTerminalPane = { title: string mode: 'pane' | 'window' pidFile: string + status: PaneStatus + pid?: number + spawnPromise?: Promise } function quotePowerShellString(value: string): string { @@ -39,8 +44,42 @@ function wrapPowerShellCommand(command: string, pidFile: string): string { ].join('; ') } -function makePidFile(paneId: string): string { - return join(tmpdir(), `${paneId.replace(/[^a-zA-Z0-9_-]/g, '-')}.pid`) +const WT_PANE_TIMEOUT_DEFAULT_MS = 8000 +const WT_PANE_POLL_INTERVAL_MS = 200 + +function getWtPaneTimeoutMs(): number { + const raw = process.env.CLAUDE_WT_PANE_TIMEOUT_MS + if (!raw) return WT_PANE_TIMEOUT_DEFAULT_MS + const parsed = Number.parseInt(raw, 10) + return Number.isFinite(parsed) && parsed > 0 + ? parsed + : WT_PANE_TIMEOUT_DEFAULT_MS +} + +async function waitForPidFile( + pidFile: string, + timeoutMs: number, +): Promise { + const deadline = Date.now() + timeoutMs + let lastErr: unknown + while (Date.now() < deadline) { + try { + const content = (await readFile(pidFile, 'utf-8')).trim() + if (!/^\d+$/.test(content)) { + lastErr = new Error( + `pidFile content not a valid pid: ${JSON.stringify(content)}`, + ) + } else { + const pid = Number.parseInt(content, 10) + if (Number.isFinite(pid) && pid > 0) return pid + lastErr = new Error(`pidFile content parsed to invalid pid: ${pid}`) + } + } catch (err) { + lastErr = err + } + await new Promise(r => setTimeout(r, WT_PANE_POLL_INTERVAL_MS)) + } + throw lastErr ?? new Error('pidFile never appeared') } /** @@ -58,10 +97,40 @@ export class WindowsTerminalBackend implements PaneBackend { private panes = new Map() + private readonly runCommand: CommandRunner + private readonly getPlatformValue: () => Platform + private readonly pidFileDir: string + constructor( - private readonly runCommand: CommandRunner = execFileNoThrow, - private readonly getPlatformValue: () => Platform = getPlatform, - ) {} + runCommandOrOptions?: + | CommandRunner + | { + runCommand?: CommandRunner + getPlatform?: () => Platform + pidFileDir?: string + }, + getPlatformValue?: () => Platform, + ) { + if ( + typeof runCommandOrOptions === 'function' || + runCommandOrOptions === undefined + ) { + this.runCommand = runCommandOrOptions ?? execFileNoThrow + this.getPlatformValue = getPlatformValue ?? getPlatform + this.pidFileDir = tmpdir() + } else { + this.runCommand = runCommandOrOptions.runCommand ?? execFileNoThrow + this.getPlatformValue = runCommandOrOptions.getPlatform ?? getPlatform + this.pidFileDir = runCommandOrOptions.pidFileDir ?? tmpdir() + } + } + + private makePidFile(paneId: string): string { + return join( + this.pidFileDir, + `${paneId.replace(/[^a-zA-Z0-9_-]/g, '-')}.pid`, + ) + } async isAvailable(): Promise { if (this.getPlatformValue() !== 'windows') { @@ -92,7 +161,8 @@ export class WindowsTerminalBackend implements PaneBackend { this.panes.set(paneId, { title: name, mode: 'pane', - pidFile: makePidFile(paneId), + pidFile: this.makePidFile(paneId), + status: 'registered', }) return { paneId, isFirstTeammate } } @@ -106,7 +176,8 @@ export class WindowsTerminalBackend implements PaneBackend { this.panes.set(paneId, { title: name, mode: 'window', - pidFile: makePidFile(paneId), + pidFile: this.makePidFile(paneId), + status: 'registered', }) return { paneId, isFirstTeammate: false, windowName } } @@ -121,32 +192,95 @@ export class WindowsTerminalBackend implements PaneBackend { throw new Error(`Unknown Windows Terminal pane id: ${paneId}`) } - const launcher = wrapPowerShellCommand(command, pane.pidFile) - // wt.exe treats ';' as its own command separator, which breaks - // multi-statement PowerShell commands passed via -Command. Encode the - // entire script as Base64 UTF-16LE and use -EncodedCommand instead. - const encoded = Buffer.from(launcher, 'utf16le').toString('base64') - const args = - pane.mode === 'window' - ? ['-w', '-1', 'new-tab', '--title', pane.title] - : ['-w', '0', 'split-pane', '--vertical', '--title', pane.title] - - const result = await this.runCommand('wt.exe', [ - ...args, - 'powershell.exe', - '-NoLogo', - '-NoProfile', - '-ExecutionPolicy', - 'Bypass', - '-EncodedCommand', - encoded, - ]) - - if (result.code !== 0) { + // 拒绝 ready 态重 spawn(避免同 pidFile 双进程竞争) + if (pane.status === 'ready' || pane.status === 'killing') { throw new Error( - `Failed to launch Windows Terminal teammate ${paneId}: ${result.stderr}`, + `Pane ${paneId} already spawned (status=${pane.status}); create a new pane to re-launch`, ) } + if (pane.status === 'spawning') { + throw new Error( + `Pane ${paneId} is currently spawning; wait for the in-flight launch to complete`, + ) + } + if (pane.status === 'dead') { + throw new Error(`Pane ${paneId} is dead; create a new pane`) + } + // pane.status === 'registered' → 继续 + + // 提前赋值 spawnPromise 在任何 await 前(inner Promise 包装) + // Attach a no-op .catch() immediately to prevent unhandled rejection warnings + // in case killPane never awaits spawnPromise (e.g. sendCommandToPane fails + // before killPane is called). + let resolveSpawn!: () => void + let rejectSpawn!: (err: unknown) => void + const spawnPromise = new Promise((res, rej) => { + resolveSpawn = res + rejectSpawn = rej + }) + // Silence unhandled-rejection: killPane may .catch() this later, but if + // the pane dies before any kill is attempted, the rejection must not leak. + spawnPromise.catch(() => {}) + pane.status = 'spawning' + pane.spawnPromise = spawnPromise + + try { + const launcher = wrapPowerShellCommand(command, pane.pidFile) + // wt.exe treats ';' as its own command separator, which breaks + // multi-statement PowerShell commands passed via -Command. Encode the + // entire script as Base64 UTF-16LE and use -EncodedCommand instead. + const encoded = Buffer.from(launcher, 'utf16le').toString('base64') + const args = + pane.mode === 'window' + ? ['-w', '-1', 'new-tab', '--title', pane.title] + : ['-w', '0', 'split-pane', '--vertical', '--title', pane.title] + + await unlink(pane.pidFile).catch(() => {}) + + const result = await this.runCommand('wt.exe', [ + ...args, + 'powershell.exe', + '-NoLogo', + '-NoProfile', + '-ExecutionPolicy', + 'Bypass', + '-EncodedCommand', + encoded, + ]) + + if (result.code !== 0) { + throw new Error( + `Failed to launch Windows Terminal teammate ${paneId}: ${result.stderr}`, + ) + } + + const timeoutMs = getWtPaneTimeoutMs() + let pid: number + try { + pid = await waitForPidFile(pane.pidFile, timeoutMs) + } catch (err) { + throw new Error( + `Windows Terminal pane failed to launch within ${timeoutMs}ms\n` + + ` paneId: ${paneId}\n` + + ` pidFile: ${pane.pidFile}\n` + + ` wt.exe stdout: ${result.stdout || '(empty)'}\n` + + ` wt.exe stderr: ${result.stderr || '(empty)'}\n` + + ` underlying: ${err instanceof Error ? err.message : String(err)}\n` + + ` override timeout via env CLAUDE_WT_PANE_TIMEOUT_MS`, + ) + } + + pane.pid = pid + pane.status = 'ready' + resolveSpawn() + } catch (err) { + pane.status = 'dead' + pane.pid = undefined + rejectSpawn(err) + throw err + } finally { + pane.spawnPromise = undefined + } } async setPaneBorderColor( @@ -189,26 +323,69 @@ export class WindowsTerminalBackend implements PaneBackend { return false } - let pid: number - try { - pid = Number.parseInt((await readFile(pane.pidFile, 'utf-8')).trim(), 10) - } catch { - this.panes.delete(paneId) - return false + // 1. 解 kill-while-spawn race:await spawn 完成(不论成功失败) + if (pane.status === 'spawning' && pane.spawnPromise) { + await pane.spawnPromise.catch(() => {}) } - if (!Number.isFinite(pid)) { + // 2. TOCTOU 修正:重读 status/pid + if (pane.status === 'dead') { this.panes.delete(paneId) return false } + if (pane.status !== 'ready') { + // 还在其它非终态(理论不可达,保险) + return false + } + + pane.status = 'killing' + + // 3. 优先用缓存 pid + let pid: number | undefined = pane.pid + + // 4. fallback:缓存没有则读盘(保留 retry 3×500ms) + if (pid === undefined) { + let pidContent: string | null = null + for (let attempt = 0; attempt < 3; attempt++) { + try { + pidContent = (await readFile(pane.pidFile, 'utf-8')).trim() + break + } catch { + if (attempt === 2) { + pane.status = 'dead' + this.panes.delete(paneId) + return false + } + await new Promise(r => setTimeout(r, 500)) + } + } + if (!pidContent || !/^\d+$/.test(pidContent)) { + pane.status = 'dead' + this.panes.delete(paneId) + return false + } + const parsed = Number.parseInt(pidContent, 10) + if (!Number.isFinite(parsed) || parsed <= 0) { + pane.status = 'dead' + this.panes.delete(paneId) + return false + } + pid = parsed + } + // 5. 执行 Stop-Process const result = await this.runCommand('powershell.exe', [ '-NoLogo', '-NoProfile', '-Command', `Stop-Process -Id ${pid} -Force -ErrorAction Stop`, ]) + + // 6. 不管成功失败都清缓存 + 标 dead + 从 map 删(防 PID 复用误杀) + pane.pid = undefined + pane.status = 'dead' this.panes.delete(paneId) + logForDebugging( `[WindowsTerminalBackend] killPane ${paneId} pid=${pid} code=${result.code}`, ) diff --git a/src/utils/swarm/backends/__tests__/WindowsTerminalBackend.test.ts b/src/utils/swarm/backends/__tests__/WindowsTerminalBackend.test.ts index bd06effd4c..1156be50e3 100644 --- a/src/utils/swarm/backends/__tests__/WindowsTerminalBackend.test.ts +++ b/src/utils/swarm/backends/__tests__/WindowsTerminalBackend.test.ts @@ -14,20 +14,43 @@ beforeEach(async () => { `windows-terminal-backend-${Date.now()}-${Math.random().toString(16).slice(2)}`, ) await mkdir(tempDir, { recursive: true }) + process.env.CLAUDE_WT_PANE_TIMEOUT_MS = '2000' }) afterEach(async () => { await rm(tempDir, { recursive: true, force: true }) + delete process.env.CLAUDE_WT_PANE_TIMEOUT_MS }) -function createBackend(calls: Call[]): WindowsTerminalBackend { - return new WindowsTerminalBackend( - async (command, args) => { +function createBackend( + calls: Call[], + opts: { simulatePidWrite?: boolean | number } = {}, +): WindowsTerminalBackend { + const simulate = opts.simulatePidWrite !== false + const delayMs = + typeof opts.simulatePidWrite === 'number' ? opts.simulatePidWrite : 30 + return new WindowsTerminalBackend({ + runCommand: async (command, args) => { calls.push({ command, args }) + if (simulate && command === 'wt.exe') { + const encIdx = args.indexOf('-EncodedCommand') + if (encIdx >= 0) { + const decoded = Buffer.from(args[encIdx + 1]!, 'base64').toString( + 'utf16le', + ) + const match = decoded.match(/Set-Content -LiteralPath '([^']+)'/) + if (match) { + setTimeout(() => { + writeFile(match[1]!, '54321', 'utf-8').catch(() => {}) + }, delayMs) + } + } + } return { stdout: 'ok', stderr: '', code: 0 } }, - () => 'windows', - ) + getPlatform: () => 'windows', + pidFileDir: tempDir, + }) } function decodeEncodedCommand(call: Call): { @@ -78,25 +101,236 @@ describe('WindowsTerminalBackend', () => { expect(args.join(' ')).toContain('-w -1 new-tab --title') }) - test('force kills the recorded teammate shell pid when available', async () => { + test('force kills the cached pid from sendCommandToPane without reading pidFile', async () => { const calls: Call[] = [] const backend = createBackend(calls) const pane = await backend.createTeammatePaneInSwarmView('killer', 'red') + // sendCommandToPane resolves — simulate writes '54321' to pidFile, which + // becomes pane.pid. killPane should use the cached pid, not re-read the file. await backend.sendCommandToPane(pane.paneId, "Write-Output 'running'") - const { decodedLauncher } = decodeEncodedCommand(calls[0]!) - const pidFile = decodedLauncher.match( - /Set-Content -LiteralPath '([^']+)'/, - )?.[1] - expect(pidFile).toBeString() - await writeFile(pidFile!, '12345', 'utf-8') const killed = await backend.killPane(pane.paneId) expect(killed).toBe(true) expect(calls[calls.length - 1]!.command).toBe('powershell.exe') expect(calls[calls.length - 1]!.args.join(' ')).toContain( - 'Stop-Process -Id 12345', + 'Stop-Process -Id 54321', + ) + }) + + test('throws a diagnostic error when pidFile never appears within timeout', async () => { + process.env.CLAUDE_WT_PANE_TIMEOUT_MS = '300' + const calls: Call[] = [] + const backend = createBackend(calls, { simulatePidWrite: false }) + const pane = await backend.createTeammatePaneInSwarmView('slowpane', 'blue') + let caught: unknown + try { + await backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + } catch (err) { + caught = err + } + expect(caught).toBeInstanceOf(Error) + expect((caught as Error).message).toMatch( + /Windows Terminal pane failed to launch within 300ms/, + ) + }) + + test('error message includes paneId pidFile and override hint', async () => { + process.env.CLAUDE_WT_PANE_TIMEOUT_MS = '250' + const calls: Call[] = [] + const backend = createBackend(calls, { simulatePidWrite: false }) + const pane = await backend.createTeammatePaneInSwarmView( + 'diagpane', + 'green', + ) + let caught: unknown + try { + await backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + } catch (err) { + caught = err + } + expect(caught).toBeInstanceOf(Error) + const msg = (caught as Error).message + expect(msg).toContain(pane.paneId) + expect(msg).toContain('CLAUDE_WT_PANE_TIMEOUT_MS') + }) + + test('unlinks stale pidFile so a stale pid is not adopted', async () => { + const calls: Call[] = [] + const backend = createBackend(calls, { simulatePidWrite: 30 }) + const pane = await backend.createTeammatePaneInSwarmView('stale', 'pink') + // pidFile path is deterministic: /.pid + const stalePidFile = join( + tempDir, + `${pane.paneId.replace(/[^a-zA-Z0-9_-]/g, '-')}.pid`, + ) + // Pre-seed stale content. If sendCommandToPane did NOT unlink, waitForPidFile + // would immediately accept '99999' and cache it as pane.pid. With unlink, + // simulate's '54321' is the value killPane sees. + await writeFile(stalePidFile, '99999', 'utf-8') + + await backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + const killed = await backend.killPane(pane.paneId) + expect(killed).toBe(true) + expect(calls[calls.length - 1]!.args.join(' ')).toContain( + 'Stop-Process -Id 54321', ) }) + + test('rejects re-spawn on a ready pane', async () => { + const calls: Call[] = [] + const backend = createBackend(calls) + const pane = await backend.createTeammatePaneInSwarmView('reentry', 'cyan') + await backend.sendCommandToPane(pane.paneId, "Write-Output 'first'") + // pane.status === 'ready' now. Second sendCommandToPane must throw. + let caught: unknown + try { + await backend.sendCommandToPane(pane.paneId, "Write-Output 'second'") + } catch (err) { + caught = err + } + expect(caught).toBeInstanceOf(Error) + expect((caught as Error).message).toMatch(/already spawned/) + }) + + test('throws on unknown paneId in sendCommandToPane', async () => { + const calls: Call[] = [] + const backend = createBackend(calls) + let caught: unknown + try { + await backend.sendCommandToPane('wt-nonexistent', "Write-Output 'x'") + } catch (err) { + caught = err + } + expect(caught).toBeInstanceOf(Error) + expect((caught as Error).message).toContain('Unknown Windows Terminal pane') + }) + + test('rejects corrupted pidFile content ("123abc") and times out', async () => { + process.env.CLAUDE_WT_PANE_TIMEOUT_MS = '400' + const calls: Call[] = [] + // Custom runner writes invalid pid content (not all digits). + const backend = new WindowsTerminalBackend({ + runCommand: async (command, args) => { + calls.push({ command, args }) + if (command === 'wt.exe') { + const encIdx = args.indexOf('-EncodedCommand') + if (encIdx >= 0) { + const decoded = Buffer.from(args[encIdx + 1]!, 'base64').toString( + 'utf16le', + ) + const match = decoded.match(/Set-Content -LiteralPath '([^']+)'/) + if (match) { + setTimeout(() => { + writeFile(match[1]!, '123abc', 'utf-8').catch(() => {}) + }, 30) + } + } + } + return { stdout: 'ok', stderr: '', code: 0 } + }, + getPlatform: () => 'windows', + pidFileDir: tempDir, + }) + const pane = await backend.createTeammatePaneInSwarmView('corrupt', 'red') + let caught: unknown + try { + await backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + } catch (err) { + caught = err + } + expect(caught).toBeInstanceOf(Error) + // Inner error from waitForPidFile must reach the wrapped diagnostic message. + const msg = (caught as Error).message + expect(msg).toMatch(/failed to launch within 400ms/) + expect(msg).toMatch(/not a valid pid|invalid pid|123abc/) + }) + + test('killPane awaits in-flight spawn before killing (kill-while-spawn race)', async () => { + // simulatePidWrite: 800ms — sendCommandToPane stays in waitForPidFile for ~800ms. + process.env.CLAUDE_WT_PANE_TIMEOUT_MS = '3000' + const calls: Call[] = [] + const backend = createBackend(calls, { simulatePidWrite: 800 }) + const pane = await backend.createTeammatePaneInSwarmView('racy', 'blue') + + // Start spawn but don't await it yet. + const spawnP = backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + // 50ms later, call killPane — pane is still 'spawning', killPane must + // await spawnPromise (which resolves at ~800ms when simulate writes pid 54321), + // then kill using the cached pid. + await new Promise(r => setTimeout(r, 50)) + const killP = backend.killPane(pane.paneId) + + // Both must resolve cleanly. + await spawnP + const killed = await killP + expect(killed).toBe(true) + // The kill must target the freshly-spawned pid (54321), not have used a + // stale-or-missing fallback path. + const killCall = calls[calls.length - 1]! + expect(killCall.command).toBe('powershell.exe') + expect(killCall.args.join(' ')).toContain('Stop-Process -Id 54321') + }) + + test('Stop-Process failure clears cached pid and marks pane dead', async () => { + const calls: Call[] = [] + // Runner returns code 1 only for powershell.exe (kill); wt.exe succeeds. + const backend = new WindowsTerminalBackend({ + runCommand: async (command, args) => { + calls.push({ command, args }) + if (command === 'wt.exe') { + const encIdx = args.indexOf('-EncodedCommand') + if (encIdx >= 0) { + const decoded = Buffer.from(args[encIdx + 1]!, 'base64').toString( + 'utf16le', + ) + const match = decoded.match(/Set-Content -LiteralPath '([^']+)'/) + if (match) { + setTimeout(() => { + writeFile(match[1]!, '54321', 'utf-8').catch(() => {}) + }, 30) + } + } + return { stdout: 'ok', stderr: '', code: 0 } + } + // powershell Stop-Process fails + return { stdout: '', stderr: 'access denied', code: 1 } + }, + getPlatform: () => 'windows', + pidFileDir: tempDir, + }) + const pane = await backend.createTeammatePaneInSwarmView('dier', 'orange') + await backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + + const killed = await backend.killPane(pane.paneId) + expect(killed).toBe(false) // Stop-Process exit 1 → false + + // After kill failure, pane is removed from map: second killPane → false (not retry). + const killedAgain = await backend.killPane(pane.paneId) + expect(killedAgain).toBe(false) + // Critically: only ONE powershell call happened — the second killPane returned + // false from "pane not in map", not from another Stop-Process attempt. + const psCalls = calls.filter(c => c.command === 'powershell.exe') + expect(psCalls.length).toBe(1) + }) + + test('killPane uses cached pid and returns false when pane is unknown', async () => { + const calls: Call[] = [] + const backend = createBackend(calls, { simulatePidWrite: 30 }) + const pane = await backend.createTeammatePaneInSwarmView('cached', 'yellow') + await backend.sendCommandToPane(pane.paneId, "Write-Output 'x'") + + // After sendCommandToPane, pane.pid = 54321 (from simulate). killPane must + // use this cached pid without reading the pidFile at all. + const killed = await backend.killPane(pane.paneId) + expect(killed).toBe(true) + expect(calls[calls.length - 1]!.args.join(' ')).toContain( + 'Stop-Process -Id 54321', + ) + + // After kill, pane is removed — a second killPane must return false. + const killedAgain = await backend.killPane(pane.paneId) + expect(killedAgain).toBe(false) + }) })