From 86b1a5fe936c1aa8d18133bd7f8066cc5a36e3e1 Mon Sep 17 00:00:00 2001 From: cloudsigma Date: Mon, 25 May 2026 13:58:04 +0000 Subject: [PATCH] feat: capture TaaS autorouter response headers + expose via gateway RPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an onResponse callback to the wrapStreamFn options that reads X-TaaS-Autorouter-* and X-TaaS-Thinking-Applied and X-TaaS-Routed-Context-Window response headers from autorouted TaaS calls, stores them in an in-memory per-session map (bounded to 256 entries, oldest-evicted), and exposes them via a new gateway RPC: taas.autorouter.lastRoute. Alien AI Studio (PRD Confluence 1901363271) polls this method after each turn to populate the routed model, algo, source, thinking-applied, and context-window fields in the AgentChatPanel for cloudsigma/auto and other autorouted requests. Smoke test extended to cover both the header capture and the gateway method response shape. Zero OpenClaw core changes — uses existing wrapStreamFn + registerGatewayMethod hooks. Registration is guarded for older API surfaces that lack registerGatewayMethod. --- index.ts | 119 ++++++++++++++++++++++++++++++++++++++++++++++++- test/smoke.mjs | 88 ++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index b9521f7..794d931 100644 --- a/index.ts +++ b/index.ts @@ -614,6 +614,85 @@ async function patchPayloadMetadata( } } +/** + * Captured autorouter metadata per-session, populated by the onResponse callback + * installed in `buildWrapper`. Exposed via the `taas.autorouter.lastRoute` gateway + * RPC so Alien AI Studio (or any other client) can pull the latest TaaS routing + * decision for a session — including the actual model chosen by the autorouter, + * the algorithm used, the source of that algorithm (org/dept/key/user/system), + * the thinking level applied, and the chosen model's context window. + * + * The map is keyed by the affinity session ID we already derive in + * `resolveSessionId(ctx.workspaceDir)`. Stored values are bounded — see + * `LAST_ROUTE_LIMIT` — to avoid unbounded growth in long-lived gateways. + */ +type AutorouterCapture = { + sessionId: string + capturedAt: number + autorouterModel: string | null + autorouterAlgo: string | null + autorouterAlgoSource: string | null + thinkingApplied: string | null + routedContextWindow: number | null +} + +const LAST_ROUTE_LIMIT = 256 +const lastRouteBySessionId = new Map() + +function pruneLastRouteMap(): void { + if (lastRouteBySessionId.size <= LAST_ROUTE_LIMIT) return + // Drop oldest entries by capturedAt ascending until we're back under the cap. + const entries = [...lastRouteBySessionId.entries()].sort( + (a, b) => a[1].capturedAt - b[1].capturedAt + ) + const toDrop = entries.length - LAST_ROUTE_LIMIT + for (let i = 0; i < toDrop; i++) { + lastRouteBySessionId.delete(entries[i][0]) + } +} + +function captureAutorouterFromHeaders( + sessionId: string, + headers: Record +): void { + // Header names from TaaS proxy are emitted in canonical "X-TaaS-*" form + // but Node/undici lowercases incoming response headers. Read case-insensitively. + const lowered: Record = {} + for (const [k, v] of Object.entries(headers)) { + if (typeof v === "string") lowered[k.toLowerCase()] = v + } + const autorouted = lowered["x-taas-autorouted"] + if (autorouted !== "true") return // ignore non-autorouted responses + const capture: AutorouterCapture = { + sessionId, + capturedAt: Date.now(), + autorouterModel: lowered["x-taas-autorouter-model"] ?? null, + autorouterAlgo: lowered["x-taas-autorouter-mode"] ?? null, + autorouterAlgoSource: lowered["x-taas-autorouter-algorithm-source"] ?? null, + thinkingApplied: lowered["x-taas-thinking-applied"] ?? null, + routedContextWindow: (() => { + const raw = lowered["x-taas-routed-context-window"] + if (!raw) return null + const n = Number(raw) + return Number.isFinite(n) && n > 0 ? n : null + })(), + } + lastRouteBySessionId.set(sessionId, capture) + pruneLastRouteMap() + if (isDev) { + console.debug( + `[taas-affinity] captured autorouter sessionId=${sessionId} ` + + `model=${capture.autorouterModel} algo=${capture.autorouterAlgo} ` + + `source=${capture.autorouterAlgoSource} thinking=${capture.thinkingApplied} ` + + `ctxWindow=${capture.routedContextWindow}` + ) + } +} + +function getLastRouteForSession(sessionId: string): AutorouterCapture | null { + return lastRouteBySessionId.get(sessionId) ?? null +} + function buildWrapper(ctx: ProviderWrapStreamFnContext) { const { streamFn } = ctx if (!streamFn) return undefined @@ -642,7 +721,23 @@ function buildWrapper(ctx: ProviderWrapStreamFnContext) { if (prevOnPayload) return prevOnPayload(patched, payloadModel) return patched } - return inner(model, context, { ...options, onPayload }) + const prevOnResponse = options?.onResponse + const onResponse: NonNullable["onResponse"] = async ( + response, + responseModel + ) => { + try { + captureAutorouterFromHeaders(sessionId, response?.headers ?? {}) + } catch (err) { + if (isDev) { + console.debug( + `[taas-affinity] onResponse capture failed: ${(err as Error)?.message ?? err}` + ) + } + } + if (prevOnResponse) await prevOnResponse(response, responseModel) + } + return inner(model, context, { ...options, onPayload, onResponse }) } as typeof inner } @@ -700,5 +795,27 @@ export default { wrapStreamFn: buildWrapper, resolveTransportTurnState: buildTransportTurnState, }) + + // Expose captured TaaS autorouter metadata to gateway clients (Studio, etc.). + // The Alien AI Studio polls this after each turn to populate the model/algo/ + // thinking/context-window fields in the AgentChatPanel for cloudsigma/auto and + // other autorouted requests. See PRD "Alien AI Studio - Auto-Routing Model UX" + // (Confluence 1901363271). + if (typeof api.registerGatewayMethod === "function") api.registerGatewayMethod( + "taas.autorouter.lastRoute", + async ({ params, respond }) => { + // Accept either { workspaceDir } (preferred — derives sessionId the + // same way the wrapper does) or { sessionId } (direct lookup). + const p = (params ?? {}) as Record + const directSessionId = + typeof p.sessionId === "string" ? p.sessionId : null + const workspaceDir = + typeof p.workspaceDir === "string" ? p.workspaceDir : undefined + const resolvedSessionId = + directSessionId ?? resolveSessionId(workspaceDir).sessionId + const captured = getLastRouteForSession(resolvedSessionId) + respond(true, { sessionId: resolvedSessionId, capture: captured }) + } + ) }, } diff --git a/test/smoke.mjs b/test/smoke.mjs index cdfbc21..e15efc4 100644 --- a/test/smoke.mjs +++ b/test/smoke.mjs @@ -72,3 +72,91 @@ const transportState = provider.resolveTransportTurnState({ assert.match(transportState.headers["X-Session-Id"], /^oc:[a-f0-9]{16}$/) console.log("smoke ok") + +// === autorouter capture (R7.1/R7.2 from Studio PRD) === +// Verify that the wrapper threads an onResponse callback that captures +// X-TaaS-* headers and that taas.autorouter.lastRoute returns them. +let registeredMethod +let registeredHandler +const apiWithGateway = { + registerProvider(candidate) { + // keep previous provider too — second registration + }, + registerGatewayMethod(name, handler) { + registeredMethod = name + registeredHandler = handler + }, +} +plugin.register(apiWithGateway) +assert.equal(registeredMethod, "taas.autorouter.lastRoute", "gateway method registered") +assert.equal(typeof registeredHandler, "function", "handler is a function") + +// Drive the wrapper through onResponse with synthetic autorouter headers. +const captureStreamFn = async (_model, _context, options = {}) => { + // pi-ai protocol: call onPayload first (existing behaviour), then onResponse + // with the simulated HTTP response object, then stream. + if (options.onPayload) await options.onPayload({ messages: [], metadata: {} }, _model) + if (options.onResponse) { + await options.onResponse( + { + status: 200, + headers: { + "x-taas-autorouted": "true", + "x-taas-autorouter-model": "cloudsigma/gpt-5", + "x-taas-autorouter-mode": "best_fit", + "x-taas-autorouter-algorithm-source": "api_key_default", + "x-taas-thinking-applied": "medium", + "x-taas-routed-context-window": "128000", + }, + }, + _model + ) + } +} +const captureWrapped = provider.wrapStreamFn({ + streamFn: captureStreamFn, + workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke", + provider: "cloudsigma", + modelId: "cloudsigma/auto", + model: { id: "cloudsigma/auto" }, +}) +await captureWrapped("model", { messages: [] }, {}) + +// Now invoke the registered gateway handler and assert it returns the capture. +let respondedOk +let respondedPayload +await registeredHandler({ + req: { id: "test" }, + params: { workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke" }, + client: null, + isWebchatConnect: () => false, + respond: (ok, payload) => { + respondedOk = ok + respondedPayload = payload + }, + context: {}, +}) +assert.equal(respondedOk, true, "handler responded ok") +assert.ok(respondedPayload, "payload present") +assert.match(respondedPayload.sessionId, /^oc:[a-f0-9]{16}$/, "sessionId looks valid") +assert.ok(respondedPayload.capture, "capture present") +assert.equal(respondedPayload.capture.autorouterModel, "cloudsigma/gpt-5") +assert.equal(respondedPayload.capture.autorouterAlgo, "best_fit") +assert.equal(respondedPayload.capture.autorouterAlgoSource, "api_key_default") +assert.equal(respondedPayload.capture.thinkingApplied, "medium") +assert.equal(respondedPayload.capture.routedContextWindow, 128000) + +// Non-autorouted response should NOT overwrite (we explicitly drop it) +await captureWrapped("model", { messages: [] }, {}) +await registeredHandler({ + req: { id: "t2" }, + params: { workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke" }, + client: null, + isWebchatConnect: () => false, + respond: (_ok, payload) => { + assert.equal(payload.capture.autorouterModel, "cloudsigma/gpt-5", "still holds last good") + }, + context: {}, +}) + +console.log("autorouter capture smoke ok")