Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 118 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,85 @@ async function patchPayloadMetadata(
}
}

/**
* Captured autorouter metadata per-session, populated by the onResponse callback
* installed in `buildWrapper`. Exposed via the `taas.autorouter.lastRoute` gateway
* RPC so Alien AI Studio (or any other client) can pull the latest TaaS routing
* decision for a session — including the actual model chosen by the autorouter,
* the algorithm used, the source of that algorithm (org/dept/key/user/system),
* the thinking level applied, and the chosen model's context window.
*
* The map is keyed by the affinity session ID we already derive in
* `resolveSessionId(ctx.workspaceDir)`. Stored values are bounded — see
* `LAST_ROUTE_LIMIT` — to avoid unbounded growth in long-lived gateways.
*/
type AutorouterCapture = {
sessionId: string
capturedAt: number
autorouterModel: string | null
autorouterAlgo: string | null
autorouterAlgoSource: string | null
thinkingApplied: string | null
routedContextWindow: number | null
}

const LAST_ROUTE_LIMIT = 256
const lastRouteBySessionId = new Map<string, AutorouterCapture>()

function pruneLastRouteMap(): void {
if (lastRouteBySessionId.size <= LAST_ROUTE_LIMIT) return
// Drop oldest entries by capturedAt ascending until we're back under the cap.
const entries = [...lastRouteBySessionId.entries()].sort(
(a, b) => a[1].capturedAt - b[1].capturedAt
)
const toDrop = entries.length - LAST_ROUTE_LIMIT
for (let i = 0; i < toDrop; i++) {
lastRouteBySessionId.delete(entries[i][0])
}
}

function captureAutorouterFromHeaders(
sessionId: string,
headers: Record<string, string>
): void {
// Header names from TaaS proxy are emitted in canonical "X-TaaS-*" form
// but Node/undici lowercases incoming response headers. Read case-insensitively.
const lowered: Record<string, string> = {}
for (const [k, v] of Object.entries(headers)) {
if (typeof v === "string") lowered[k.toLowerCase()] = v
}
const autorouted = lowered["x-taas-autorouted"]
if (autorouted !== "true") return // ignore non-autorouted responses
const capture: AutorouterCapture = {
sessionId,
capturedAt: Date.now(),
autorouterModel: lowered["x-taas-autorouter-model"] ?? null,
autorouterAlgo: lowered["x-taas-autorouter-mode"] ?? null,
autorouterAlgoSource: lowered["x-taas-autorouter-algorithm-source"] ?? null,
thinkingApplied: lowered["x-taas-thinking-applied"] ?? null,
routedContextWindow: (() => {
const raw = lowered["x-taas-routed-context-window"]
if (!raw) return null
const n = Number(raw)
return Number.isFinite(n) && n > 0 ? n : null
})(),
}
lastRouteBySessionId.set(sessionId, capture)
pruneLastRouteMap()
if (isDev) {
console.debug(
`[taas-affinity] captured autorouter sessionId=${sessionId} ` +
`model=${capture.autorouterModel} algo=${capture.autorouterAlgo} ` +
`source=${capture.autorouterAlgoSource} thinking=${capture.thinkingApplied} ` +
`ctxWindow=${capture.routedContextWindow}`
)
}
}

function getLastRouteForSession(sessionId: string): AutorouterCapture | null {
return lastRouteBySessionId.get(sessionId) ?? null
}

function buildWrapper(ctx: ProviderWrapStreamFnContext) {
const { streamFn } = ctx
if (!streamFn) return undefined
Expand Down Expand Up @@ -642,7 +721,23 @@ function buildWrapper(ctx: ProviderWrapStreamFnContext) {
if (prevOnPayload) return prevOnPayload(patched, payloadModel)
return patched
}
return inner(model, context, { ...options, onPayload })
const prevOnResponse = options?.onResponse
const onResponse: NonNullable<typeof options>["onResponse"] = async (
response,
responseModel
) => {
try {
captureAutorouterFromHeaders(sessionId, response?.headers ?? {})
} catch (err) {
if (isDev) {
console.debug(
`[taas-affinity] onResponse capture failed: ${(err as Error)?.message ?? err}`
)
}
}
if (prevOnResponse) await prevOnResponse(response, responseModel)
}
return inner(model, context, { ...options, onPayload, onResponse })
} as typeof inner
}

Expand Down Expand Up @@ -700,5 +795,27 @@ export default {
wrapStreamFn: buildWrapper,
resolveTransportTurnState: buildTransportTurnState,
})

// Expose captured TaaS autorouter metadata to gateway clients (Studio, etc.).
// The Alien AI Studio polls this after each turn to populate the model/algo/
// thinking/context-window fields in the AgentChatPanel for cloudsigma/auto and
// other autorouted requests. See PRD "Alien AI Studio - Auto-Routing Model UX"
// (Confluence 1901363271).
if (typeof api.registerGatewayMethod === "function") api.registerGatewayMethod(
"taas.autorouter.lastRoute",
async ({ params, respond }) => {
// Accept either { workspaceDir } (preferred — derives sessionId the
// same way the wrapper does) or { sessionId } (direct lookup).
const p = (params ?? {}) as Record<string, unknown>
const directSessionId =
typeof p.sessionId === "string" ? p.sessionId : null
const workspaceDir =
typeof p.workspaceDir === "string" ? p.workspaceDir : undefined
const resolvedSessionId =
directSessionId ?? resolveSessionId(workspaceDir).sessionId
const captured = getLastRouteForSession(resolvedSessionId)
respond(true, { sessionId: resolvedSessionId, capture: captured })
}
)
},
}
88 changes: 88 additions & 0 deletions test/smoke.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,91 @@ const transportState = provider.resolveTransportTurnState({
assert.match(transportState.headers["X-Session-Id"], /^oc:[a-f0-9]{16}$/)

console.log("smoke ok")

// === autorouter capture (R7.1/R7.2 from Studio PRD) ===
// Verify that the wrapper threads an onResponse callback that captures
// X-TaaS-* headers and that taas.autorouter.lastRoute returns them.
let registeredMethod
let registeredHandler
const apiWithGateway = {
registerProvider(candidate) {
// keep previous provider too — second registration
},
registerGatewayMethod(name, handler) {
registeredMethod = name
registeredHandler = handler
},
}
plugin.register(apiWithGateway)
assert.equal(registeredMethod, "taas.autorouter.lastRoute", "gateway method registered")
assert.equal(typeof registeredHandler, "function", "handler is a function")

// Drive the wrapper through onResponse with synthetic autorouter headers.
const captureStreamFn = async (_model, _context, options = {}) => {
// pi-ai protocol: call onPayload first (existing behaviour), then onResponse
// with the simulated HTTP response object, then stream.
if (options.onPayload) await options.onPayload({ messages: [], metadata: {} }, _model)
if (options.onResponse) {
await options.onResponse(
{
status: 200,
headers: {
"x-taas-autorouted": "true",
"x-taas-autorouter-model": "cloudsigma/gpt-5",
"x-taas-autorouter-mode": "best_fit",
"x-taas-autorouter-algorithm-source": "api_key_default",
"x-taas-thinking-applied": "medium",
"x-taas-routed-context-window": "128000",
},
},
_model
)
}
}
const captureWrapped = provider.wrapStreamFn({
streamFn: captureStreamFn,
workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke",
provider: "cloudsigma",
modelId: "cloudsigma/auto",
model: { id: "cloudsigma/auto" },
})
await captureWrapped("model", { messages: [] }, {})

// Now invoke the registered gateway handler and assert it returns the capture.
let respondedOk
let respondedPayload
await registeredHandler({
req: { id: "test" },
params: { workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke" },
client: null,
isWebchatConnect: () => false,
respond: (ok, payload) => {
respondedOk = ok
respondedPayload = payload
},
context: {},
})
assert.equal(respondedOk, true, "handler responded ok")
assert.ok(respondedPayload, "payload present")
assert.match(respondedPayload.sessionId, /^oc:[a-f0-9]{16}$/, "sessionId looks valid")
assert.ok(respondedPayload.capture, "capture present")
assert.equal(respondedPayload.capture.autorouterModel, "cloudsigma/gpt-5")
assert.equal(respondedPayload.capture.autorouterAlgo, "best_fit")
assert.equal(respondedPayload.capture.autorouterAlgoSource, "api_key_default")
assert.equal(respondedPayload.capture.thinkingApplied, "medium")
assert.equal(respondedPayload.capture.routedContextWindow, 128000)

// Non-autorouted response should NOT overwrite (we explicitly drop it)
await captureWrapped("model", { messages: [] }, {})
await registeredHandler({
req: { id: "t2" },
params: { workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke" },
client: null,
isWebchatConnect: () => false,
respond: (_ok, payload) => {
assert.equal(payload.capture.autorouterModel, "cloudsigma/gpt-5", "still holds last good")
},
context: {},
})

console.log("autorouter capture smoke ok")
Loading