Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions apps/memos-local-plugin/core/llm/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,112 @@ export function createLlmClientWithProvider(
let lastFallbackAt: number | null = null;
let lastError: { at: number; message: string } | null = null;

// ─── Circuit breaker state (issue #1897) ─────────────────────────────────
// Per-client breaker that trips when the classifier (`breakerIsTerminal`,
// by default `defaultIsTerminal`) reports a terminal provider error.
// While open, calls are short-circuited inside the facade so the broken
// provider is not contacted again until the cool-down elapses. Half-open:
// calls made after `circuitOpenUntil` reach the provider again; a success
// closes the breaker, a terminal failure re-opens it.
const breakerCfg = config.circuitBreaker ?? {};
// Enabled unless explicitly set to `false`.
const breakerEnabled = breakerCfg.enabled !== false;
// `Math.max` returns NaN if any argument is NaN, and a NaN cool-down would
// make `circuitOpenUntil` NaN: the `now >= until` half-open check could
// then never succeed (breaker stuck open) and the status coalescing window
// would never suppress anything. Fall back to the default in that case.
const breakerRequestedCooldownMs = breakerCfg.cooldownMs ?? 300_000;
const breakerCooldownMs = Math.max(
  30_000,
  Number.isNaN(breakerRequestedCooldownMs) ? 300_000 : breakerRequestedCooldownMs,
);
const breakerIsTerminal = breakerCfg.isTerminal ?? defaultIsTerminal;
// Injectable clock; defaults to wall-clock time.
const breakerNow = breakerCfg.now ?? Date.now;
// Epoch ms until which calls are suppressed; `null` while the breaker is closed.
let circuitOpenUntil: number | null = null;
// Summary of the error that tripped the breaker; `null` while closed.
let circuitOpenedReason: string | null = null;
// Timestamp of the last emitted `circuit_open` status row (for coalescing).
let lastCircuitOpenStatusAt: number | null = null;

/**
 * Tells the facade whether the next call must be suppressed.
 *
 * Returns `false` when the breaker is disabled, has never tripped, or
 * its cool-down deadline has been reached (half-open). Returns `true`
 * only while the deadline is still in the future.
 */
function breakerIsOpen(): boolean {
  // Disabled or closed: nothing to suppress, and no need to read the clock.
  if (!breakerEnabled || circuitOpenUntil === null) return false;
  // Once the deadline is reached we are half-open and let the call through.
  // `circuitOpenUntil` is intentionally left untouched here: the probe's
  // outcome handlers (`breakerRecordSuccess` / `breakerTrip`) are the ones
  // that clear it or push it forward.
  const cooldownElapsed = breakerNow() >= circuitOpenUntil;
  return !cooldownElapsed;
}

/**
 * Opens (or re-opens) the breaker for one cool-down window.
 *
 * No-op when the breaker is disabled. Otherwise records the new
 * deadline and a summary of `err`, and logs a `circuit_breaker.trip`
 * warning.
 *
 * @param err The error that caused the trip; only its summary is stored.
 */
function breakerTrip(err: unknown): void {
  if (breakerEnabled) {
    circuitOpenUntil = breakerNow() + breakerCooldownMs;
    circuitOpenedReason = summarizeErrMessage(err);
    // Forget the previous status emission so the first suppressed call
    // after this trip is guaranteed to produce a `circuit_open` row.
    lastCircuitOpenStatusAt = null;

    const tripInfo = {
      provider: provider.name,
      model: config.model,
      until: circuitOpenUntil,
      reason: circuitOpenedReason,
    };
    facadeLog.warn("circuit_breaker.trip", tripInfo);
  }
}

/**
 * Records a successful call and returns the breaker to the closed state.
 *
 * No-op when the breaker is disabled. A `circuit_breaker.close` info
 * line is logged only if the breaker had actually been tripped; the
 * state reset itself is unconditional.
 */
function breakerRecordSuccess(): void {
  if (!breakerEnabled) return;

  const wasTripped = circuitOpenUntil !== null;
  if (wasTripped) {
    facadeLog.info("circuit_breaker.close", {
      provider: provider.name,
      model: config.model,
    });
  }

  // Clear deadline, reason and the status coalescer together.
  circuitOpenUntil = null;
  circuitOpenedReason = null;
  lastCircuitOpenStatusAt = null;
}

/**
 * Report a suppressed call to the `onStatus` sink, rate-limited.
 *
 * Emissions are coalesced per client: after one `circuit_open` row is
 * sent, further rows are dropped until `max(5s, cooldownMs / 12)` has
 * passed. A fresh trip clears the coalescer, so the first suppressed
 * call after it always emits. Does nothing when no sink is configured.
 *
 * @param opts Call options; `episodeId` and `phase` are forwarded if present.
 * @param op   Operation label attached to the status row.
 */
function maybeEmitCircuitOpenStatus(opts: LlmCallOptions | undefined, op: string): void {
  if (!config.onStatus) return;

  const now = breakerNow();
  const windowMs = Math.max(5_000, Math.floor(breakerCooldownMs / 12));
  const previous = lastCircuitOpenStatusAt;
  const stillCoalescing = previous !== null && now - previous < windowMs;
  if (stillCoalescing) return;

  // Stamp before calling the sink so a throwing sink still counts as an emission.
  lastCircuitOpenStatusAt = now;
  try {
    config.onStatus({
      status: "circuit_open",
      provider: provider.name,
      model: config.model,
      message: circuitOpenedReason ?? "(unknown reason)",
      at: now,
      durationMs: 0,
      op,
      episodeId: opts?.episodeId,
      phase: opts?.phase,
    });
  } catch {
    /* a failing status sink must never break the caller */
  }
}

/**
 * Raises the error callers see when a call is suppressed by the breaker.
 *
 * @throws MemosError with code `LLM_UNAVAILABLE`, a `circuit_open: …`
 *   message, and details carrying `circuitOpen`, `until`, `provider`
 *   and `model`.
 */
function throwBreakerOpen(): never {
  // Fall back to "now" if the deadline was cleared in the meantime.
  const until = circuitOpenUntil ?? breakerNow();
  const reason = circuitOpenedReason ?? "terminal provider error";
  throw new MemosError(ERROR_CODES.LLM_UNAVAILABLE, `circuit_open: ${reason}`, {
    circuitOpen: true,
    until,
    provider: provider.name,
    model: config.model,
  });
}

/**
* Mark a successful primary-provider call. We **do not** clear
* `lastError` / `lastFallbackAt` here — the viewer picks the most
Expand Down Expand Up @@ -151,6 +257,15 @@ export function createLlmClientWithProvider(
opts: LlmCallOptions | undefined,
op: string,
): Promise<{ completion: LlmCompletion }> {
// ── Circuit breaker short-circuit ──
// When the breaker is open we never reach the provider, so no paid
// request is generated. We still emit (coalesced) `circuit_open`
// status rows so the Logs viewer / Overview can surface that
// suppression is happening.
if (breakerIsOpen()) {
maybeEmitCircuitOpenStatus(opts, op);
throwBreakerOpen();
}
requests++;
const startedAt = Date.now();
try {
Expand All @@ -166,6 +281,7 @@ export function createLlmClientWithProvider(
};
record(completion, op, messages);
const okAt = markOk();
breakerRecordSuccess();
notifyStatus({
status: "ok",
provider: provider.name,
Expand Down Expand Up @@ -202,7 +318,16 @@ export function createLlmClientWithProvider(
// bridge saved this call. Tag the slot yellow (`lastFallbackAt`)
// and surface the upstream error to the user via the
// system_error log so they can see *why* fallback engaged.
//
// The circuit breaker stays CLOSED here: from the caller's
// perspective the call was rescued, and tripping the breaker
// on host-fallback success would defeat the point of the
// bridge (it exists precisely to keep going when the primary
// is down). The fallback path also already records the
// primary's failure, so the operator still sees the red trail
// in the Logs viewer.
const fallbackAt = markFallback(err);
breakerRecordSuccess();
notifyOnError(err);
notifyStatus({
status: "fallback",
Expand All @@ -225,6 +350,10 @@ export function createLlmClientWithProvider(
primary: summarizeErr(err),
host: summarizeErr(hostErr),
});
// Primary AND host bridge both failed terminally. Trip on the
// primary error (the one the operator typically needs to fix
// — host bridge failures are usually transient stdio issues).
if (breakerIsTerminal(err)) breakerTrip(err);
notifyOnError(hostErr);
notifyStatus({
status: "error",
Expand All @@ -249,6 +378,7 @@ export function createLlmClientWithProvider(
}
failures++;
const failAt = markFail(err);
if (breakerIsTerminal(err)) breakerTrip(err);
notifyOnError(err);
notifyStatus({
status: "error",
Expand Down Expand Up @@ -415,6 +545,12 @@ export function createLlmClientWithProvider(
const call = buildCallInput(opts, opts?.jsonMode === true);
const ctx = makeCtx(opts, asProviderLog(providerLog));

// Short-circuit stream calls when the breaker is open. We do not
// count a suppressed call against `requests` (no network hit).
if (breakerIsOpen()) {
maybeEmitCircuitOpenStatus(opts, opts?.op ?? "stream");
throwBreakerOpen();
}
requests++;
const start = Date.now();
let acc = "";
Expand Down Expand Up @@ -448,6 +584,7 @@ export function createLlmClientWithProvider(
if (usage?.promptTokens) totalPromptTokens += usage.promptTokens;
if (usage?.completionTokens) totalCompletionTokens += usage.completionTokens;
const okAt = markOk();
breakerRecordSuccess();
notifyStatus({
status: "ok",
provider: provider.name,
Expand All @@ -461,6 +598,7 @@ export function createLlmClientWithProvider(
} catch (err) {
failures++;
const failAt = markFail(err);
if (breakerIsTerminal(err)) breakerTrip(err);
facadeLog.error("stream.failed", { err: summarizeErr(err) });
notifyOnError(err);
notifyStatus({
Expand Down Expand Up @@ -497,6 +635,9 @@ export function createLlmClientWithProvider(
lastOkAt,
lastFallbackAt,
lastError,
circuitOpen: breakerIsOpen(),
circuitOpenUntil,
circuitOpenedReason,
};
},
resetStats(): void {
Expand All @@ -509,6 +650,9 @@ export function createLlmClientWithProvider(
lastOkAt = null;
lastFallbackAt = null;
lastError = null;
circuitOpenUntil = null;
circuitOpenedReason = null;
lastCircuitOpenStatusAt = null;
},
async close(): Promise<void> {
await provider.close?.();
Expand All @@ -522,6 +666,10 @@ export function createLlmClientWithProvider(
timeoutMs: config.timeoutMs,
maxRetries: config.maxRetries,
fallbackToHost: config.fallbackToHost,
circuitBreaker: {
enabled: breakerEnabled,
cooldownMs: breakerCooldownMs,
},
});

return client;
Expand Down Expand Up @@ -562,6 +710,40 @@ function shouldFallback(err: unknown, config: LlmConfig, providerName: LlmProvid
);
}

/**
 * Built-in classifier deciding whether an error should trip the
 * circuit breaker (issue #1897).
 *
 * "Terminal" means the failure is expected to repeat until an operator
 * steps in (e.g. tops up the balance or fixes the API key), so retrying
 * only produces more failing requests and audit noise.
 *
 * Only `MemosError`s coded `LLM_UNAVAILABLE` are considered. Such an
 * error is terminal when either:
 *   1. `details.status` coerces to 401, 402 or 403, or
 *   2. its lower-cased message contains one of a fixed set of
 *      auth/billing phrases — this catches providers that report e.g.
 *      "Insufficient Balance" under a different HTTP status.
 *
 * @param err Any thrown value.
 * @returns `true` if the breaker should open for this error.
 */
function defaultIsTerminal(err: unknown): boolean {
  if (!(err instanceof MemosError) || err.code !== ERROR_CODES.LLM_UNAVAILABLE) {
    return false;
  }

  // `Number(undefined)` is NaN, which matches none of the cases below.
  const details = err.details as { status?: unknown } | undefined;
  switch (Number(details?.status)) {
    case 401:
    case 402:
    case 403:
      return true;
  }

  const terminalPhrases = [
    "insufficient balance",
    "invalid api key",
    "invalid_api_key",
    "unauthorized",
    "account suspended",
    "billing",
  ];
  const text = (err.message ?? "").toLowerCase();
  return terminalPhrases.some((phrase) => text.includes(phrase));
}

// ─── Logger adapter ──────────────────────────────────────────────────────────

function asProviderLog(log: Logger): LlmProviderLogger {
Expand Down
2 changes: 2 additions & 0 deletions apps/memos-local-plugin/core/llm/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export { LocalOnlyLlmProvider } from "./providers/local-only.js";
export * from "./prompts/index.js";
export type {
LlmCallOptions,
LlmCircuitBreakerConfig,
LlmCompleteJsonOptions,
LlmCompletion,
LlmClient,
Expand All @@ -40,6 +41,7 @@ export type {
LlmProviderLogger,
LlmProviderName,
LlmRole,
LlmStatusDetail,
LlmStreamChunk,
LlmUsage,
ProviderCallInput,
Expand Down
40 changes: 39 additions & 1 deletion apps/memos-local-plugin/core/llm/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,33 @@ export interface LlmConfig {
* daemon can display status produced by a separate stdio bridge.
*/
onStatus?: (detail: LlmStatusDetail) => void;
/**
* Optional circuit breaker config. The breaker trips on terminal
* provider errors (HTTP 401/402/403, or well-known phrases like
* "insufficient balance" / "invalid api key" / "unauthorized" /
* "account suspended" / "billing") and short-circuits subsequent
* calls for a cool-down window. Defaults to enabled. See
* `apps/memos-local-plugin/openspec/changes/.../design.md`
* (issue #1897) for the full state machine.
*/
circuitBreaker?: LlmCircuitBreakerConfig;
}

/**
 * Tuning knobs for the per-client LLM circuit breaker. Every field is
 * optional; omitting the whole object yields an enabled breaker with
 * the defaults described below.
 */
export interface LlmCircuitBreakerConfig {
/**
 * Master switch. The breaker is active unless this is explicitly
 * `false` (so `undefined` means enabled). Set to `false` to restore
 * the legacy (no-breaker) behavior.
 */
enabled?: boolean;
/**
 * Cool-down window, in milliseconds, before the breaker enters
 * half-open and lets calls reach the provider again. Default
 * 300_000 ms (5 minutes); values below 30_000 ms are clamped up to
 * 30_000 ms.
 */
cooldownMs?: number;
/**
 * Override the default classifier. Receives the thrown value and
 * returns true if the error should trip the breaker (terminal /
 * non-recoverable). When omitted, the built-in classifier is used.
 */
isTerminal?: (err: unknown) => boolean;
/**
 * Injected clock for tests; must return the current time in epoch
 * milliseconds. Default `Date.now`.
 */
now?: () => number;
}

export interface LlmErrorDetail {
Expand All @@ -67,7 +94,7 @@ export interface LlmErrorDetail {
}

export interface LlmStatusDetail {
status: "ok" | "fallback" | "error";
status: "ok" | "fallback" | "error" | "circuit_open";
provider: LlmProviderName | string;
model: string;
message?: string;
Expand Down Expand Up @@ -260,6 +287,17 @@ export interface LlmClientStats extends LastCallStatus {
retries: number;
totalPromptTokens: number;
totalCompletionTokens: number;
/**
* True while the per-client circuit breaker is open (and any
* cooldown timer has not yet elapsed). When true, further calls are
* short-circuited inside the facade and throw immediately without
* touching the provider. See issue #1897.
*/
circuitOpen: boolean;
/** Epoch ms at which the open breaker becomes eligible for half-open probe. */
circuitOpenUntil: number | null;
/** Free-text reason from the error that opened the breaker. */
circuitOpenedReason: string | null;
}

export interface LlmClient {
Expand Down
Loading
Loading