diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 0000000..369c978 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,30 @@ +# gitleaks configuration for conclave. +# +# conclave is a bring-your-own-keys tool whose security tests deliberately plant +# OBVIOUSLY-FAKE, key-SHAPED strings (e.g. "sk-FAKE...", "AIza-FAKE...") to prove +# that redact() / the cache / streaming never let a real key escape. Those fake +# fixtures are not secrets, but a key-shaped literal can trip gitleaks' generic +# rules. This allowlist scopes that exception to the test tree ONLY, so a real +# secret committed anywhere in the source or docs is still caught. +# +# We extend gitleaks' bundled default ruleset rather than replacing it, so every +# upstream detector stays active outside the allowlisted paths. + +[extend] +useDefault = true + +[allowlist] +description = "Fake, key-shaped fixtures used by the key-leak regression tests are not secrets." +# Restrict the allowance to the tests directory: production code, docs, and +# config are still fully scanned. +paths = [ + '''tests/.*\.py''', +] +# Defense in depth: also allow the explicit fake-key marker tokens anywhere they +# appear, so a fixture moved/quoted in a doc example is not flagged. These are +# intentionally synthetic sentinels, never real credentials. +regexes = [ + '''FAKE-?[A-Za-z0-9_\-]*''', + '''sk-(test|FAKE|streamleak|CONCLAVE)[A-Za-z0-9_\-]*''', + '''AIza-?(dummy|test|FAKE)[A-Za-z0-9_\-]*''', +] diff --git a/SECURITY.md b/SECURITY.md index c8519cb..52eb210 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -7,6 +7,155 @@ handling: a weakness that causes a real API key to be stored, logged, serialized or echoed back to the user breaks the core trust promise. We treat reports against that surface as the highest priority. +## Threat model + +This section is the honest, current map of what conclave's key handling **does** +and **does not** defend against. It backs the headline BYO-keys claim. The threat +we model is **credential leakage**: a real provider API key escaping the process +into a result object, a log line, a serialized payload, an on-disk cache file, or +terminal output. Accuracy here is the product — we document accepted limitations +rather than overclaim. + +### Trust boundary + +The boundary conclave defends is **"the user's key value never leaves the in-flight +HTTPS request to the provider, except where the user themselves directs it."** + +``` + environment ──(read by NAME, at call time)──▶ adapter builds request + │ │ + │ headers carry the key + │ ▼ + └────────────── INSIDE the boundary ──▶ httpx → provider (TLS) + │ + ════════════ TRUST BOUNDARY ══════════════ │ response / error + ▼ + redact() scrubs every error/diagnostic string conclave produces + │ + ▼ + CouncilResult / logs / cache / stdout ◀── OUTSIDE the boundary (must be key-free) +``` + +Inside the boundary the key value legitimately exists: in the environment, in the +local variable that reads it, and in the request headers handed to httpx. Outside +the boundary — anything conclave returns, logs, caches, serializes, or prints — +must be free of key material. Everything below is about keeping that second set +clean. + +### What IS protected + +- **Name-only key handling.** Keys are referenced by env var **name** in config + and code. The value is read from the environment **at call time** in + `providers._resolve_key`, used only to build the request, and **never assigned + to any object, cached field, or model**. `registry.key_present` / + `key_source` report only *whether* a var is set and its *name* — never its value. +- **`redact()` scope.** Every error/diagnostic string conclave surfaces passes + through `conclave.adapters.base.redact()` before it reaches a result field, a + log, or stdout. `redact()` scrubs, in order: (1) the live **value** of every env + var conclave knows a name for — built-in providers **and** custom-endpoint + `env_var` names declared in config (this catches a BYO key of *any* shape); + (2) `x-api-key` / `x-goog-api-key` header echoes; (3) `Authorization: Bearer …` + tokens; (4) standalone provider-shaped key tokens (`sk-…`, `xai-…`, `pplx-…`, + `AIza…`). `ProviderError` redacts **on construction**; the provider call path + redacts again at capture (belt-and-suspenders). +- **No key persistence — including the cache.** conclave never writes a key to + disk. The optional result cache (`conclave.cache`, off by default) stores only + the already-redacted `CouncilResult` (`model_dump(mode="json")`), and its cache + key is a SHA-256 over prompt + mode + member/synthesizer **names** + model ids + + params — **no env var name or value** is read when computing it. So neither a + cache file, a cache filename, nor the cache key can carry a secret. +- **Streaming path.** Streamed text deltas carry only parsed answer **content**. + A mid-stream provider error is captured and **redacted** on the final + `ModelAnswer` exactly like the buffered path; the error path emits no text + delta, so a key echoed in an error reaches neither a streamed event nor the + final answer. +- **Partial-failure isolation.** One member failing never aborts a run, and each + member's error is independently redacted, so a leak in one provider's error + response cannot smear into another member's result. The defense-in-depth + catch-alls in `Council.fan_out` and `streaming._drive_member` (which only fire on + an *unexpected* raise escaping the already-redacting provider call) also run + their exception text through `redact()`, so the "every surfaced error string is + scrubbed" invariant holds even on those paths. +- **`repr` / `str` safety.** No config, adapter, or result object stores a key, so + none can render one in a `repr`/`str` or a traceback frame that references it. + The transient request `headers` dict does carry the key (it must, to authenticate), + but it is built inside the adapter and handed straight to the transport — it is + not retained on any object. +- **Exception cause-chain hardening.** The transport raises its `TransportError` + with the cause chain dropped, so the surfaced error retains **no** reference to + the underlying httpx exception. That httpx exception's `.request.headers` holds + the live auth header; had it survived as `__cause__`/`__context__` it would be one + cause-chain hop from the surfaced error and would leak the key via + `traceback.format_exception`, `logging.exception`, or a cause-chain `repr` of a + transport error. `raise … from None` clears `__cause__` and sets + `__suppress_context__` (so no standard formatter renders the httpx exception or + its headers), and the transport additionally nulls `__context__` at a boundary so + even a direct `err.__context__` attribute walk finds no header-bearing exception. +- **CLI.** `conclave providers` prints key **presence** and the env var **name**, + never a value; `--json` serializes the same redacted `CouncilResult`. + +These guarantees are pinned by the regression suite in +[`tests/test_keyleak_audit.py`](tests/test_keyleak_audit.py) (one test class per +vector below), plus the redaction/cache/streaming tests in `tests/test_providers.py`, +`tests/test_cache.py`, and `tests/test_streaming.py`. + +### What redact() does NOT cover — accepted limitations + +`redact()` is a defense for the strings **conclave itself** produces. It is not a +universal egress filter, and we do not claim it is. Known gaps, accepted for 1.0: + +- **httpx / httpcore DEBUG logging (out of band — guarded by default).** httpx and + httpcore have their own loggers. At **DEBUG** level httpcore logs full request + headers, including the `Authorization` / `x-api-key` value, to whatever handler + the host application configured. This bypasses `redact()` entirely — it never + sees those records. The guard against it is now **default-on, opt-out**: + constructing a `Council` automatically calls `conclave.guard_transport_logging()`, + which installs a filter that drops httpx/httpcore **DEBUG** records (the only + level that emits header content) while leaving INFO+ diagnostics intact. The + guard is scoped to the `httpx`/`httpcore` loggers only — it never touches the + host application's root logger or any other logger. Opt out with + `Council(…, allow_transport_debug_logging=True)` for the rare case where you + need that DEBUG band in a process that holds no real keys; you remain responsible + for it then. Consumers using the provider functions directly **without** a + `Council` can install the same guard by calling `conclave.guard_transport_logging()` + once at startup (it is idempotent). Either way, the standing guidance remains: + do not enable httpx/httpcore DEBUG logging in a process that holds real provider + keys (e.g. avoid `logging.basicConfig(level=logging.DEBUG)` process-wide). +- **Partial / URL-encoded / transformed key fragments.** `redact()` masks the + exact env-var value and a fixed set of known key *shapes*. It does **not** catch + a key that a provider has split, truncated, URL-encoded, base64-wrapped, or + otherwise transformed before echoing it back, **unless** that transformed form + still equals the live env-var value (the value-based pass) or matches a known + shape. A novel provider error that leaks `…` of a key, or a + percent-encoded form, can slip the pattern pass. The value-based pass is the + primary defense; the shape patterns are best-effort secondary. +- **Anything the user explicitly logs or prints.** If a consumer reads a key from + the environment themselves, or logs/prints the request headers, the raw + `os.environ`, or their own constructed Authorization header, that is outside + conclave's control. conclave only governs the strings it returns and logs. +- **The in-flight request and the provider side.** The key is, by necessity, + present in the request headers and transmitted to the provider over TLS. What the + provider does with it (its logs, its breach posture) is outside scope. Memory + inspection of the running process (a local attacker with debugger access) is also + out of scope — the env var value is in process memory by design. +- **Dependencies.** Vulnerabilities in httpx, pydantic, typer, pyyaml, or rich + themselves are upstream; report conclave's *use* of them to us if exploitable, + but the libraries' own CVEs belong upstream. CI runs gitleaks on every push. + +### Key-leak vector map (what a reviewer probes day 1) + +| # | Vector | Risk | Status | +|---|--------|------|--------| +| 1 | Cache write path ordering | HIGH if pre-redaction | **Protected** — cache stores only the redacted `CouncilResult`; key never in file/filename/key. Test: V1. | +| 2 | Streaming chunk path | MED | **Protected** — deltas are answer content; mid-stream errors redacted on the final answer, never streamed. Test: V2. | +| 3 | config/transport `__repr__` in tracebacks | MED | **Protected** — no object stores a key; transient headers are not retained. Test: V3. | +| 4 | Provider 400/422 echoing request fragments | MED | **Protected** — error capture runs through `redact()` (and `ProviderError` redacts on construction). Test: V4. | +| 5 | httpx/httpcore DEBUG logging | HIGH (bypasses redact) | **Default-on guard (Council installs it) + opt-out** — `Council.__init__` calls `guard_transport_logging()` automatically; opt out with `allow_transport_debug_logging=True`. Tests: V5, V9. | +| 6 | redact() misses URL-encoded / partial fragments | MED | **Accepted limitation** — documented above; value-based pass is primary, shape patterns best-effort. | +| 7 | Test fixtures with key-shaped strings | LOW | **Protected** — all fixtures use obviously-fake `…FAKE…` patterns; `.gitleaks.toml` allowlists the test tree only. | +| 8 | Partial-failure catch-all error construction (audit-found; not in original map) | LOW | **Protected** — `fan_out` / `_drive_member` catch-alls now `redact()` the raw exception text too. Test: V7. | +| 9 | TransportError cause chain retaining the httpx exception (header-bearing `.request`) | HIGH (leaks via traceback/`logging.exception`) | **Protected** — transport raises `… from None`; surfaced error keeps no `__cause__`/`__context__` ref to the httpx exception, so its auth header cannot leak via traceback/cause-chain repr. Test: V8. | + ## Reporting a vulnerability **Do not open a public GitHub issue, pull request, or discussion for a security diff --git a/src/conclave/__init__.py b/src/conclave/__init__.py index 110447e..ba5761f 100644 --- a/src/conclave/__init__.py +++ b/src/conclave/__init__.py @@ -49,7 +49,7 @@ StreamEvent, TokenUsage, ) -from .transport import aclose +from .transport import aclose, guard_transport_logging __version__ = "0.3.0" @@ -64,5 +64,6 @@ "ConclaveConfig", "load_config", "aclose", + "guard_transport_logging", "__version__", ] diff --git a/src/conclave/cache.py b/src/conclave/cache.py index 455e303..71be1b9 100644 --- a/src/conclave/cache.py +++ b/src/conclave/cache.py @@ -195,6 +195,17 @@ def store(key: str, result: CouncilResult) -> None: try: path = _entry_path(key) path.parent.mkdir(parents=True, exist_ok=True) + # KEY-LEAK INVARIANT (audit vector 1): the cache only ever persists a + # CouncilResult that has ALREADY passed through redaction upstream. Every + # error string on the result (ModelAnswer.error, synthesis_error, + # verdict_error) is scrubbed by redact() at the point of capture in + # conclave.providers, BEFORE it is placed on the result and therefore long + # before it reaches this write. Member/synthesis answer TEXT is provider + # content, never key material. The cache KEY (make_key) is composed solely + # of prompt + mode + member/synthesizer NAMES + model ids + params -- no + # env var name or value is read here. Net: no raw key (name or value) can + # reach a cache file or filename. Do not move any un-redacted capture into + # the result after this contract -- it would persist a secret to disk. payload = result.model_dump(mode="json") payload["cached"] = False # Atomic-ish write: write to a temp sibling then replace, so a crash mid diff --git a/src/conclave/council.py b/src/conclave/council.py index 6f7508f..8cd8737 100644 --- a/src/conclave/council.py +++ b/src/conclave/council.py @@ -17,6 +17,7 @@ from . import cache as cache_mod from . import transport +from .adapters.base import redact from .config import ConclaveConfig, load_config from .logging import get_logger from .models import CouncilResult, ModelAnswer, StreamEvent @@ -55,6 +56,20 @@ class Council: identical repeat run is served from the on-disk cache instead of re-calling the providers. The cache never persists API keys -- see :mod:`conclave.cache`. + allow_transport_debug_logging: Opt **out** of the transport-logging guard. + Defaults to ``False``, which means the guard is **ON**: constructing a + ``Council`` installs :func:`conclave.transport.guard_transport_logging` + so httpx/httpcore ``DEBUG`` records -- the only band that emits request + headers, including the live ``Authorization``/``x-api-key`` value -- are + dropped before any handler formats them (key-leak audit, RANK 6). The + guard is idempotent, so constructing many councils installs it once. The + filter is scoped to the ``httpx``/``httpcore`` loggers only; it never + touches the host application's root logger or any other logger. + Set ``True`` to skip installation for the rare case where you genuinely + need httpx/httpcore ``DEBUG`` output in a process that does not hold real + keys; you remain responsible for that band then. Consumers using the + provider functions directly (without a ``Council``) can still call + :func:`conclave.guard_transport_logging` themselves. Example: >>> council = Council(models=["grok", "perplexity"], synthesizer="claude") @@ -70,6 +85,7 @@ def __init__( temperature: float = 0.7, timeout: float = 120.0, cache: bool | None = None, + allow_transport_debug_logging: bool = False, ) -> None: self.config = config or load_config() self.requested_models = list(models) @@ -78,6 +94,15 @@ def __init__( self.timeout = timeout # Explicit override wins; otherwise defer to config (off by default). self.cache_enabled = self.config.cache if cache is None else cache + # Default-on transport-logging guard (key-leak audit, RANK 6): drop + # httpx/httpcore DEBUG records (the only band that emits the auth header) + # so a process holding a real key cannot leak it via verbose transport + # logging, even if the host enables DEBUG app-wide. Idempotent, so many + # councils install it once; scoped to the httpx/httpcore loggers only. + # ``allow_transport_debug_logging=True`` opts out for callers who need + # that DEBUG band and accept the responsibility. + if not allow_transport_debug_logging: + transport.guard_transport_logging() def _available_members(self) -> tuple[list[tuple[str, str]], list[str]]: """Partition requested members into (available, skipped-for-no-key). @@ -205,14 +230,13 @@ async def fan_out( if isinstance(outcome, ModelAnswer): answers.append(outcome) else: - logger.warning("%s raised unexpectedly: %s", name, outcome) - answers.append( - ModelAnswer( - name=name, - model_id=model_id, - error=f"{type(outcome).__name__}: {outcome}", - ) - ) + # call_model already redacts and never raises, so this arm only + # fires on an UNEXPECTED escape. Redact the exception text anyway: + # the invariant "every error string conclave surfaces is scrubbed" + # must hold even on this defense-in-depth path (key-leak audit). + message = redact(f"{type(outcome).__name__}: {outcome}") + logger.warning("%s raised unexpectedly: %s", name, message) + answers.append(ModelAnswer(name=name, model_id=model_id, error=message)) return answers async def ask(self, prompt: str, synthesize: bool = True) -> CouncilResult: diff --git a/src/conclave/streaming.py b/src/conclave/streaming.py index e943567..599ec37 100644 --- a/src/conclave/streaming.py +++ b/src/conclave/streaming.py @@ -31,6 +31,7 @@ from collections.abc import AsyncIterator from typing import TYPE_CHECKING +from .adapters.base import redact from .logging import get_logger from .models import CouncilResult, ModelAnswer, StreamEvent from .providers import call_model_stream @@ -81,15 +82,16 @@ async def _drive_member( ) ) except Exception as exc: # noqa: BLE001 -- a member must never wedge the run - logger.warning("%s streaming raised unexpectedly: %s", name, exc) + # call_model_stream already redacts and never raises, so this arm only + # fires on an UNEXPECTED escape. Redact the exception text anyway so the + # "every surfaced error string is scrubbed" invariant holds even on this + # defense-in-depth path (key-leak audit, vector 2). + message = redact(f"{type(exc).__name__}: {exc}") + logger.warning("%s streaming raised unexpectedly: %s", name, message) await queue.put( ( "answer", - ModelAnswer( - name=name, - model_id=model_id, - error=f"{type(exc).__name__}: {exc}", - ), + ModelAnswer(name=name, model_id=model_id, error=message), ) ) finally: diff --git a/src/conclave/transport.py b/src/conclave/transport.py index 1107984..ea16f36 100644 --- a/src/conclave/transport.py +++ b/src/conclave/transport.py @@ -13,7 +13,9 @@ from __future__ import annotations +import logging from collections.abc import AsyncIterator +from typing import NoReturn import httpx @@ -25,6 +27,69 @@ # within a process. httpx.AsyncClient is safe to share across concurrent tasks. _client: httpx.AsyncClient | None = None +# --------------------------------------------------------------------------- # +# httpx/httpcore debug-logging leak guard (key-leak audit, vector 5) +# --------------------------------------------------------------------------- # +# +# SECURITY-CRITICAL, OUT-OF-BAND OF redact(): httpx and httpcore have their own +# `logging` loggers. At DEBUG level httpcore logs the full request headers -- +# which include ``Authorization: Bearer `` and ``x-api-key: `` -- to +# whatever handler the host application configured. conclave's ``redact()`` only +# scrubs the error/diagnostic strings *it* produces; it cannot reach inside the +# third-party transport loggers. So a consumer that turns on transport DEBUG +# logging (``logging.basicConfig(level=logging.DEBUG)`` app-wide, or explicitly +# raising the httpx/httpcore loggers) would leak live keys to their own logs, +# entirely bypassing every redaction conclave performs. +# +# We cannot (and should not) globally silence another library's logging for the +# whole process -- that would be surprising and could hide legitimate debugging. +# Instead we expose an explicit, opt-in guard a security-conscious library +# consumer can call once at startup. It installs a filter that drops any +# httpx/httpcore log record at DEBUG severity (the only level that emits header +# content), while leaving INFO+ records untouched. See SECURITY.md "Threat +# model" for the documented trust boundary and accepted limitation. +_TRANSPORT_LOGGER_NAMES = ("httpx", "httpcore") +_GUARD_INSTALLED = False + + +class _NoDebugHeadersFilter(logging.Filter): + """Drop DEBUG-level records from a transport logger (where headers appear). + + httpcore emits request/response headers only at ``DEBUG``; INFO and above + carry no header content. Filtering exactly the DEBUG band stops the header + leak without suppressing useful higher-severity transport diagnostics. + """ + + def filter(self, record: logging.LogRecord) -> bool: + # Returning False discards the record before any handler formats it. + return record.levelno > logging.DEBUG + + +def guard_transport_logging() -> None: + """Block httpx/httpcore DEBUG logging so auth headers can never be logged. + + **Opt-in, library-mode key-leak hardening.** httpx/httpcore log full request + headers (including the ``Authorization``/``x-api-key`` value) at ``DEBUG``. + That path is outside :func:`conclave.adapters.base.redact`'s reach, so a host + application that enables transport DEBUG logging would leak live keys to its + own log sinks. Calling this once at startup installs a logging filter on the + ``httpx`` and ``httpcore`` loggers that discards their DEBUG records, closing + the leak while leaving INFO+ diagnostics intact. Idempotent. + + This is intentionally **not** called automatically: silently reconfiguring a + third-party library's logging for the whole process would be surprising and + could mask legitimate debugging. A consumer that handles real keys and also + runs verbose transport logging should call it explicitly. The default, + documented guidance (SECURITY.md) is simply: do not enable httpx/httpcore + DEBUG logging in a process that holds real provider keys. + """ + global _GUARD_INSTALLED + if _GUARD_INSTALLED: + return + for name in _TRANSPORT_LOGGER_NAMES: + logging.getLogger(name).addFilter(_NoDebugHeadersFilter()) + _GUARD_INSTALLED = True + class TransportError(Exception): """A network-level failure (timeout, connection refused, DNS, etc.). @@ -32,7 +97,43 @@ class TransportError(Exception): Raised by :func:`post_json` so :func:`conclave.providers.call_model` can turn it into a non-raising ``ModelAnswer.error``. The message is built from the exception type only -- never from request headers -- so it carries no secret. + + KEY-LEAK NOTE (audit RANK 1/5): the raise sites route through + :func:`_raise_transport_error` (``raise ... from None``) and a boundary clear, + so the surfaced TransportError retains **no** reference to the underlying httpx + exception -- not as ``__cause__``, not as ``__context__``. That httpx + exception's ``.request.headers`` carries the live ``Authorization``/``x-api-key`` + value; had it survived it would leak the key one cause-chain hop away under + ``traceback.format_exception``, ``logging.exception``, a ``repr`` of the cause + chain, or a direct ``err.__context__`` attribute walk. Dropping the chain is + deliberate -- the message already names the failure kind, so no diagnostic + value is lost. + """ + + +def _raise_transport_error(message: str) -> NoReturn: + """Raise a :class:`TransportError` that retains no link to the httpx exception. + + KEY-LEAK NOTE (audit RANK 1/5). The httpx exception active when this is called + carries a live ``.request`` whose ``.headers`` hold the ``Authorization`` / + ``x-api-key`` value. We must not let it survive on the surfaced TransportError: + + * ``raise ... from None`` sets ``__cause__ = None`` and + ``__suppress_context__ = True`` -- enough that ``traceback.format_exception``, + ``logging.exception``, and a cause-chain ``repr`` render neither the httpx + exception nor its headers (those formatters honor ``__suppress_context__``). + * Python's implicit-context machinery still points ``__context__`` at the + active httpx exception at ``raise`` time, so a *direct attribute walk* + (``err.__context__.request.headers``) could still reach the key. We therefore + build the error and raise with ``from None`` here; the **caller** clears + ``__context__`` at a boundary where no exception is being handled (so Python + cannot re-chain it), making even a direct walk key-free. + + Centralizing the raise keeps the clear-and-raise contract identical at all four + transport raise sites. The message names only the failure kind, so dropping the + chain loses no diagnostic value. """ + raise TransportError(message) from None def _get_client() -> httpx.AsyncClient: @@ -65,17 +166,33 @@ async def post_json( Raises: TransportError: On any network-level failure (timeout, connection error, or other ``httpx.HTTPError``). The message names only the failure - kind and never echoes the headers, so no key can leak. + kind and never echoes the headers, so no key can leak. The underlying + httpx exception is deliberately dropped from the cause chain + (``__cause__`` and ``__context__`` both cleared) so its header-bearing + ``.request`` cannot leak the key via the surfaced error's traceback, + cause-chain repr, or a direct attribute walk (audit RANK 1/5). """ client = _get_client() + # Inner try maps httpx failures to TransportError via _raise_transport_error + # (which raises ``from None``); the outer try clears ``__context__`` at a + # boundary where no httpx exception is active, so even a direct attribute walk + # finds no header-bearing httpx exception (key-leak audit RANK 1/5). See + # _raise_transport_error for the full rationale. try: - response = await client.post(url, headers=headers, json=json_body, timeout=timeout) - except httpx.TimeoutException as exc: - raise TransportError(f"request timed out after {timeout:.0f}s") from exc - except httpx.HTTPError as exc: - # Use the exception class name, not str(exc): httpx error strings can - # include the request URL but never headers, yet we stay conservative. - raise TransportError(f"network error: {type(exc).__name__}") from exc + try: + response = await client.post(url, headers=headers, json=json_body, timeout=timeout) + except httpx.TimeoutException: + _raise_transport_error(f"request timed out after {timeout:.0f}s") + except httpx.HTTPError as exc: + # Use the exception class NAME, not str(exc): httpx error strings can + # include the request URL but never headers, yet we stay conservative. + _raise_transport_error(f"network error: {type(exc).__name__}") + except TransportError as err: + # Boundary clear: no httpx exception is being handled here, so nulling + # ``__context__`` sticks (Python will not re-chain) and re-raising + # ``from None`` keeps ``__cause__``/``__suppress_context__`` clean. + err.__context__ = None + raise err from None try: body: object = response.json() @@ -129,47 +246,78 @@ async def stream_sse( Raises: TransportError: On any network-level failure (timeout, connection error) or a non-2xx streaming status. The message names only the - failure kind / HTTP status and never echoes the headers. + failure kind / HTTP status and never echoes the headers. The + underlying httpx exception is dropped from the cause chain + (``__cause__`` and ``__context__`` both cleared) so its header-bearing + ``.request`` cannot leak the key via the surfaced error's traceback, + cause-chain repr, or a direct attribute walk (audit RANK 1/5). """ client = _get_client() + # Inner try maps httpx failures to TransportError via _raise_transport_error + # (raises ``from None``); the outer try clears ``__context__`` at a boundary + # where no httpx exception is active, so even a direct attribute walk finds no + # header-bearing httpx exception (key-leak audit RANK 1/5). The intentional + # ``HTTP : `` error (not chained from httpx) also passes the + # boundary harmlessly -- it carries no httpx context to clear. try: - async with client.stream( - "POST", url, headers=headers, json=json_body, timeout=timeout - ) as response: - if response.status_code < 200 or response.status_code >= 300: - # Drain the error body so we can report a useful, bounded detail. - # aread() is required before the response is consumed/closed. - raw = await response.aread() - detail = raw.decode("utf-8", "replace")[:500] - raise TransportError(f"HTTP {response.status_code}: {detail}") - - event_name = "" - data_lines: list[str] = [] - async for line in response.aiter_lines(): - # A blank line terminates the current event -> dispatch it. - if line == "": - if data_lines: - yield event_name, "\n".join(data_lines) - event_name = "" - data_lines = [] - continue - if line.startswith(":"): - # SSE comment / keep-alive ping; ignore. - continue - if line.startswith("event:"): - event_name = line[len("event:") :].strip() - elif line.startswith("data:"): - data_lines.append(line[len("data:") :].lstrip()) - # Any other field (id:, retry:, ...) is irrelevant here. - - # Flush a final event with no trailing blank line (some servers do - # not emit the terminating newline). - if data_lines: - yield event_name, "\n".join(data_lines) - except httpx.TimeoutException as exc: - raise TransportError(f"request timed out after {timeout:.0f}s") from exc - except httpx.HTTPError as exc: - raise TransportError(f"network error: {type(exc).__name__}") from exc + try: + async with client.stream( + "POST", url, headers=headers, json=json_body, timeout=timeout + ) as response: + if response.status_code < 200 or response.status_code >= 300: + # Drain the error body so we can report a useful, bounded detail. + # aread() is required before the response is consumed/closed. + raw = await response.aread() + detail = raw.decode("utf-8", "replace")[:500] + # KEY-LEAK NOTE (audit vector 2/4): this raw provider body may echo + # request fragments. It is intentionally NOT redacted here -- the + # transport stays provider-agnostic and never imports redact(). The + # single redaction boundary for the streaming path is + # conclave.providers.call_model_stream, which wraps every + # TransportError/ProviderError message in redact() before it lands + # on ModelAnswer.error or is logged. No streamed text delta is + # emitted on this path (deltas carry only parsed answer content), + # so the only surface for this string is that redacted final answer. + raise TransportError(f"HTTP {response.status_code}: {detail}") + + event_name = "" + data_lines: list[str] = [] + async for line in response.aiter_lines(): + # A blank line terminates the current event -> dispatch it. + if line == "": + if data_lines: + yield event_name, "\n".join(data_lines) + event_name = "" + data_lines = [] + continue + if line.startswith(":"): + # SSE comment / keep-alive ping; ignore. + continue + if line.startswith("event:"): + event_name = line[len("event:") :].strip() + elif line.startswith("data:"): + data_lines.append(line[len("data:") :].lstrip()) + # Any other field (id:, retry:, ...) is irrelevant here. + + # Flush a final event with no trailing blank line (some servers do + # not emit the terminating newline). + if data_lines: + yield event_name, "\n".join(data_lines) + except httpx.TimeoutException: + # Map to TransportError with the chain dropped (audit RANK 1/5). The + # streaming httpx exception also carries ``.request.headers`` with the + # live auth value; _raise_transport_error raises ``from None``. + _raise_transport_error(f"request timed out after {timeout:.0f}s") + except httpx.HTTPError as exc: + # Drop the httpx exception from the cause chain so its header-bearing + # ``.request`` cannot leak the key (audit RANK 1/5). + _raise_transport_error(f"network error: {type(exc).__name__}") + except TransportError as err: + # Boundary clear: no httpx exception is active here, so nulling + # ``__context__`` sticks (Python will not re-chain) and re-raising + # ``from None`` keeps ``__cause__``/``__suppress_context__`` clean. + err.__context__ = None + raise err from None async def aclose() -> None: diff --git a/tests/test_keyleak_audit.py b/tests/test_keyleak_audit.py new file mode 100644 index 0000000..2e54fa9 --- /dev/null +++ b/tests/test_keyleak_audit.py @@ -0,0 +1,794 @@ +"""Key-leak audit regression suite (conclave v1.0, SECURITY.md threat model). + +These tests back conclave's headline **bring-your-own-keys / key-rigor** claim. +Each test maps to one vector of the key-leak attack map in SECURITY.md's threat +model and the v1.0 readiness review. They plant an OBVIOUSLY-FAKE, key-shaped +secret somewhere a leak could occur and assert it never escapes -- so a security +reviewer (or a future refactor) cannot quietly break the contract. + +All tests run offline. Planted secrets use synthetic ``...FAKE...`` patterns that +gitleaks will not flag (see ``.gitleaks.toml``); no real credential is ever used. + +Vector map +========== +* **V1 cache write path** -- a key-shaped secret echoed in a provider error never + reaches a cache file, the cache key, or the cache filename (cache stores the + already-redacted ``CouncilResult``). +* **V2 streaming chunk path** -- a secret planted in a mid-stream provider error is + absent from every streamed ``StreamEvent`` AND from the final ``ModelAnswer``. +* **V3 __repr__/__str__** -- no config/adapter/result object renders a planted key + in its ``repr``/``str``; the key-holding request ``headers`` dict is built only + inside the adapter and never stored on any object. +* **V4 provider 400/422 echo** -- a buffered HTTP error whose body echoes the + submitted key is scrubbed in ``ModelAnswer.error`` (capture runs through + ``redact()``). +* **V5 transport debug logging** -- the ``guard_transport_logging`` helper blocks + httpx/httpcore DEBUG records (the only level that emits auth headers). +* **V8 transport cause-chain (RANK 1/5)** -- a ``TransportError`` surfaced from + ``post_json``/``stream_sse`` retains no reference to the underlying httpx + exception, whose ``.request.headers`` carries the live auth header; the chain is + dropped (``raise ... from None``) so the key cannot leak via + ``traceback.format_exception`` or a cause-chain repr. +* **V9 transport guard default-on (RANK 6)** -- ``Council.__init__`` installs the + httpx/httpcore DEBUG guard by default; ``allow_transport_debug_logging=True`` + opts out. +* **V10 client close hygiene (RANK 8)** -- the pooled client closes cleanly with no + ``ResourceWarning`` (cheap close-wiring regression guard). +""" + +from __future__ import annotations + +import json +import logging +import traceback +import warnings + +import httpx +import pytest + +from conclave import Council, guard_transport_logging, transport +from conclave.adapters.anthropic import AnthropicAdapter +from conclave.adapters.gemini import GeminiAdapter +from conclave.adapters.openai_compat import OpenAICompatAdapter +from conclave.config import ConclaveConfig, CustomEndpoint, clear_config_cache +from conclave.models import ModelAnswer +from conclave.providers import call_model, call_model_stream + +# An obviously-fake, key-SHAPED secret. The ``sk-`` prefix makes it match +# redact()'s pattern path; ``FAKE`` makes it unmistakably synthetic to a human +# reader and to gitleaks (allowlisted). If this string ever appears in a cache +# file, a streamed event, a repr, or a result error, a leak has occurred. +PLANTED = "sk-FAKEconclaveLEAK0123456789abcdefSECRET" + + +@pytest.fixture(autouse=True) +def _reset_config_cache(): + """Isolate each test from the in-process config memo.""" + clear_config_cache() + yield + clear_config_cache() + + +@pytest.fixture +async def mock_stream_client(): + """Install a MockTransport-backed pooled client; restore the global after. + + Mirrors the async fixture in ``test_streaming.py`` so the real + ``call_model_stream`` -> ``transport.stream_sse`` -> adapter path runs against + a caller-supplied handler with no network. + """ + saved = transport._client + created: list[httpx.AsyncClient] = [] + + def use(handler): + client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + created.append(client) + transport._client = client + return client + + yield use + + for client in created: + if not client.is_closed: + await client.aclose() + transport._client = saved + + +# --------------------------------------------------------------------------- # +# V1 -- cache write path: a planted secret never reaches the cache (HIGH) +# --------------------------------------------------------------------------- # + + +def _cache_config(cache: bool = True) -> ConclaveConfig: + """A deterministic, on-disk-independent config for cache tests.""" + return ConclaveConfig( + models={"grok": "xai/grok-4.3", "claude": "anthropic/claude-sonnet-4-6"}, + councils={"default": ["grok", "claude"]}, + synthesizer="claude", + cache=cache, + ) + + +async def test_cache_never_persists_planted_secret_from_provider_error(monkeypatch, tmp_path): + """V1: a key echoed in a provider error must not land in any cache artifact. + + End-to-end: run a cached council where every member's mocked transport returns + a 401 whose error body echoes the planted key. The member errors are captured + via redact() before they reach the CouncilResult, so the on-disk cache entry + (and its filename and the cache key) must contain neither the secret value nor + the env var name -- proving the cache write happens strictly post-redaction. + """ + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path)) + # The planted secret IS the live key value, so redact() can mask it by value + # even though one member's error also echoes it inline. + for var in ("XAI_API_KEY", "ANTHROPIC_API_KEY"): + monkeypatch.setenv(var, PLANTED) + + async def echoing_401(url, headers, json_body, timeout): + # A gateway that echoes the submitted credential back on auth failure. + return 401, {"error": {"message": f"invalid api key: {PLANTED}"}} + + monkeypatch.setattr("conclave.transport.post_json", echoing_401) + + council = Council( + models=["grok", "claude"], synthesizer="claude", config=_cache_config(), cache=True + ) + result = await council.ask("audit prompt", synthesize=True) + + # Every member failed -> each error must already be redacted on the result. + assert result.answers, "expected attempted members" + for ans in result.answers: + assert ans.error is not None + assert PLANTED not in ans.error + assert "[REDACTED]" in ans.error + + cache_home = tmp_path / "conclave" + entries = list(cache_home.glob("*.json")) + assert entries, "a cached entry should have been written" + for entry in entries: + blob = entry.read_text(encoding="utf-8") + assert PLANTED not in blob, "planted secret leaked into a cache file" + assert "XAI_API_KEY" not in blob + assert "ANTHROPIC_API_KEY" not in blob + # The filename (= cache key) must carry no secret either. + assert PLANTED not in entry.name + + # And the computed cache key itself is secret-free. + key = council._cache_key("audit prompt", "synthesize") + assert PLANTED not in key + + +async def test_cache_key_and_payload_have_no_env_value(monkeypatch, tmp_path): + """V1: even a SUCCESSFUL run never writes the key value or name to the cache.""" + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path)) + for var in ("XAI_API_KEY", "ANTHROPIC_API_KEY"): + monkeypatch.setenv(var, PLANTED) + + async def ok_post(url, headers, json_body, timeout): + return 200, {"choices": [{"message": {"content": "benign answer"}}]} + + monkeypatch.setattr("conclave.transport.post_json", ok_post) + + council = Council(models=["grok"], synthesizer="claude", config=_cache_config(), cache=True) + await council.ask("benign prompt", synthesize=False) + + cache_home = tmp_path / "conclave" + for entry in cache_home.glob("*.json"): + blob = entry.read_text(encoding="utf-8") + assert PLANTED not in blob + assert "XAI_API_KEY" not in blob + + +# --------------------------------------------------------------------------- # +# V2 -- streaming chunk path: planted secret absent from stream AND final (MED) +# --------------------------------------------------------------------------- # + + +async def _collect_stream(name, model_id, **kwargs): + """Run call_model_stream, returning (text_chunks, final_ModelAnswer).""" + chunks: list[str] = [] + final: ModelAnswer | None = None + async for item in call_model_stream( + name, model_id, [{"role": "user", "content": "hi"}], **kwargs + ): + if isinstance(item, ModelAnswer): + final = item + else: + chunks.append(item) + return chunks, final + + +def _sse(*frames: str) -> bytes: + return ("".join(f"{frame}\n\n" for frame in frames)).encode("utf-8") + + +async def test_stream_planted_secret_absent_from_chunks_and_final(monkeypatch, mock_stream_client): + """V2: a mid-stream error echoing the key leaks into no chunk and no final. + + The provider streams a couple of good deltas, then returns a non-2xx whose + body echoes the planted key... but here we drive it via a 401 status so the + transport raises before any delta. We separately assert the partial-text case + below. This case proves: zero streamed chunks carry the secret, and the final + ModelAnswer.error is redacted. + """ + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("CONCLAVE_CONFIG", "/nonexistent/conclave.yml") + + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(401, json={"error": {"message": f"bad key {PLANTED}"}}) + + mock_stream_client(handler) + chunks, final = await _collect_stream("openai", "openai/gpt-4.1") + + # No streamed chunk may carry the secret (error path yields no text deltas). + for chunk in chunks: + assert PLANTED not in chunk + assert final is not None and not final.ok + assert final.error is not None + assert PLANTED not in final.error + assert "[REDACTED]" in final.error + + +async def test_stream_midstream_error_after_partial_text_is_redacted( + monkeypatch, mock_stream_client +): + """V2: a structured error frame arriving mid-stream (after real deltas) leaks nothing. + + A provider can send good content deltas and THEN a structured error data frame + that echoes the key. The good deltas must stream (they are answer content), the + error must be captured + redacted on the final answer, the partial text must be + preserved, and the planted key must appear in NO streamed event NOR the final. + """ + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("CONCLAVE_CONFIG", "/nonexistent/conclave.yml") + + def handler(request: httpx.Request) -> httpx.Response: + body = _sse( + 'data: {"choices":[{"delta":{"content":"partial "}}]}', + # A structured error frame mid-stream that echoes the credential. + 'data: {"error":{"message":"auth rejected: ' + PLANTED + '"}}', + "data: [DONE]", + ) + return httpx.Response(200, content=body) + + mock_stream_client(handler) + chunks, final = await _collect_stream("openai", "openai/gpt-4.1") + + # The good content streamed; no chunk carries the secret. + assert chunks == ["partial "] + for chunk in chunks: + assert PLANTED not in chunk + assert final is not None and not final.ok + assert final.error is not None + assert PLANTED not in final.error + assert "[REDACTED]" in final.error + # Partial text preserved AND clean. + assert (final.answer or "") == "partial " + assert PLANTED not in (final.answer or "") + + +async def test_council_stream_events_carry_no_planted_secret(monkeypatch, mock_stream_client): + """V2 at the council level: no StreamEvent in a full run carries the secret.""" + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("CONCLAVE_CONFIG", "/nonexistent/conclave.yml") + + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(401, json={"error": {"message": f"bad key {PLANTED}"}}) + + mock_stream_client(handler) + + config = ConclaveConfig( + models={"openai": "openai/gpt-4.1"}, + councils={"default": ["openai"]}, + synthesizer="openai", + ) + council = Council(models=["openai"], synthesizer="openai", config=config) + + events = [e async for e in council.ask_stream("hi", synthesize=False)] + # Serialize every event the way a consumer would and scan the whole payload. + for ev in events: + dumped = json.dumps(ev.model_dump(mode="json")) + assert PLANTED not in dumped, f"planted secret leaked into a {ev.type} event" + + +# --------------------------------------------------------------------------- # +# V3 -- __repr__/__str__ never render key material (MED) +# --------------------------------------------------------------------------- # + + +def test_adapters_hold_no_key_and_repr_is_clean(): + """V3: adapters never store a key; building a request does not retain it. + + The key VALUE is passed to ``build_request`` per call and used only to compose + the (transient) headers dict the caller hands to the transport. It is never + assigned to ``self``. So an adapter instance's repr -- the thing that would + surface in a traceback frame referencing ``self`` -- cannot contain a key. + """ + adapters = [ + OpenAICompatAdapter( + prefix="openai", + completions_url="https://api.openai.com/v1/chat/completions", + env_vars=("OPENAI_API_KEY",), + ), + AnthropicAdapter(), + GeminiAdapter(), + ] + messages = [{"role": "user", "content": "hi"}] + for adapter in adapters: + url, headers, _body = adapter.build_request( + f"{adapter.prefix}/some-model", messages, 0.7, 30.0, PLANTED + ) + # The headers the transport receives DO carry the key (they must, to auth) + # -- that is the in-flight request, redaction-exempt by design and handled + # by the transport-logging guard (V5), not by repr. + header_blob = json.dumps(headers) + assert PLANTED in header_blob, "sanity: the live request must carry the key" + # But the adapter object itself retains nothing. + assert PLANTED not in repr(adapter) + assert PLANTED not in str(adapter) + assert PLANTED not in str(adapter.__dict__) + + +def test_config_repr_has_no_key_value(monkeypatch): + """V3: ConclaveConfig holds only env var NAMES, never values; repr is clean.""" + monkeypatch.setenv("TOGETHER_API_KEY", PLANTED) + config = ConclaveConfig( + models={"x": "together/m"}, + endpoints={ + "together": CustomEndpoint( + completions_url="https://api.together.xyz/v1/chat/completions", + env_var="TOGETHER_API_KEY", # NAME only + ) + }, + ) + assert PLANTED not in repr(config) + assert PLANTED not in str(config) + # The name is fine to render; the value must never be present. + assert "TOGETHER_API_KEY" in repr(config) + + +async def test_model_answer_repr_clean_after_provider_error(monkeypatch): + """V3 (async): the redacted ModelAnswer's repr/str/json carry no secret.""" + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("CONCLAVE_CONFIG", "/nonexistent/conclave.yml") + + async def echoing_401(url, headers, json_body, timeout): + return 401, {"error": {"message": f"invalid api key: {PLANTED}"}} + + monkeypatch.setattr("conclave.transport.post_json", echoing_401) + + answer = await call_model("openai", "openai/gpt-4.1", [{"role": "user", "content": "hi"}]) + assert not answer.ok + assert PLANTED not in repr(answer) + assert PLANTED not in str(answer) + assert PLANTED not in json.dumps(answer.model_dump(mode="json")) + + +# --------------------------------------------------------------------------- # +# V4 -- provider 400/422 echo is captured AFTER redact() (MED) +# --------------------------------------------------------------------------- # + + +@pytest.mark.parametrize("status", [400, 401, 422, 500]) +async def test_buffered_error_echoing_key_is_redacted(monkeypatch, status): + """V4: a buffered HTTP error whose body echoes the key is scrubbed in .error.""" + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("CONCLAVE_CONFIG", "/nonexistent/conclave.yml") + + async def echoing_error(url, headers, json_body, timeout): + # Provider echoes the submitted Authorization header back in its error. + return status, { + "error": {"message": f"request failed; headers: Authorization: Bearer {PLANTED}"} + } + + monkeypatch.setattr("conclave.transport.post_json", echoing_error) + + answer = await call_model("openai", "openai/gpt-4.1", [{"role": "user", "content": "hi"}]) + assert not answer.ok + assert answer.error is not None + assert PLANTED not in answer.error + assert "[REDACTED]" in answer.error + assert str(status) in answer.error + + +async def test_custom_endpoint_unprefixed_key_echo_is_redacted(monkeypatch, tmp_path): + """V4: an UNPREFIXED custom-endpoint key (no sk-/AIza shape) echoed in a 400 is scrubbed. + + Only name-based scrubbing (sourced from config.endpoints[*].env_var) can catch + a key with no recognizable shape -- this guards the BYO-keys leak class for + user-declared providers. + """ + unshaped = "togetherFAKEsecret_unprefixed_no_known_shape_0123456789" + monkeypatch.setenv("TOGETHER_API_KEY", unshaped) + + config_file = tmp_path / "conclave.yml" + config_file.write_text( + "endpoints:\n" + " together:\n" + " completions_url: https://api.together.xyz/v1/chat/completions\n" + " env_var: TOGETHER_API_KEY\n", + encoding="utf-8", + ) + monkeypatch.setenv("CONCLAVE_CONFIG", str(config_file)) + + async def echoing_400(url, headers, json_body, timeout): + return 400, {"error": {"message": f"bad request, key was {unshaped}"}} + + monkeypatch.setattr("conclave.transport.post_json", echoing_400) + + answer = await call_model( + "together", "together/some-model", [{"role": "user", "content": "hi"}] + ) + assert not answer.ok + assert answer.error is not None + assert unshaped not in answer.error + assert "[REDACTED]" in answer.error + + +# --------------------------------------------------------------------------- # +# V5 -- transport debug-logging guard blocks header-bearing DEBUG records (HIGH) +# --------------------------------------------------------------------------- # + + +def test_guard_transport_logging_blocks_debug_records(monkeypatch): + """V5: after guard install, httpx/httpcore DEBUG records are dropped, INFO+ kept. + + httpcore logs request headers (incl. Authorization) only at DEBUG. The guard + installs a filter that discards DEBUG records on the httpx/httpcore loggers, so + a header value can never reach a handler -- while INFO+ diagnostics survive. + """ + # Reset the one-shot guard flag so this test exercises a fresh install, then + # restore it so we don't perturb global state for other tests. + saved_flag = transport._GUARD_INSTALLED + httpx_logger = logging.getLogger("httpx") + httpcore_logger = logging.getLogger("httpcore") + saved_httpx_filters = httpx_logger.filters[:] + saved_httpcore_filters = httpcore_logger.filters[:] + monkeypatch.setattr(transport, "_GUARD_INSTALLED", False) + try: + guard_transport_logging() + + # A DEBUG record carrying a (fake) auth header must be filtered out. + debug_rec = httpcore_logger.makeRecord( + "httpcore", + logging.DEBUG, + __file__, + 0, + "send_request_headers.complete return_value=[(b'Authorization', b'Bearer %s')]", + (PLANTED.encode(),), + None, + ) + assert all(f.filter(debug_rec) is False for f in httpcore_logger.filters), ( + "DEBUG httpcore record (header-bearing) must be dropped by the guard" + ) + + # An INFO record on the same logger must survive (guard is DEBUG-only). + info_rec = httpcore_logger.makeRecord( + "httpcore", logging.INFO, __file__, 0, "connection established", (), None + ) + guard_filter = next( + f for f in httpcore_logger.filters if f.__class__.__name__ == "_NoDebugHeadersFilter" + ) + assert guard_filter.filter(info_rec) is True + + # The httpx logger got the same guard. + assert any(f.__class__.__name__ == "_NoDebugHeadersFilter" for f in httpx_logger.filters) + + # Idempotent: a second install does not stack a second filter. + before = len(httpcore_logger.filters) + guard_transport_logging() + assert len(httpcore_logger.filters) == before + finally: + httpx_logger.filters = saved_httpx_filters + httpcore_logger.filters = saved_httpcore_filters + transport._GUARD_INSTALLED = saved_flag + + +def test_guard_transport_logging_is_exported(): + """V5: the guard is part of the public API so library consumers can call it.""" + import conclave + + assert hasattr(conclave, "guard_transport_logging") + assert "guard_transport_logging" in conclave.__all__ + + +# --------------------------------------------------------------------------- # +# V7 -- residual catch-all error construction is redacted (audit-found gap) +# --------------------------------------------------------------------------- # +# +# Not in the original attack map: the partial-failure catch-alls in +# Council.fan_out and streaming._drive_member build a ModelAnswer.error from a raw +# exception (`f"{type(exc).__name__}: {exc}"`). These only fire on an UNEXPECTED +# raise that escapes call_model / call_model_stream (which already redact), but the +# invariant "every surfaced error string is scrubbed" must hold even there. The +# fix wraps both in redact(); these tests pin it by forcing the underlying call to +# RAISE (not return a ModelAnswer) with the planted key in the message. + + +async def test_fan_out_catch_all_error_is_redacted(monkeypatch): + """V7 (buffered): an unexpected raise in fan_out yields a redacted error.""" + monkeypatch.setenv("XAI_API_KEY", PLANTED) + + import conclave.council as council_mod + + async def raising_call_model(name, model_id, messages, *, temperature=0.7, timeout=120.0): + # Simulate an unexpected escape carrying the key in its text. + raise RuntimeError(f"unexpected boom leaking {PLANTED}") + + monkeypatch.setattr(council_mod, "call_model", raising_call_model) + + config = ConclaveConfig( + models={"grok": "xai/grok-4.3"}, + councils={"default": ["grok"]}, + synthesizer="grok", + ) + council = Council(models=["grok"], synthesizer="grok", config=config) + result = await council.ask("hi", synthesize=False) + + assert result.answers and not result.answers[0].ok + err = result.answers[0].error + assert err is not None + assert PLANTED not in err + assert "[REDACTED]" in err + + +async def test_stream_drive_member_catch_all_error_is_redacted(monkeypatch): + """V7 (streaming): an unexpected raise in _drive_member yields a redacted error.""" + monkeypatch.setenv("XAI_API_KEY", PLANTED) + + import conclave.streaming as streaming_mod + + async def raising_stream( + name, model_id, messages, *, temperature=0.7, timeout=120.0, config=None + ): + # An unexpected raise (not a yielded error ModelAnswer) carrying the key. + raise RuntimeError(f"stream boom leaking {PLANTED}") + yield # pragma: no cover -- make this an async generator + + monkeypatch.setattr(streaming_mod, "call_model_stream", raising_stream) + + config = ConclaveConfig( + models={"grok": "xai/grok-4.3"}, + councils={"default": ["grok"]}, + synthesizer="grok", + ) + council = Council(models=["grok"], synthesizer="grok", config=config) + + events = [e async for e in council.ask_stream("hi", synthesize=False)] + done = events[-1] + assert done.type == "done" + answers = done.result.answers + assert answers and not answers[0].ok + err = answers[0].error + assert err is not None + assert PLANTED not in err + assert "[REDACTED]" in err + # No StreamEvent anywhere carries the secret. + for ev in events: + assert PLANTED not in json.dumps(ev.model_dump(mode="json")) + + +# --------------------------------------------------------------------------- # +# V6 -- fixtures sanity: the planted secret is obviously fake +# --------------------------------------------------------------------------- # + + +def test_planted_secret_is_obviously_fake(): + """V6: the audit's planted secret is a synthetic, gitleaks-allowlisted token.""" + assert "FAKE" in PLANTED + assert PLANTED.startswith("sk-FAKE") + + +# --------------------------------------------------------------------------- # +# V8 -- transport drops the httpx exception from the cause chain (RANK 1/5, HIGH) +# --------------------------------------------------------------------------- # +# +# httpx exceptions carry a live ``.request`` whose ``.headers`` hold the +# ``Authorization``/``x-api-key`` value. If the surfaced TransportError kept that +# exception as ``__cause__``/``__context__``, the key would sit one cause-chain hop +# away and leak under ``traceback.format_exception``, ``logging.exception``, or a +# cause-chain repr. The transport raises ``... from None``; these tests pin that the +# formatted traceback + repr/str of the surfaced error are key-free and the cause +# chain is dropped. + + +def _planted_header_request(url: str = "https://x/y") -> httpx.Request: + """A real httpx.Request carrying the planted auth header (so .request.headers populates).""" + return httpx.Request("POST", url, headers={"Authorization": f"Bearer {PLANTED}"}) + + +def _assert_cause_chain_key_free(err: transport.TransportError) -> None: + """Shared V8 assertions: the surfaced TransportError leaks the key nowhere. + + The load-bearing guarantee is that ``traceback.format_exception`` (which honors + ``__suppress_context__``) renders neither the httpx exception nor its headers, + and that ``__cause__`` is dropped. The transport additionally clears + ``__context__`` at a boundary (see ``transport._raise_transport_error``), so a + *direct attribute walk* also finds no header-bearing httpx exception -- we pin + that too. + """ + formatted = "".join(traceback.format_exception(type(err), err, err.__traceback__)) + assert PLANTED not in formatted, "planted key leaked via the formatted traceback" + assert PLANTED not in repr(err) + assert PLANTED not in str(err) + assert err.__cause__ is None + assert err.__suppress_context__ is True + # Explicit context-clear makes even a direct ``err.__context__`` walk key-free. + assert err.__context__ is None + + +async def test_transport_error_drops_httpx_cause_chain(monkeypatch, mock_stream_client): + """V8 (buffered): post_json's TransportError keeps no header-bearing httpx cause. + + The MockTransport handler raises ``httpx.ConnectError`` carrying a request whose + headers hold the planted Authorization token. ``post_json`` must raise a + ``TransportError`` whose formatted traceback, repr, and str are all key-free and + whose cause chain is dropped (audit RANK 1/5). + """ + # Value-based redact would also mask the key in any *string* the transport built; + # the POINT here is the cause chain, asserted on the formatted traceback. + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("XAI_API_KEY", PLANTED) + + def handler(request: httpx.Request) -> httpx.Response: + # Raise a network error carrying a header-bearing request (httpx.HTTPError arm). + raise httpx.ConnectError("boom", request=_planted_header_request()) + + mock_stream_client(handler) + + with pytest.raises(transport.TransportError) as excinfo: + await transport.post_json( + "https://x/y", + {"Authorization": f"Bearer {PLANTED}"}, + {"hi": "there"}, + timeout=5.0, + ) + _assert_cause_chain_key_free(excinfo.value) + + +async def test_stream_sse_error_drops_httpx_cause_chain(monkeypatch, mock_stream_client): + """V8 (streaming): stream_sse's TransportError keeps no header-bearing httpx cause. + + Same guarantee at the streaming boundary: the handler raises an httpx error + carrying the planted auth header; iterating the async generator surfaces a + ``TransportError`` whose formatted traceback / repr / str are key-free and whose + cause chain is dropped (audit RANK 1/5). + """ + monkeypatch.setenv("OPENAI_API_KEY", PLANTED) + monkeypatch.setenv("XAI_API_KEY", PLANTED) + + def handler(request: httpx.Request) -> httpx.Response: + raise httpx.ConnectError("boom", request=_planted_header_request()) + + mock_stream_client(handler) + + async def drain() -> None: + async for _event in transport.stream_sse( + "https://x/y", + {"Authorization": f"Bearer {PLANTED}"}, + {"hi": "there"}, + timeout=5.0, + ): + pass # pragma: no cover -- the handler raises before any event + + with pytest.raises(transport.TransportError) as excinfo: + await drain() + _assert_cause_chain_key_free(excinfo.value) + + +# --------------------------------------------------------------------------- # +# V9 -- Council installs the transport-logging guard by default (RANK 6) +# --------------------------------------------------------------------------- # + + +def _minimal_council_config() -> ConclaveConfig: + """A deterministic, on-disk-independent config for the guard-install tests.""" + return ConclaveConfig( + models={"openai": "openai/gpt-4.1"}, + councils={"default": ["openai"]}, + synthesizer="openai", + ) + + +def _snapshot_transport_logger_state(): + """Snapshot the httpx/httpcore filter lists + the one-shot guard flag.""" + httpx_logger = logging.getLogger("httpx") + httpcore_logger = logging.getLogger("httpcore") + return ( + httpx_logger, + httpcore_logger, + httpx_logger.filters[:], + httpcore_logger.filters[:], + transport._GUARD_INSTALLED, + ) + + +def test_council_init_installs_transport_guard_by_default(monkeypatch): + """V9 (RANK 6): constructing a Council installs the httpx/httpcore DEBUG guard. + + With no ``allow_transport_debug_logging`` argument the guard is ON: after + construction ``_GUARD_INSTALLED`` is True, the httpcore logger carries a + ``_NoDebugHeadersFilter``, and a header-bearing DEBUG record is dropped. Global + logging state is snapshotted and restored (mirrors the V5 teardown discipline). + """ + httpx_logger, httpcore_logger, saved_httpx, saved_httpcore, saved_flag = ( + _snapshot_transport_logger_state() + ) + monkeypatch.setattr(transport, "_GUARD_INSTALLED", False) + try: + Council(models=["openai"], synthesizer="openai", config=_minimal_council_config()) + + assert transport._GUARD_INSTALLED is True + guard_filter = next( + f for f in httpcore_logger.filters if f.__class__.__name__ == "_NoDebugHeadersFilter" + ) + # A header-bearing DEBUG record on httpcore must be dropped by the guard. + debug_rec = httpcore_logger.makeRecord( + "httpcore", + logging.DEBUG, + __file__, + 0, + "send_request_headers.complete return_value=[(b'Authorization', b'Bearer %s')]", + (PLANTED.encode(),), + None, + ) + assert guard_filter.filter(debug_rec) is False + finally: + httpx_logger.filters = saved_httpx + httpcore_logger.filters = saved_httpcore + transport._GUARD_INSTALLED = saved_flag + + +def test_council_opt_out_skips_transport_guard(monkeypatch): + """V9 (RANK 6 opt-out): allow_transport_debug_logging=True skips guard install. + + Starting from ``_GUARD_INSTALLED = False``, constructing a Council with the + opt-out flag must NOT flip the flag and must NOT add a new + ``_NoDebugHeadersFilter`` to the httpcore logger. Global state is restored after. + """ + httpx_logger, httpcore_logger, saved_httpx, saved_httpcore, saved_flag = ( + _snapshot_transport_logger_state() + ) + monkeypatch.setattr(transport, "_GUARD_INSTALLED", False) + filters_before = httpcore_logger.filters[:] + try: + Council( + models=["openai"], + synthesizer="openai", + config=_minimal_council_config(), + allow_transport_debug_logging=True, + ) + + # This construction must not have installed the guard. + assert transport._GUARD_INSTALLED is False + assert httpcore_logger.filters == filters_before + finally: + httpx_logger.filters = saved_httpx + httpcore_logger.filters = saved_httpcore + transport._GUARD_INSTALLED = saved_flag + + +# --------------------------------------------------------------------------- # +# V10 -- pooled client closes without a ResourceWarning (RANK 8, cheap) +# --------------------------------------------------------------------------- # + + +async def test_pooled_client_closes_without_resource_warning(): + """V10 (RANK 8): creating then closing the pooled client emits no ResourceWarning. + + A cheap close-wiring regression guard. ``_get_client()`` lazily builds the + pooled client; ``aclose()`` must release it cleanly. We snapshot/restore the + global client so this does not clobber other tests, and turn ResourceWarning + into an error so a leaked client surfaces immediately. + """ + saved = transport._client + transport._client = None + try: + with warnings.catch_warnings(): + warnings.simplefilter("error", ResourceWarning) + client = transport._get_client() + assert client is not None + await transport.aclose() + assert transport._client is None + finally: + transport._client = saved diff --git a/tests/test_logging.py b/tests/test_logging.py index bc413d9..47c4733 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -19,6 +19,20 @@ from conclave.logging import get_logger +def _app_stream_handlers(logger: logging.Logger) -> list[logging.Handler]: + """Return only the handlers conclave itself installed on ``logger``. + + Under pytest's logging plugin the captured loggers also carry one or more + ``LogCaptureHandler`` instances (a ``StreamHandler`` subclass) injected by the + harness -- their presence is a property of the test runner, not of conclave's + one-shot logger config. To assert on conclave's OWN handler set without + coupling to the pytest version, we exclude any handler whose class name + contains ``Capture``. What remains is exactly the ``StreamHandler`` + ``get_logger`` adds. + """ + return [h for h in logger.handlers if "Capture" not in type(h).__name__] + + @pytest.fixture def fresh_logging(monkeypatch): """Reset the one-shot logger config so a fresh get_logger() reconfigures. @@ -54,8 +68,9 @@ def test_default_level_is_warning_when_env_unset(fresh_logging, monkeypatch): assert logger.name == "conclave" assert logger.level == logging.WARNING assert logger.propagate is False - assert len(logger.handlers) == 1 - assert isinstance(logger.handlers[0], logging.StreamHandler) + app_handlers = _app_stream_handlers(logger) + assert len(app_handlers) == 1 + assert isinstance(app_handlers[0], logging.StreamHandler) def test_env_var_sets_level_case_insensitively(fresh_logging, monkeypatch): @@ -84,8 +99,9 @@ def test_named_logger_is_child_of_root(fresh_logging, monkeypatch): assert child.name == "conclave.transport" assert child.parent is logging.getLogger("conclave") - # Child has no handler of its own; it propagates to the configured root. - assert child.handlers == [] + # Child has no handler of its OWN; it propagates to the configured root. + # (pytest may attach a capture handler; exclude it -- conclave adds none.) + assert _app_stream_handlers(child) == [] # Effective level is inherited from the root we just configured at INFO. assert child.getEffectiveLevel() == logging.INFO @@ -95,7 +111,7 @@ def test_configuration_happens_once(fresh_logging, monkeypatch): monkeypatch.setenv("CONCLAVE_LOG_LEVEL", "ERROR") first = get_logger() - assert len(first.handlers) == 1 + assert len(_app_stream_handlers(first)) == 1 assert logging_mod._CONFIGURED is True # Changing the env now must have no effect -- the guard short-circuits. @@ -103,5 +119,5 @@ def test_configuration_happens_once(fresh_logging, monkeypatch): second = get_logger() assert second is first - assert len(second.handlers) == 1 # not duplicated + assert len(_app_stream_handlers(second)) == 1 # not duplicated assert second.level == logging.ERROR # unchanged from first config