From 72a867de28b1a7edc1e20fb68e143c4010ab216d Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Thu, 11 Jun 2026 21:24:03 +1000 Subject: [PATCH] security: send lock_id via X-CacheKit-Lock-Id header, not query string (#131) The distributed-lock capability token was sent as ?lock_id= on the unlock DELETE. Query strings are captured by access/proxy logs and OpenTelemetry http.url spans (CWE-532), letting anyone with log access replay the token to release a lock within its ~30s TTL. _release_lock now sends the token in the X-CacheKit-Lock-Id request header and drops it from the URL. The SaaS handler dual-reads header + legacy ?lock_id= (deployed in saas 0.1.7), so this is wire-compatible with prod. Ratified in protocol spec/saas-api.md. Tests updated: the URL-injection cases now assert the token is isolated in the header and never touches the URL; added a positive header-transport assertion. SECURITY.md documents the transport. --- SECURITY.md | 5 ++ src/cachekit/backends/cachekitio/backend.py | 16 ++++-- .../backends/test_cachekitio_lock_protocol.py | 50 +++++++++++++++---- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index b449f0d..008160a 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -187,6 +187,10 @@ When using `@cache.io` (CachekitIOBackend), the SDK includes built-in Server-Sid See [SSRF Protection](docs/features/ssrf-protection.md) for full details, including custom host configuration for development environments. +### Lock Token Transport (CWE-532) + +The distributed-lock capability token (`lock_id`) is sent in the `X-CacheKit-Lock-Id` request header when releasing a lock (`DELETE /v1/cache/{key}/lock`), **never** in the URL query string. Query strings are routinely captured by access logs, proxy/CDN logs, and OpenTelemetry `http.url` spans ([CWE-532][cwe-532]); a leaked token could be replayed to release a lock within its short TTL. The CacheKit SaaS backend dual-reads the header and the legacy `?lock_id=` query during migration, preferring the header (removed in protocol 2.0). + --- ## FFI Boundary Security @@ -394,3 +398,4 @@ We appreciate responsible disclosure from the security community. Security resea [core-kani]: https://github.com/cachekit-io/cachekit-core/blob/main/SECURITY.md#kani-verification [rustsec]: https://rustsec.org/ [cwe-502]: https://cwe.mitre.org/data/definitions/502.html +[cwe-532]: https://cwe.mitre.org/data/definitions/532.html diff --git a/src/cachekit/backends/cachekitio/backend.py b/src/cachekit/backends/cachekitio/backend.py index 9d4a9a7..beb87d6 100644 --- a/src/cachekit/backends/cachekitio/backend.py +++ b/src/cachekit/backends/cachekitio/backend.py @@ -27,6 +27,13 @@ # Module-level logger _logger = get_structured_logger(__name__) +# Lock capability token travels in this request header, never the query string: +# a ?lock_id= query leaks the token into access/proxy logs and OpenTelemetry +# http.url spans (CWE-532), letting anyone with log access replay it. The SaaS +# handler dual-reads this header + the legacy ?lock_id= query during rollout, +# preferring the header. See protocol spec/saas-api.md (DELETE .../lock). +LOCK_ID_HEADER = "X-CacheKit-Lock-Id" + def _inject_metrics_headers(stats: _FunctionStats | None) -> dict[str, str]: """Extract cache metrics and format as HTTP headers. @@ -649,12 +656,13 @@ async def _release_lock(self, lock_key: str, lock_id: str) -> bool: inside ``__aexit__`` cannot mask the user's exception. The server-side ``timeout`` on the lock is the safety net if the DELETE never lands. """ - # URL-encode both segments: lock_key is caller-controlled, lock_id is server-issued - # but the SaaS contract doesn't pin a charset. + # lock_key is caller-controlled → percent-encode it into the path. lock_id is a + # capability token and travels in the X-CacheKit-Lock-Id header, NOT the query + # string (CWE-532): a ?lock_id= query leaks it into access/proxy logs and OTel + # http.url spans. It is server-issued, so it needs no URL-encoding. encoded_key = quote(lock_key, safe="") - encoded_id = quote(lock_id, safe="") try: - await self._request_async("DELETE", f"{encoded_key}/lock?lock_id={encoded_id}") + await self._request_async("DELETE", f"{encoded_key}/lock", headers={LOCK_ID_HEADER: lock_id}) return True except BackendError: return False diff --git a/tests/unit/backends/test_cachekitio_lock_protocol.py b/tests/unit/backends/test_cachekitio_lock_protocol.py index 52df44f..e045f54 100644 --- a/tests/unit/backends/test_cachekitio_lock_protocol.py +++ b/tests/unit/backends/test_cachekitio_lock_protocol.py @@ -9,7 +9,8 @@ SaaS contract (saas/apps/cache/src/index.ts:732): POST {key}/lock always returns HTTP 200 with body {"lock_id": }; null indicates lock held by another -caller. DELETE {key}/lock?lock_id=... releases. +caller. DELETE {key}/lock releases, sending the lock id in the X-CacheKit-Lock-Id +header (CWE-532) rather than a ?lock_id= query param. """ from __future__ import annotations @@ -218,12 +219,37 @@ async def test_url_injection_via_lock_key_encoded(self, backend: CachekitIOBacke # POST path is "{encoded_key}/lock"; the key must end with %3D (encoded `=`) before `/lock`. assert post_path == "evil%3Flock_id%3DBOGUS%26x%3D/lock", f"unexpected POST path: {post_path!r}" delete_path = _path_calls(request_mock)[1] - # DELETE path: "{encoded_key}/lock?lock_id={encoded_id}" — exactly one `?` separates path/query. - assert delete_path.startswith("evil%3Flock_id%3DBOGUS%26x%3D/lock?lock_id="), f"bad DELETE: {delete_path!r}" - assert delete_path.count("?") == 1 + # DELETE path: "{encoded_key}/lock" — lock_id now rides the X-CacheKit-Lock-Id + # header (CWE-532), so the path carries no query and the token never hits the URL. + assert delete_path == "evil%3Flock_id%3DBOGUS%26x%3D/lock", f"bad DELETE: {delete_path!r}" + assert "?" not in delete_path + + async def test_lock_id_sent_in_header_not_query(self, backend: CachekitIOBackend) -> None: + """CWE-532: the lock capability token rides the X-CacheKit-Lock-Id request + header, never the query string (which leaks via access/proxy logs + OTel spans).""" + responses = [ + _json_response(200, {"lock_id": "lock-secret-123"}), # acquire + _json_response(200, {}), # release + ] + request_mock = AsyncMock(side_effect=responses) + backend._request_async = request_mock # type: ignore[method-assign] - async def test_url_injection_via_lock_id_encoded(self, backend: CachekitIOBackend) -> None: - """Server-issued lock_id with `&` must be URL-encoded in the DELETE query string.""" + async with backend.acquire_lock("k", timeout=30.0, blocking_timeout=None): + pass + + delete_call = next(c for c in request_mock.await_args_list if c.args[0] == "DELETE") + headers = delete_call.kwargs.get("headers") or {} + endpoint = delete_call.args[1] + # Token present in the header under the exact wire name... + assert headers.get("X-CacheKit-Lock-Id") == "lock-secret-123" + # ...and absent from the URL entirely (no query param, no leak). + assert "lock_id" not in endpoint + assert "lock-secret-123" not in endpoint + + async def test_lock_id_with_query_metachars_isolated_in_header(self, backend: CachekitIOBackend) -> None: + """A server-issued lock_id containing query metacharacters can no longer smuggle a + query param: it travels in the X-CacheKit-Lock-Id header, not the URL. The raw value + is sent verbatim (headers are not URL-encoded; the server compares the raw token).""" request_mock = AsyncMock( side_effect=[ _json_response(200, {"lock_id": "abc&injected=1"}), @@ -235,10 +261,14 @@ async def test_url_injection_via_lock_id_encoded(self, backend: CachekitIOBacken async with backend.acquire_lock("k", timeout=30.0, blocking_timeout=None): pass - delete_path = _path_calls(request_mock)[1] - # The lock_id's `&` must be percent-encoded; raw `&` would smuggle a new param. - assert "abc&injected" not in delete_path, f"unencoded `&` in lock_id: {delete_path!r}" - assert "abc%26injected" in delete_path, f"expected percent-encoded lock_id in {delete_path!r}" + delete_call = next(c for c in request_mock.await_args_list if c.args[0] == "DELETE") + headers = delete_call.kwargs.get("headers") or {} + endpoint = delete_call.args[1] + # Sent verbatim in the header (server compares the raw token)... + assert headers.get("X-CacheKit-Lock-Id") == "abc&injected=1" + # ...and the metacharacters never touch the URL, so query smuggling is impossible. + assert "injected" not in endpoint + assert "?" not in endpoint async def test_malformed_json_body_treated_as_held(self, backend: CachekitIOBackend) -> None: """Empty/non-JSON 200 response must not crash the wrapper (root cause of #129 class)."""