From 60f431e3657c8c760dd24353f9408e6af0bd8a58 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Wed, 27 May 2026 17:38:00 -0500
Subject: [PATCH] perf(waterdata): emit compact CQL2 JSON to halve POST chunk count

monitoring-locations is the one service that POSTs a CQL2 body (it
doesn't support comma-separated multi-value GET). The body was
pretty-printed via json.dumps(indent=4), ~39 B/value, so it counted ~2x
against both the server's ~8 KB request-size cap and the chunk planner's
byte budget. The tightest separators (~17 B/value) roughly double how
many ids fit per sub-request, halving the chunk count and API requests
for large id lists:

    n_ids   indent=4   compact
      500          4         2
     1000          8         4
     5000         32        16

Live check: a 500-id query returns all 500 rows in 2 sub-requests
(was 4). The WAF body limit (403) is empirically ~8.2-8.4 KB, so
8000-byte compact bodies stay safely under it.

Locked in with a compactness assertion on the monitoring-locations POST
test.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 dataretrieval/waterdata/utils.py | 14 ++++++++++++--
 tests/waterdata_test.py          |  7 +++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index 9a2be2c4..475998ca 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -321,7 +321,17 @@ def _cql2_param(args: dict[str, Any]) -> str:
     Returns
     -------
     str
-        JSON string representation of the CQL2 query.
+        Compact JSON string representation of the CQL2 query.
+
+    Notes
+    -----
+    Serialized with the tightest separators (no indentation or
+    whitespace). The body counts against the server's ~8 KB request-size
+    limit and against :func:`chunking._request_bytes` when planning
+    chunks, so every saved byte fits more values per POST: compact
+    encoding roughly halves the per-value cost versus pretty-printing,
+    which roughly doubles how many monitoring-location ids fit in one
+    sub-request and so halves the chunk count for large id lists.
     """
     filters = []
     for key, values in args.items():
@@ -329,7 +339,7 @@ def _cql2_param(args: dict[str, Any]) -> str:
 
     query = {"op": "and", "args": filters}
 
-    return json.dumps(query, indent=4)
+    return json.dumps(query, separators=(",", ":"))
 
 
 def _default_headers():
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 9e0d4c70..c3836534 100644
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -158,6 +158,13 @@ def test_construct_api_requests_monitoring_locations_post():
     assert req.method == "POST"
     assert req.headers["Content-Type"] == "application/query-cql-json"
 
+    # Body is serialized compactly (tight separators, no whitespace): the
+    # body counts against the server's ~8 KB request-size cap and the
+    # chunk planner's byte budget, so pretty-printing would needlessly
+    # halve how many ids fit per sub-request and double the chunk count.
+    raw = req.content.decode()
+    assert "\n" not in raw and ", " not in raw and ": " not in raw
+
     body = json.loads(req.content)
     # Top-level shape: AND over a list of per-param predicates.
     assert body["op"] == "and"