From 7465a27dd04d86383c462ce4696d0f09212de151 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 13 Jun 2026 11:05:25 +0000 Subject: [PATCH] bridge: add k3-beta-scorecard preset (NIAH ctx280 fused all-mlx CUDA-trim, Kakeya vs MLX-only) Co-authored-by: FluffyAIcode --- inference_engine/bridge/manifest.py | 29 +++++++++++++++++++ .../inference_engine/bridge/test_manifest.py | 1 + 2 files changed, 30 insertions(+) diff --git a/inference_engine/bridge/manifest.py b/inference_engine/bridge/manifest.py index d1224895..64b6a3bb 100644 --- a/inference_engine/bridge/manifest.py +++ b/inference_engine/bridge/manifest.py @@ -266,6 +266,35 @@ def _harness_preset( }, validate_reports=False, ), + Preset( + name="k3-beta-scorecard", + description="Beta scorecard: all-MLX fused + CUDA-trim on NIAH ctx280 " + "(S5), natural stop. Reports Kakeya vs MLX-only oracle: " + "bounded KV (S5 vs naive), recall, context length, decode tok/s.", + command_templates=( + ( + "python3", "scripts/research/k3_integrated_niah_eval_mac.py", + "--verifier-path", "${ENV:KAKEYA_MAC_VERIFIER_PATH}", + "--drafter-id", "${ENV:KAKEYA_MAC_DRAFTER_ID}", + "--f-theta-dir", "${ENV:KAKEYA_MAC_FTHETA_DIR}", + "--s5-exact-full-attn", "--fused-specdecode", + "--all-mlx-drafter", "--cuda-trim", + "--n-samples", "{n_samples}", + "--max-new-tokens", "{max_new_tokens}", + "--block-size", "{block_size}", + "--prefill-chunk-size", "512", + "--output", + "results/research/k3_mac_bridge_k3_beta_scorecard.json", + ), + ), + timeout_minutes=120, + params={ + "n_samples": ("int:n_samples", "5"), + "max_new_tokens": ("int:max_new_tokens", "32"), + "block_size": ("int:block_size", "8"), + }, + validate_reports=False, + ), Preset( name="k3-fused-allmlx-code-trim", description="CUDA-parity rollback test: all-MLX fused + --cuda-trim " diff --git a/tests/inference_engine/bridge/test_manifest.py b/tests/inference_engine/bridge/test_manifest.py index c69930d2..ffe43c99 100644 --- a/tests/inference_engine/bridge/test_manifest.py +++ b/tests/inference_engine/bridge/test_manifest.py @@ -56,6 +56,7 @@ def _manifest(**overrides): def test_allowlist_contains_exactly_the_documented_presets(): assert sorted(PRESETS) == [ "integration-tests", + "k3-beta-scorecard", "k3-drafter-parity", "k3-drafter-parity-fp32", "k3-evidence-gate",