From fcd52ef26797d8b27f8449f3ca70a3ce0f8fb0d2 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 18 Jun 2026 11:06:17 +0000 Subject: [PATCH] fix(mac-chat): stop [stream] progress lines interleaving with the answer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-block '[stream] blk=.. t=..s' lines went to stderr while the answer delta went to stdout; on a shared terminal they interleaved INTO the text (在计算机[stream] blk=1...). Emit the timing line ONLY on the non-interactive (scripted/bridge) path; the interactive CLI now streams ONLY the clean answer delta. Add --chat-stream-stdout so a non-tty bridge run can capture the exact clean live format for validation. Co-authored-by: FluffyAIcode --- inference_engine/bridge/manifest.py | 3 +- .../research/k3_integrated_niah_eval_mac.py | 29 +++++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/inference_engine/bridge/manifest.py b/inference_engine/bridge/manifest.py index a167efb..8a27bbb 100644 --- a/inference_engine/bridge/manifest.py +++ b/inference_engine/bridge/manifest.py @@ -767,7 +767,8 @@ def _harness_preset( "--s5-exact-full-attn", "--fused-specdecode", "--force-f-theta", "--sink-size", "4", "--window-size", "64", "--block-size", "4", "--max-new-tokens", "{max_new_tokens}", "--ignore-turn-stop", - "--chat", "--chat-scripted", "根据pow的机制,给出完整的c代码实现", + "--chat", "--chat-stream-stdout", + "--chat-scripted", "根据pow的机制,给出完整的c代码实现", "--output", "results/research/chat_stream_probe_2815.json", ), ), diff --git a/scripts/research/k3_integrated_niah_eval_mac.py b/scripts/research/k3_integrated_niah_eval_mac.py index a2e1651..f00c151 100644 --- a/scripts/research/k3_integrated_niah_eval_mac.py +++ b/scripts/research/k3_integrated_niah_eval_mac.py @@ -180,6 +180,11 @@ def parse_args() -> argparse.Namespace: ap.add_argument("--chat-scripted", default=None, help="Non-interactive chat: '||'-separated user turns " "(for Mac-bridge verification); writes a transcript.") + ap.add_argument("--chat-stream-stdout", action="store_true", + help="In scripted chat, stream the clean answer delta to " + "stdout (as the interactive CLI does) instead of the " + "per-block [stream] timing lines — lets a non-tty bridge " + "run capture the exact live output format.") ap.add_argument("--chat-native-ref", action="store_true", help="DIAGNOSTIC opt-in: before each chat turn, also run a " "plain NATIVE greedy AR decode of the SAME prompt for " @@ -870,15 +875,21 @@ def cb(toks: List[int]) -> None: except TypeError: txt = tokenizer.decode(toks) if to_stdout: + # Interactive: emit ONLY the clean answer delta to stdout. + # (No per-block progress line here — stderr would interleave + # with the streamed text in the terminal and mangle it.) delta = txt[st["chars"]:] if delta: sys.stdout.write(delta) sys.stdout.flush() st["chars"] = len(txt) - sys.stderr.write( - f"[stream] blk={st['blk']} tok={len(toks)} " - f"t={time.perf_counter() - st['t0']:.1f}s\n") - sys.stderr.flush() + else: + # Non-interactive (bridge/scripted): timing-only progress to + # stderr (proves streaming / liveness in the captured log). + sys.stderr.write( + f"[stream] blk={st['blk']} tok={len(toks)} " + f"t={time.perf_counter() - st['t0']:.1f}s\n") + sys.stderr.flush() return cb print(f"[chat] FULL fused engine: verifier={args.verifier_path} " @@ -893,8 +904,14 @@ def cb(toks: List[int]) -> None: transcript = [] for u in turns: history.append({"role": "user", "content": u}) - res = _gen_turn(_encode_chat(history), - on_commit=_make_stream_cb(to_stdout=False)) + if args.chat_stream_stdout: + sys.stdout.write(f"\ngemma-4 [{u[:24]}]> ") + sys.stdout.flush() + res = _gen_turn(_encode_chat(history), on_commit=_make_stream_cb( + to_stdout=args.chat_stream_stdout)) + if args.chat_stream_stdout: + sys.stdout.write("\n") + sys.stdout.flush() history.append({"role": "assistant", "content": res["text"]}) tps = (res["decode_tokens"] / res["decode_s"] if res["decode_s"] > 0 else 0.0)