From 27c51c0da68e3867d220635eb0ed00b016d9c845 Mon Sep 17 00:00:00 2001
From: gongchensu <zhuyue_134@qq.com>
Date: Wed, 3 Jun 2026 07:57:32 +0000
Subject: [PATCH 1/4] Add operator report tooling

---
 scripts/compare_op_reports.py              | 321 ++++++++++++++++
 scripts/render_operator_coverage_tables.py | 391 +++++++++++++++++++
 tests/conftest.py                          |  11 +
 tests/op_report.py                         | 412 +++++++++++++++++++++
 4 files changed, 1135 insertions(+)
 create mode 100644 scripts/compare_op_reports.py
 create mode 100644 scripts/render_operator_coverage_tables.py
 create mode 100644 tests/op_report.py
diff --git a/scripts/compare_op_reports.py b/scripts/compare_op_reports.py
new file mode 100644
index 000000000..231a07f33
--- /dev/null
+++ b/scripts/compare_op_reports.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import argparse
+import json
+import pathlib
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Compare two InfiniOps pytest operator reports."
+    )
+    parser.add_argument("left", type=pathlib.Path, help="First report JSON path")
+    parser.add_argument("right", type=pathlib.Path, help="Second report JSON path")
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=40,
+        help="Max rows to print per diff section (default: 40)",
+    )
+    parser.add_argument(
+        "--output",
+        type=pathlib.Path,
+        default=None,
+        help="Optional JSON path for writing the full diff report.",
+    )
+    args = parser.parse_args()
+    # Only the printed text is capped by --limit; `diff_report` stays complete.
+    diff_report = _build_diff_report(args.left, args.right)
+    rendered = _render_report(diff_report, args.limit)
+    print(rendered)
+    # Optionally persist the complete diff as JSON, creating parent directories.
+    if args.output is not None:
+        args.output.parent.mkdir(parents=True, exist_ok=True)
+        args.output.write_text(
+            json.dumps(diff_report, indent=2, sort_keys=True) + "\n",
+            encoding="utf-8",
+        )
+        print(f"\nfull diff report written to {args.output}")
+
+
+def _run_header(label, path, summary):
+    # Render the heading for one side of the comparison; absent summary fields
+    # print as None and an empty device list prints as "<auto>".
+    env = summary.get("environment", {})
+    totals = summary.get("totals", {})
+    devices = env.get("requested_devices") or ["<auto>"]
+    counts = " ".join(
+        f"{name}={totals.get(name)}"
+        for name in ("collected", "passed", "skipped", "failed")
+    )
+    header = [f"{label}: {path}", f"  requested_devices={','.join(devices)}"]
+    header.append(f"  torch={env.get('torch_version')}")
+    header.append(f"  {counts}")
+    return "\n".join(header)
+
+
+def _build_diff_report(left_path, right_path):
+    left_summary = _load_json(left_path)
+    right_summary = _load_json(right_path)
+    left_details, left_detail_path = _load_details(left_path)
+    right_details, right_detail_path = _load_details(right_path)
+    # "detail_exists" lets the renderer warn when a sibling details file is missing.
+    return {
+        "left": {
+            "summary_path": str(left_path),
+            "detail_path": str(left_detail_path),
+            "detail_exists": left_detail_path.exists(),
+            "summary": left_summary,
+        },
+        "right": {
+            "summary_path": str(right_path),
+            "detail_path": str(right_detail_path),
+            "detail_exists": right_detail_path.exists(),
+            "summary": right_summary,
+        },
+        "operator_diff": _build_operator_summary_diff(left_summary, right_summary),
+        "case_diff": _build_case_diff(left_details, right_details),
+    }
+
+
+def _build_operator_summary_diff(left_summary, right_summary):
+    left_ops = {_operator_key(row): row for row in left_summary.get("operators", [])}
+    right_ops = {_operator_key(row): row for row in right_summary.get("operators", [])}
+    # Rows are matched by `_operator_key`; `_operator_payload` decides "changed".
+    left_only = sorted(set(left_ops) - set(right_ops))
+    right_only = sorted(set(right_ops) - set(left_ops))
+    changed = [
+        key
+        for key in sorted(set(left_ops) & set(right_ops))
+        if _operator_payload(left_ops[key]) != _operator_payload(right_ops[key])
+    ]
+    # Skip reasons are flattened to {reason: count} so the renderer can compare them.
+    return {
+        "only_left_count": len(left_only),
+        "only_right_count": len(right_only),
+        "changed_count": len(changed),
+        "only_left": [{"key": key, "row": left_ops[key]} for key in left_only],
+        "only_right": [{"key": key, "row": right_ops[key]} for key in right_only],
+        "changed": [
+            {
+                "key": key,
+                "left": left_ops[key],
+                "right": right_ops[key],
+                "left_skip_reasons": {
+                    entry["reason"]: entry["count"]
+                    for entry in left_ops[key]["skip_reasons"]
+                },
+                "right_skip_reasons": {
+                    entry["reason"]: entry["count"]
+                    for entry in right_ops[key]["skip_reasons"]
+                },
+            }
+            for key in changed
+        ],
+    }
+
+
+def _build_case_diff(left_details, right_details):
+    left_cases = {
+        _case_key(record): record for record in left_details if record.get("operator")
+    }
+    right_cases = {
+        _case_key(record): record for record in right_details if record.get("operator")
+    }
+    # Records without an "operator" value were dropped by the filters above.
+    left_only = sorted(set(left_cases) - set(right_cases))
+    right_only = sorted(set(right_cases) - set(left_cases))
+    changed = [
+        key
+        for key in sorted(set(left_cases) & set(right_cases))
+        if _case_payload(left_cases[key]) != _case_payload(right_cases[key])
+    ]
+    # Counts are stored next to the full entries, which keep the whole record.
+    return {
+        "only_left_count": len(left_only),
+        "only_right_count": len(right_only),
+        "changed_count": len(changed),
+        "only_left": [{"key": key, "record": left_cases[key]} for key in left_only],
+        "only_right": [{"key": key, "record": right_cases[key]} for key in right_only],
+        "changed": [
+            {
+                "key": key,
+                "left": left_cases[key],
+                "right": right_cases[key],
+            }
+            for key in changed
+        ],
+    }
+
+
+def _render_report(diff_report, limit):
+    lines = []
+    left = diff_report["left"]
+    right = diff_report["right"]
+    # Both run headers come first, followed by an empty separator line.
+    lines.append(_run_header("left", left["summary_path"], left["summary"]))
+    lines.append(_run_header("right", right["summary_path"], right["summary"]))
+    lines.append("")
+    # A missing detail file was loaded as an empty list, so warn about the case diff.
+    if not left["detail_exists"] or not right["detail_exists"]:
+        missing = []
+
+        if not left["detail_exists"]:
+            missing.append(left["detail_path"])
+
+        if not right["detail_exists"]:
+            missing.append(right["detail_path"])
+
+        lines.append("Warning")
+        lines.append("  Missing detail file(s): " + ", ".join(missing))
+        lines.append("  Case Diff needs both sibling `.details.jsonl` files.")
+        lines.append("")
+
+    lines.extend(_render_operator_summary_diff(diff_report["operator_diff"], limit))
+    lines.append("")
+    lines.extend(_render_case_diff(diff_report["case_diff"], limit))
+
+    return "\n".join(lines)
+
+
+def _render_operator_summary_diff(operator_diff, limit):
+    lines = []
+    # Section title and the three counts, then at most `limit` rows per group.
+    lines.append("Operator Diff")
+    lines.append(
+        "  "
+        f"only_left={operator_diff['only_left_count']} "
+        f"only_right={operator_diff['only_right_count']} "
+        f"changed={operator_diff['changed_count']}"
+    )
+
+    if operator_diff["only_left"]:
+        lines.append("  only in left:")
+
+        for entry in operator_diff["only_left"][:limit]:
+            lines.append(f"    {entry['key']}")
+
+    if operator_diff["only_right"]:
+        lines.append("  only in right:")
+
+        for entry in operator_diff["only_right"][:limit]:
+            lines.append(f"    {entry['key']}")
+
+    if operator_diff["changed"]:
+        lines.append("  changed outcomes:")
+
+        for entry in operator_diff["changed"][:limit]:
+            lines.append(
+                "    "
+                f"{entry['key']}: "
+                f"left={entry['left']['outcomes']} "
+                f"right={entry['right']['outcomes']}"
+            )
+            # Skip reasons are printed only when the two sides disagree.
+            if entry["left_skip_reasons"] != entry["right_skip_reasons"]:
+                lines.append(f"      left_skip_reasons={entry['left_skip_reasons']}")
+                lines.append(f"      right_skip_reasons={entry['right_skip_reasons']}")
+
+    return lines
+
+
+def _render_case_diff(case_diff, limit):
+    lines = []
+    # Section title and the three counts, then at most `limit` rows per group.
+    lines.append("Case Diff")
+    lines.append(
+        "  "
+        f"only_left={case_diff['only_left_count']} "
+        f"only_right={case_diff['only_right_count']} "
+        f"changed={case_diff['changed_count']}"
+    )
+
+    if case_diff["only_left"]:
+        lines.append("  cases only in left:")
+
+        for entry in case_diff["only_left"][:limit]:
+            lines.append(f"    {entry['key']}")
+
+    if case_diff["only_right"]:
+        lines.append("  cases only in right:")
+
+        for entry in case_diff["only_right"][:limit]:
+            lines.append(f"    {entry['key']}")
+
+    if case_diff["changed"]:
+        lines.append("  same case, different result:")
+
+        for entry in case_diff["changed"][:limit]:
+            lines.append(
+                "    "
+                f"{entry['key']}: "
+                f"left={entry['left']['outcome']} "
+                f"right={entry['right']['outcome']}"
+            )
+            # Reasons are printed only when the two sides disagree.
+            if entry["left"].get("reason") != entry["right"].get("reason"):
+                lines.append(f"      left_reason={entry['left'].get('reason')}")
+                lines.append(f"      right_reason={entry['right'].get('reason')}")
+
+    return lines
+
+
+def _operator_key(row):
+    return "{}::{}::{}".format(row["module"], row["operator"], row.get("aten_name"))
+
+
+def _operator_payload(row):
+    # Project a summary row onto the fields whose differences mark it as
+    # changed. Every field is read by direct indexing, so a row that lacks
+    # one of them raises KeyError.
+    compared_fields = ("cases", "outcomes", "skip_reasons")
+    compared_fields += ("implementation_indices", "dtypes")
+
+    return {field: row[field] for field in compared_fields}
+
+
+def _case_key(record):
+    # Build a canonical JSON string identifying a test case. "device", "rtol"
+    # and "atol" params are ignored, so cases that differ only in those match.
+    ignored = {"device", "rtol", "atol"}
+    identity = {
+        field: record.get(field)
+        for field in ("module", "operator", "aten_name", "implementation_index")
+    }
+    identity["params"] = {
+        name: value
+        for name, value in sorted(record.get("params", {}).items())
+        if name not in ignored
+    }
+
+    return json.dumps(identity, sort_keys=True, ensure_ascii=True)
+
+
+def _case_payload(record):
+    return {field: record.get(field) for field in ("outcome", "reason")}
+
+
+def _load_json(path):  # Parse the UTF-8 encoded JSON file at `path`.
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _load_details(summary_path):
+    detail_path = summary_path.with_name(f"{summary_path.stem}.details.jsonl")
+    # A missing sibling file is not an error: report no records plus the path.
+    if not detail_path.exists():
+        return [], detail_path
+    # Whitespace-only lines are skipped instead of being fed to the JSON parser.
+    return (
+        [
+            json.loads(line)
+            for line in detail_path.read_text(encoding="utf-8").splitlines()
+            if line.strip()
+        ],
+        detail_path,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/render_operator_coverage_tables.py b/scripts/render_operator_coverage_tables.py
new file mode 100644
index 000000000..e25a1ce6f
--- /dev/null
+++ b/scripts/render_operator_coverage_tables.py
@@ -0,0 +1,391 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import argparse
+import json
+import pathlib
+from collections import Counter
+
+_REPO_ROOT = pathlib.Path(__file__).resolve().parents[1]
+_BASE_DIR = _REPO_ROOT / "src" / "base"
+_TORCH_OPS_YAML = _REPO_ROOT / "scripts" / "torch_ops.yaml"
+
+_DISPLAY_NAMES = {
+    "ascend": "Ascend",
+    "cambricon": "Cambricon",
+    "cpu": "CPU",
+    "hygon": "Hygon",
+    "iluvatar": "Iluvatar",
+    "metax": "MetaX",
+    "moore": "Moore",
+    "nvidia": "Nvidia",
+}
+
+_STATUS_ORDER = {
+    "FAILED": 0,
+    "PASSED_WITH_SKIPS": 1,
+    "PASSED": 2,
+    "SKIPPED_ONLY": 3,
+    "NO_PYTEST_RECORD": 4,
+}
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=(
+            "Render Markdown coverage tables from pytest operator reports "
+            "and the source operator inventory."
+        )
+    )
+    parser.add_argument(
+        "inputs",
+        nargs="+",
+        type=pathlib.Path,
+        help="Report JSON path(s). Supports single-platform reports and diff reports.",
+    )
+    parser.add_argument(
+        "--output",
+        type=pathlib.Path,
+        default=None,
+        help="Optional Markdown output path. Defaults to stdout only.",
+    )
+    args = parser.parse_args()
+    # The inventory is read from paths under `_REPO_ROOT`; reports come from the CLI.
+    inventory = _load_source_inventory()
+    platforms = _load_platform_reports(args.inputs)
+    markdown = _render_markdown(platforms, inventory)
+    print(markdown)
+    # The Markdown always goes to stdout; --output additionally saves a copy.
+    if args.output is not None:
+        args.output.parent.mkdir(parents=True, exist_ok=True)
+        args.output.write_text(markdown + "\n", encoding="utf-8")
+        print(f"\ncoverage tables written to {args.output}")
+
+
+def _load_source_inventory():
+    inventory = {}
+    # Every `*.h` file stem directly under `_BASE_DIR` is a "native" operator.
+    for path in sorted(_BASE_DIR.glob("*.h")):
+        inventory[path.stem] = {
+            "operator": path.stem,
+            "category": "native",
+        }
+    # Lines of the ops YAML that start with "- " name torch-generated operators.
+    for line in _TORCH_OPS_YAML.read_text(encoding="utf-8").splitlines():
+        stripped = line.strip()
+
+        if not stripped.startswith("- "):
+            continue
+        # setdefault keeps an existing entry (e.g. a native one) on a name clash.
+        operator = _public_op_name(stripped[2:])
+        inventory.setdefault(
+            operator,
+            {
+                "operator": operator,
+                "category": "torch-generated",
+            },
+        )
+
+    return [inventory[name] for name in sorted(inventory)]
+
+
+def _public_op_name(aten_name):
+    # Map an op name to its public form: a leading underscore gains an "aten"
+    # prefix and a single trailing underscore is replaced by "_inplace".
+    prefix = "aten" if aten_name.startswith("_") else ""
+    renamed = prefix + aten_name
+
+    if not renamed.endswith("_") or renamed.endswith("__"):
+        return renamed
+
+    return renamed[:-1] + "_inplace"
+
+
+def _load_platform_reports(paths):
+    platforms = []
+    seen_labels = Counter()
+    # One input file may contribute several summaries (see `_extract_summaries`).
+    for path in paths:
+        payload = json.loads(path.read_text(encoding="utf-8"))
+
+        for summary in _extract_summaries(payload):
+            label = _platform_label(summary)
+            seen_labels[label] += 1
+            # Repeated labels get a numeric suffix ("-2", "-3", ...).
+            if seen_labels[label] > 1:
+                label = f"{label}-{seen_labels[label]}"
+
+            platforms.append(
+                {
+                    "label": label,
+                    "summary": summary,
+                }
+            )
+
+    return platforms
+
+
+def _extract_summaries(payload):
+    # Diff reports embed one summary per side; anything else is one summary.
+    if not {"left", "right", "operator_diff", "case_diff"}.issubset(payload):
+        return [payload]
+    return [payload[side]["summary"] for side in ("left", "right")]
+
+
+def _platform_label(summary):
+    # Prefer the first requested device; otherwise fall back to the stem of
+    # the report's output path, and finally to the literal "report".
+    devices = summary.get("environment", {}).get("requested_devices") or []
+    key = (
+        devices[0]
+        if devices
+        else pathlib.Path(summary.get("output_path", "")).stem or "report"
+    )
+    return _DISPLAY_NAMES.get(key, key)
+
+
+def _render_markdown(platforms, inventory):
+    lines = []
+    platform_views = [
+        _build_platform_view(platform, inventory) for platform in platforms
+    ]
+    # Order: title, inventory totals, summaries, matrix, then per-platform parts.
+    lines.append("# Pytest Operator Coverage")
+    lines.append("")
+    lines.append(
+        f"Source inventory: {len(inventory)} operators "
+        f"({sum(1 for item in inventory if item['category'] == 'native')} native, "
+        f"{sum(1 for item in inventory if item['category'] == 'torch-generated')} torch-generated)"
+    )
+    lines.append("")
+    lines.extend(_render_platform_summary(platform_views))
+    lines.append("")
+    lines.extend(_render_category_summary(platform_views))
+    lines.append("")
+    lines.extend(_render_cross_platform_matrix(platform_views))
+
+    for view in platform_views:
+        lines.append("")
+        lines.extend(_render_platform_missing(view))
+        lines.append("")
+        lines.extend(_render_platform_detail(view))
+
+    return "\n".join(lines).rstrip()
+
+
+def _build_platform_view(platform, inventory):
+    summary = platform["summary"]
+    summary_rows = {row["operator"]: row for row in summary.get("operators", [])}
+    detail_rows = []
+    # NOTE(review): summary rows sharing an operator name overwrite each other above.
+    for item in inventory:
+        summary_row = summary_rows.get(item["operator"])
+        detail_rows.append(_build_detail_row(item, summary_row))
+    # Operators present only in the report (not in the inventory) get no row.
+    return {
+        "label": platform["label"],
+        "summary": summary,
+        "rows": detail_rows,
+    }
+
+
+def _build_detail_row(item, summary_row):
+    # Merge one inventory entry with its pytest summary row into a table row.
+    # Start from the "no record" shape, used as-is when `summary_row` is None.
+    row = {
+        "operator": item["operator"],
+        "category": item["category"],
+        "status": "NO_PYTEST_RECORD",
+        "cases": 0,
+        "passed": 0,
+        "skipped": 0,
+        "failed": 0,
+        "module": "-",
+        "torch_device": "-",
+        "implementation_indices": [],
+        "dtypes": [],
+        "skip_reasons": [],
+    }
+
+    if summary_row is None:
+        return row
+
+    counts = summary_row["outcomes"]
+    passed, skipped, failed = (
+        counts.get(name, 0) for name in ("passed", "skipped", "failed")
+    )
+
+    if failed > 0:
+        status = "FAILED"
+    elif passed > 0:
+        status = "PASSED_WITH_SKIPS" if skipped > 0 else "PASSED"
+    else:
+        status = "SKIPPED_ONLY" if skipped > 0 else "NO_PYTEST_RECORD"
+
+    row.update(
+        status=status,
+        cases=summary_row.get("cases", 0),
+        passed=passed,
+        skipped=skipped,
+        failed=failed,
+        module=summary_row.get("module", "-"),
+        torch_device=summary_row.get("torch_device", "-"),
+        implementation_indices=summary_row.get("implementation_indices", []),
+        dtypes=summary_row.get("dtypes", []),
+        skip_reasons=summary_row.get("skip_reasons", []),
+    )
+
+    return row
+
+
+def _render_platform_summary(platform_views):
+    lines = []
+    # One Markdown table row per platform, counted over that platform's rows.
+    lines.append("## Platform Summary")
+    lines.append("")
+    lines.append(
+        "| Platform | Total Ops | Tested Ops | Pass>0 Ops | Skip-only Ops | Failed Ops | No Pytest Record |"
+    )
+    lines.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |")
+    # "Tested" means any status other than NO_PYTEST_RECORD.
+    for view in platform_views:
+        rows = view["rows"]
+        lines.append(
+            "| "
+            f"{view['label']} | "
+            f"{len(rows)} | "
+            f"{sum(row['status'] != 'NO_PYTEST_RECORD' for row in rows)} | "
+            f"{sum(row['passed'] > 0 for row in rows)} | "
+            f"{sum(row['status'] == 'SKIPPED_ONLY' for row in rows)} | "
+            f"{sum(row['failed'] > 0 for row in rows)} | "
+            f"{sum(row['status'] == 'NO_PYTEST_RECORD' for row in rows)} |"
+        )
+
+    return lines
+
+
+def _render_category_summary(platform_views):
+    lines = []
+    # Same counts as the platform summary, split by operator category.
+    lines.append("## Category Summary")
+    lines.append("")
+    lines.append(
+        "| Platform | Native Total | Native Tested | Native Pass>0 | Native Skip-only | Native No Record | Generated Total | Generated Tested | Generated Pass>0 | Generated Skip-only | Generated No Record |"
+    )
+    lines.append(
+        "| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |"
+    )
+    # Rows whose category is neither "native" nor "torch-generated" are not counted.
+    for view in platform_views:
+        native_rows = [row for row in view["rows"] if row["category"] == "native"]
+        generated_rows = [
+            row for row in view["rows"] if row["category"] == "torch-generated"
+        ]
+        lines.append(
+            "| "
+            f"{view['label']} | "
+            f"{len(native_rows)} | "
+            f"{sum(row['status'] != 'NO_PYTEST_RECORD' for row in native_rows)} | "
+            f"{sum(row['passed'] > 0 for row in native_rows)} | "
+            f"{sum(row['status'] == 'SKIPPED_ONLY' for row in native_rows)} | "
+            f"{sum(row['status'] == 'NO_PYTEST_RECORD' for row in native_rows)} | "
+            f"{len(generated_rows)} | "
+            f"{sum(row['status'] != 'NO_PYTEST_RECORD' for row in generated_rows)} | "
+            f"{sum(row['passed'] > 0 for row in generated_rows)} | "
+            f"{sum(row['status'] == 'SKIPPED_ONLY' for row in generated_rows)} | "
+            f"{sum(row['status'] == 'NO_PYTEST_RECORD' for row in generated_rows)} |"
+        )
+
+    return lines
+
+
+def _render_cross_platform_matrix(platform_views):
+    lines = []
+    labels = [view["label"] for view in platform_views]
+    row_maps = {
+        view["label"]: {row["operator"]: row for row in view["rows"]}
+        for view in platform_views
+    }
+    operators = [row["operator"] for row in platform_views[0]["rows"]]
+    categories = {row["operator"]: row["category"] for row in platform_views[0]["rows"]}
+    # Operator order and categories come from the first view (raises if there is none).
+    lines.append("## Cross-Platform Matrix")
+    lines.append("")
+    lines.append("| Operator | Category | " + " | ".join(labels) + " |")
+    lines.append("| --- | --- | " + " | ".join("---" for _ in labels) + " |")
+    # One table row per operator, one cell per platform label.
+    for operator in operators:
+        cells = [_matrix_cell(row_maps[label][operator]) for label in labels]
+        lines.append(
+            f"| {operator} | {categories[operator]} | " + " | ".join(cells) + " |"
+        )
+
+    return lines
+
+
+def _matrix_cell(row):
+    if row["status"] == "NO_PYTEST_RECORD":
+        return "NO_PYTEST_RECORD"
+    # Any other status is followed by the passed/skipped/failed counts.
+    return f"{row['status']} (P={row['passed']}, S={row['skipped']}, F={row['failed']})"
+
+
+def _render_platform_missing(view):
+    # List the operators of this view whose status is NO_PYTEST_RECORD.
+    absent = []
+    for row in view["rows"]:
+        if row["status"] == "NO_PYTEST_RECORD":
+            absent.append(row["operator"])
+    return [
+        f"## {view['label']} Missing From Pytest",
+        "",
+        f"- Count: {len(absent)}",
+        f"- Operators: {', '.join(absent) if absent else '-'}",
+    ]
+
+
+def _render_platform_detail(view):
+    lines = []
+    # Full per-operator table for a single platform view.
+    lines.append(f"## {view['label']} Detailed Coverage")
+    lines.append("")
+    lines.append(
+        "| Operator | Category | Status | Cases | Passed | Skipped | Failed | Test Module | Device | Impl | Top Skip Reason |"
+    )
+    lines.append(
+        "| --- | --- | --- | ---: | ---: | ---: | ---: | --- | --- | --- | --- |"
+    )
+    # Native operators first, then by `_STATUS_ORDER`, then by operator name.
+    sorted_rows = sorted(
+        view["rows"],
+        key=lambda row: (
+            row["category"] != "native",
+            _STATUS_ORDER[row["status"]],
+            row["operator"],
+        ),
+    )
+    # Only the first skip reason is shown; "|" in it is replaced with "/".
+    for row in sorted_rows:
+        skip_reason = row["skip_reasons"][0]["reason"] if row["skip_reasons"] else "-"
+        impls = ",".join(str(value) for value in row["implementation_indices"]) or "-"
+        lines.append(
+            "| "
+            f"{row['operator']} | "
+            f"{row['category']} | "
+            f"{row['status']} | "
+            f"{row['cases']} | "
+            f"{row['passed']} | "
+            f"{row['skipped']} | "
+            f"{row['failed']} | "
+            f"{row['module']} | "
+            f"{row['torch_device']} | "
+            f"{impls} | "
+            f"{skip_reason.replace('|', '/')} |"
+        )
+
+    return lines
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/conftest.py b/tests/conftest.py
index 540c66602..93b05bcaa 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,6 +5,7 @@
 import torch
 import torch.utils.benchmark as benchmark
 
+from tests.op_report import register_operator_reporter
 from tests.utils import clone_strided, get_available_devices
 
 
@@ -18,6 +19,15 @@ def pytest_addoption(parser):
         default=None,
         help="Device(s) to test on (e.g., `--devices ascend cpu`). Accepts platform names (`nvidia`, `metax`, `iluvatar`, `hygon`, `moore`, `cambricon`, `ascend`) or PyTorch device types (`cuda`, `mlu`, `musa`, `npu`). Defaults to all available devices.",
     )
+    parser.addoption(
+        "--op-report",
+        action="store",
+        default=None,
+        help=(
+            "Write a structured operator coverage report to the given JSON path. "
+            "Also emits sibling `.details.jsonl` and `.summary.txt` files."
+        ),
+    )
 
 
 def pytest_configure(config):
@@ -27,6 +37,7 @@ def pytest_configure(config):
         "markers",
         "auto_act_and_assert: automatically perform Act and Assert phases using the return values",
     )
+    register_operator_reporter(config)
 
 
 def pytest_collectstart(collector):
diff --git a/tests/op_report.py b/tests/op_report.py
new file mode 100644
index 000000000..5072bb900
--- /dev/null
+++ b/tests/op_report.py
@@ -0,0 +1,412 @@
+from __future__ import annotations
+
+import json
+import pathlib
+import platform
+import sys
+import uuid
+from collections import Counter, defaultdict
+
+import pytest
+import torch
+
+from tests.utils import get_available_devices
+
+_REPORT_FORMAT_VERSION = 1
+_DETAIL_SUFFIX = ".details.jsonl"
+_TEXT_SUFFIX = ".summary.txt"
+_TORCH_OPS_SLOT = 8
+_NON_OPERATOR_MODULES = frozenset({"generate_torch_ops"})
+
+
+def register_operator_reporter(config):
+    report_arg = config.getoption("--op-report")
+    # Reporting is opt-in: without a value for --op-report nothing is registered.
+    if not report_arg:
+        return
+    # The plugin is also stored on `config` as `_infini_operator_reporter`.
+    reporter = _OperatorReportPlugin(config, pathlib.Path(report_arg))
+    config._infini_operator_reporter = reporter
+    config.pluginmanager.register(reporter, "infini-operator-report")
+
+
+class _OperatorReportPlugin:
+    """Pytest plugin that records per-test outcomes and writes the report files."""
+
+    def __init__(self, config, output_path):
+        self._config = config
+        self._output_path = output_path.expanduser()
+        self._records_by_nodeid = {}
+        self._tests_collected = 0
+        self._worker_input = getattr(config, "workerinput", None)
+        self._worker_id = (
+            self._worker_input.get("workerid") if self._worker_input else None
+        )
+        self._run_id = (
+            self._worker_input.get("op_report_run_id")
+            if self._worker_input
+            else uuid.uuid4().hex[:8]
+        )
+
+    # xdist-only hook: optional, so validation passes without pytest-xdist.
+    @pytest.hookimpl(optionalhook=True)
+    def pytest_configure_node(self, node):
+        node.workerinput["op_report_run_id"] = self._run_id
+
+    def pytest_collection_finish(self, session):
+        self._tests_collected = session.testscollected
+
+    @pytest.hookimpl(hookwrapper=True)
+    def pytest_runtest_makereport(self, item, call):
+        outcome = yield
+        report = outcome.get_result()
+        record = self._record_from_item(item, report)
+        # Keyed by nodeid, so a later phase's record replaces an earlier one.
+        if record is not None:
+            self._records_by_nodeid[record["nodeid"]] = record
+
+    def pytest_sessionfinish(self, session, exitstatus):
+        self._tests_collected = session.testscollected or self._tests_collected
+        # A worker only dumps its own records; the final reports are written below.
+        if self._worker_id is not None:
+            self._write_detail_records(
+                self._worker_detail_path(),
+                self._sorted_records(self._records_by_nodeid.values()),
+            )
+            return
+
+        records = self._sorted_records(self._records_by_nodeid.values())
+
+        if self._xdist_enabled():
+            worker_records = self._load_worker_records()
+            # Keep the locally gathered records when no worker records were found.
+            if worker_records:
+                records = worker_records
+
+        self._write_final_reports(records, exitstatus)
+
+    def _record_from_item(self, item, report):
+        if report.when == "teardown" and not report.failed:
+            return None
+        # A passed setup is not a result yet; other setup outcomes are recorded.
+        if report.when == "setup" and report.passed:
+            return None
+
+        if report.when == "teardown" and report.failed:
+            outcome = "failed"
+        else:
+            outcome = report.outcome
+
+        context = _item_context(item)
+        context["outcome"] = outcome
+        context["stage"] = report.when
+        # "reason" is only stored when `_report_reason` returns a truthy value.
+        reason = _report_reason(report)
+        if reason:
+            context["reason"] = reason
+
+        return context
+
+    def _xdist_enabled(self):
+        numprocesses = getattr(self._config.option, "numprocesses", 0) or 0
+        return numprocesses > 0
+
+    def _detail_path(self):
+        return self._output_path.with_name(f"{self._output_path.stem}{_DETAIL_SUFFIX}")
+
+    def _text_path(self):
+        return self._output_path.with_name(f"{self._output_path.stem}{_TEXT_SUFFIX}")
+
+    def _worker_detail_path(self):
+        return self._output_path.with_name(
+            f"{self._output_path.stem}.{self._run_id}.{self._worker_id}{_DETAIL_SUFFIX}"
+        )
+
+    def _load_worker_records(self):
+        pattern = f"{self._output_path.stem}.{self._run_id}.*{_DETAIL_SUFFIX}"
+        records = []
+        # The run id in the pattern limits the merge to this run's worker files.
+        for path in sorted(self._output_path.parent.glob(pattern)):
+            records.extend(_read_detail_records(path))
+        return self._sorted_records(records)
+
+    def _write_final_reports(self, records, exitstatus):
+        self._output_path.parent.mkdir(parents=True, exist_ok=True)
+        summary = _build_summary(
+            config=self._config,
+            output_path=self._output_path,
+            run_id=self._run_id,
+            tests_collected=self._tests_collected,
+            exitstatus=exitstatus,
+            records=records,
+        )
+        self._output_path.write_text(
+            json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8"
+        )
+        self._write_detail_records(self._detail_path(), records)
+        self._text_path().write_text(_render_text_summary(summary), encoding="utf-8")
+
+        terminal = self._config.pluginmanager.get_plugin("terminalreporter")
+        if terminal is not None:
+            terminal.write_line(
+                "operator report written to "
+                f"{self._output_path} "
+                f"(details: {self._detail_path()}, text: {self._text_path()})"
+            )
+
+    def _write_detail_records(self, path, records):
+        path.parent.mkdir(parents=True, exist_ok=True)
+        lines = [json.dumps(record, sort_keys=True) for record in records]
+        payload = "\n".join(lines)
+        # End the last record with a newline; no records gives an empty file.
+        if payload:
+            payload += "\n"
+        path.write_text(payload, encoding="utf-8")
+
+    @staticmethod
+    def _sorted_records(records):
+        return sorted(records, key=lambda r: (r["nodeid"], r["stage"], r["outcome"]))
+
+
+def _build_summary(config, output_path, run_id, tests_collected, exitstatus, records):
+    outcome_counts = Counter(record["outcome"] for record in records)
+    operator_rows = _build_operator_rows(records)
+    collected = tests_collected or len(records)
+    # `collected` falls back to the record count when `tests_collected` is falsy.
+    return {
+        "format_version": _REPORT_FORMAT_VERSION,
+        "run_id": run_id,
+        "output_path": str(output_path),
+        "invocation": {
+            "args": list(config.invocation_params.args),
+            "cwd": str(config.invocation_params.dir),
+        },
+        "environment": {
+            "python": sys.version.split()[0],
+            "platform": platform.platform(),
+            "torch_version": torch.__version__,
+            "available_devices": list(get_available_devices()),
+            "requested_devices": list(config.getoption("--devices") or []),
+        },
+        "totals": {
+            "collected": collected,
+            "reported": len(records),
+            "passed": outcome_counts.get("passed", 0),
+            "skipped": outcome_counts.get("skipped", 0),
+            "failed": outcome_counts.get("failed", 0),
+            "exitstatus": exitstatus,
+        },
+        "operator_count": len(operator_rows),
+        "operators": operator_rows,
+    }
+
+
+def _build_operator_rows(records):
+    grouped = defaultdict(list)
+    # Group records by (operator, aten_name, module, torch_device).
+    for record in records:
+        operator = record.get("operator")
+        # Records without an operator name are left out of the summary rows.
+        if not operator:
+            continue
+        key = (
+            operator,
+            record.get("aten_name"),
+            record.get("module"),
+            record.get("torch_device"),
+        )
+        grouped[key].append(record)
+
+    rows = []
+    # Missing key parts sort as "" so mixed None/str keys cannot raise TypeError.
+    ordered = sorted(grouped.items(), key=lambda kv: tuple(p or "" for p in kv[0]))
+    for key, group in ordered:
+        outcome_counts = Counter(record["outcome"] for record in group)
+        # A skipped record may lack "reason"; count it under "<unknown>" then.
+        skipped = [record for record in group if record["outcome"] == "skipped"]
+        skip_reasons = Counter(record.get("reason", "<unknown>") for record in skipped)
+        dtypes = sorted(
+            {record.get("dtype") for record in group if record.get("dtype") is not None}
+        )
+        implementations = sorted(
+            {
+                record.get("implementation_index")
+                for record in group
+                if record.get("implementation_index") is not None
+            }
+        )
+        rows.append(
+            {
+                "operator": key[0],
+                "aten_name": key[1],
+                "module": key[2],
+                "torch_device": key[3],
+                "cases": len(group),
+                "outcomes": {
+                    "passed": outcome_counts.get("passed", 0),
+                    "skipped": outcome_counts.get("skipped", 0),
+                    "failed": outcome_counts.get("failed", 0),
+                },
+                "dtypes": dtypes,
+                "implementation_indices": implementations,
+                "skip_reasons": [
+                    {"reason": reason, "count": count}
+                    for reason, count in sorted(
+                        skip_reasons.items(), key=lambda item: (-item[1], item[0])
+                    )
+                ],
+            }
+        )
+
+    return rows
+
+
+def _render_text_summary(summary):
+    lines = []
+    totals = summary["totals"]
+
+    lines.append(f"run_id: {summary['run_id']}")
+    lines.append(f"report: {summary['output_path']}")
+    lines.append(
+        "requested_devices: "
+        + ", ".join(summary["environment"]["requested_devices"] or ["<auto>"])
+    )
+    lines.append(
+        "available_devices: "
+        + ", ".join(summary["environment"]["available_devices"] or ["<none>"])
+    )
+    lines.append(
+        "totals: "
+        f"collected={totals['collected']} "
+        f"reported={totals['reported']} "
+        f"passed={totals['passed']} "
+        f"skipped={totals['skipped']} "
+        f"failed={totals['failed']} "
+        f"exitstatus={totals['exitstatus']}"
+    )
+    lines.append(f"operator_count: {summary['operator_count']}")
+    lines.append("")
+    lines.append("operators:")
+
+    for row in summary["operators"]:
+        impls = ",".join(str(i) for i in row["implementation_indices"]) or "-"
+        dtypes = ",".join(row["dtypes"]) or "-"
+        lines.append(
+            f"{row['operator']} [{row['torch_device']}] "
+            f"cases={row['cases']} "
+            f"passed={row['outcomes']['passed']} "
+            f"skipped={row['outcomes']['skipped']} "
+            f"failed={row['outcomes']['failed']} "
+            f"impls={impls} "
+            f"dtypes={dtypes}"
+        )
+
+        for entry in row["skip_reasons"]:
+            lines.append(f"  skip x{entry['count']}: {entry['reason']}")
+
+    lines.append("")
+
+    return "\n".join(lines)
+
+
+def _item_context(item):
+    params = getattr(getattr(item, "callspec", None), "params", {})
+    op_meta = params.get("op_meta") if isinstance(params, dict) else None
+    module_name = item.module.__name__.rsplit(".", 1)[-1]
+    module_stem = (
+        module_name.removeprefix("test_")
+        if module_name.startswith("test_")
+        else module_name
+    )
+
+    operator = None
+    aten_name = None
+    overload_name = None
+
+    if isinstance(op_meta, dict):
+        operator = op_meta.get("name")
+        aten_name = op_meta.get("aten_name", operator)
+        overload_name = op_meta.get("overload_name")
+    elif module_name.startswith("test_") and module_stem not in _NON_OPERATOR_MODULES:
+        operator = module_stem
+        aten_name = module_stem
+
+    implementation_index = params.get("implementation_index")
+
+    if implementation_index is None and module_name == "test_torch_ops":
+        implementation_index = _TORCH_OPS_SLOT
+
+    normalized_params = {}
+
+    for key, value in params.items():
+        if key == "op_meta":
+            continue
+
+        normalized_params[key] = _normalize_value(value)
+
+    return {
+        "nodeid": item.nodeid,
+        "module": item.location[0],
+        "test_name": item.originalname or item.name,
+        "operator": operator,
+        "aten_name": aten_name,
+        "overload_name": overload_name,
+        "torch_device": params.get("device"),
+        "dtype": _normalize_value(params.get("dtype")),
+        "implementation_index": implementation_index,
+        "params": normalized_params,
+    }
+
+
+def _normalize_value(value):
+    """Reduce a parameter value to plain strings, numbers, lists and dicts."""
+    if isinstance(value, (str, int, float, bool)) or value is None:
+        return value
+
+    # Torch dtypes/devices and paths are recorded by their string form.
+    if isinstance(value, (torch.dtype, torch.device, pathlib.Path)):
+        return str(value)
+
+    if isinstance(value, (tuple, list)):
+        return [_normalize_value(v) for v in value]
+
+    if isinstance(value, dict):
+        return {str(k): _normalize_value(v) for k, v in value.items()}
+
+    if isinstance(value, (set, frozenset)):
+        items = [_normalize_value(v) for v in value]
+
+        try:
+            return sorted(items)
+        except TypeError:
+            # Mixed or unorderable members (e.g. `None` next to strings):
+            # fall back to a deterministic order instead of crashing.
+            return sorted(items, key=repr)
+
+    return repr(value)
+
+
+def _report_reason(report):
+    """Return the reason text for a non-passing report, or `None`."""
+    if report.passed:
+        return None
+
+    details = report.longrepr
+    # Skip reports may carry a 3-tuple; its last item is used as the reason.
+    if report.skipped and isinstance(details, tuple) and len(details) == 3:
+        return str(details[2]).strip()
+
+    text = getattr(report, "longreprtext", "") or str(details)
+    stripped = [part for part in map(str.strip, text.splitlines()) if part]
+    return stripped[-1] if stripped else None
+
+
+def _read_detail_records(path):
+    """Load the JSON-lines detail file at `path` into a list of records.
+
+    Blank and whitespace-only lines are skipped rather than handed to
+    `json.loads`, which would reject them.
+    """
+    text = path.read_text(encoding="utf-8")
+
+    return [json.loads(line) for line in text.splitlines() if line.strip()]

From 6fcb4a645479cf075ab96ea06055d879899c2a0c Mon Sep 17 00:00:00 2001
From: gongchensu <zhuyue_134@qq.com>
Date: Wed, 3 Jun 2026 07:57:32 +0000
Subject: [PATCH 2/4] Add local dev build/test workflows

---
 .gitignore                     |   2 +
 scripts/dev_build.sh           | 218 +++++++++++++++++++++++++++++++++
 scripts/dev_platforms.sh       | 138 +++++++++++++++++++++
 scripts/dev_test.sh            | 144 ++++++++++++++++++++++
 scripts/skip_stats.sh          |  44 +++++++
 scripts/summarize_op_report.py | 210 +++++++++++++++++++++++++++++++
 tests/test_dev_scripts.py      |  84 +++++++++++++
 7 files changed, 840 insertions(+)
 create mode 100755 scripts/dev_build.sh
 create mode 100644 scripts/dev_platforms.sh
 create mode 100755 scripts/dev_test.sh
 create mode 100755 scripts/skip_stats.sh
 create mode 100755 scripts/summarize_op_report.py
 create mode 100644 tests/test_dev_scripts.py

diff --git a/.gitignore b/.gitignore
index 2effaff2f..c6501fb2d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 # Generated files
 build/
+build-*/
 generated/
+reports/
 
 # Prerequisites
 *.d
diff --git a/scripts/dev_build.sh b/scripts/dev_build.sh
new file mode 100755
index 000000000..90e72627a
--- /dev/null
+++ b/scripts/dev_build.sh
@@ -0,0 +1,218 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+repo_root="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+source "${repo_root}/scripts/dev_platforms.sh"
+
+usage() {  # Print the command-line help text on stdout.
+    cat <<'EOF'
+Usage:
+  scripts/dev_build.sh [cpu|nvidia|iluvatar|hygon|metax|moore|cambricon|ascend|auto] [-j|--jobs N] [--torch-jobs N] [--binding-jobs N]
+
+Examples:
+  scripts/dev_build.sh
+  scripts/dev_build.sh cambricon
+  scripts/dev_build.sh metax --jobs 8
+  scripts/dev_build.sh nvidia
+  scripts/dev_build.sh auto
+
+What it does:
+  1. Re-runs CMake configure in a persistent build directory.
+  2. Incrementally builds the Python extension target (`ops`).
+  3. Installs the result into `build-<platform>/install/infini/`.
+
+Why this is faster than `pip install .[dev] --no-build-isolation`:
+  - It reuses the same build directory, so unchanged objects are not rebuilt.
+  - It avoids wheel build/install work in a temp directory on every run.
+  - `[dev]` dependencies only need to be installed once.
+
+Important:
+  - This repo's wrapper/codegen runs during CMake configure, not only build.
+    So this script always runs `cmake -S -B ...` first, ensuring generator
+    changes (`generate_torch_ops.py`, `generate_wrappers.py`, YAML edits) are
+    picked up before the incremental build.
+EOF
+}
+
+platform="auto"
+jobs="${JOBS:-$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)}"
+torch_jobs="${INFINIOPS_TORCH_COMPILE_JOBS:-2}"
+binding_jobs="${INFINIOPS_BINDING_COMPILE_JOBS:-2}"
+
+while (($#)); do
+    if infiniops_is_supported_platform "$1"; then
+        platform="$1"
+        shift
+        continue
+    fi
+    # `${2:?...}` reports a missing option value instead of an unbound `$2`.
+    case "$1" in
+        auto)
+            platform="$1"
+            shift
+            ;;
+        -j|--jobs)
+            jobs="${2:?$1 requires a value}"
+            shift 2
+            ;;
+        --torch-jobs)
+            torch_jobs="${2:?$1 requires a value}"
+            shift 2
+            ;;
+        --binding-jobs)
+            binding_jobs="${2:?$1 requires a value}"
+            shift 2
+            ;;
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        *)
+            echo "Unknown argument: $1" >&2
+            usage >&2
+            exit 1
+            ;;
+    esac
+done
+
+python_bin="${PYTHON_BIN:-$(command -v python3)}"
+
+if [[ "$platform" == "auto" ]]; then
+    mapfile -t detected_platforms < <(infiniops_detect_platforms)
+
+    case "${#detected_platforms[@]}" in
+        0)
+            platform="cpu"
+            echo "[dev_build] auto-detect: no accelerator platform found, using cpu"
+            ;;
+        1)
+            platform="${detected_platforms[0]}"
+            echo "[dev_build] auto-detect: using ${platform}"
+            ;;
+        *)
+            echo "Auto-detected multiple accelerator platforms: ${detected_platforms[*]}." >&2
+            echo "Pass one explicitly: $(infiniops_supported_platforms_csv)." >&2
+            exit 1
+            ;;
+    esac
+fi
+
+with_cpu="ON"
+with_torch="ON"
+with_nvidia="OFF"
+with_iluvatar="OFF"
+with_hygon="OFF"
+with_cambricon="OFF"
+with_metax="OFF"
+with_moore="OFF"
+with_ascend="OFF"
+
+case "$platform" in
+    nvidia)
+        with_nvidia="ON"
+        ;;
+    iluvatar)
+        with_iluvatar="ON"
+        ;;
+    hygon)
+        with_hygon="ON"
+        ;;
+    cambricon)
+        with_cambricon="ON"
+        ;;
+    metax)
+        with_metax="ON"
+        ;;
+    moore)
+        with_moore="ON"
+        ;;
+    ascend)
+        with_ascend="ON"
+        ;;
+    cpu)
+        ;;
+    *)
+        echo "Unsupported platform: $platform" >&2
+        exit 1
+        ;;
+esac
+
+build_dir="${repo_root}/build-${platform}"
+install_root="${build_dir}/install"
+install_dir="${install_root}/infini"
+generator="${CMAKE_GENERATOR:-}"
+cached_generator=""
+
+if [[ -z "${generator}" && -f "${build_dir}/CMakeCache.txt" ]]; then
+    cached_generator="$(sed -n 's/^CMAKE_GENERATOR:INTERNAL=//p' "${build_dir}/CMakeCache.txt" | head -n 1)"
+    generator="${cached_generator}"
+fi
+
+if [[ "${generator}" == "Ninja" ]] && ! command -v ninja > /dev/null 2>&1; then
+    generator=""
+fi
+
+if [[ -z "${generator}" ]]; then
+    if command -v ninja > /dev/null 2>&1; then
+        generator="Ninja"
+    else
+        generator="Unix Makefiles"
+    fi
+fi
+
+if [[ -n "${cached_generator}" && "${cached_generator}" != "${generator}" ]]; then
+    echo "[dev_build] generator changed: ${cached_generator} -> ${generator}"
+    rm -f "${build_dir}/CMakeCache.txt"
+    rm -rf "${build_dir}/CMakeFiles"
+fi
+
+echo "[dev_build] repo      : ${repo_root}"
+echo "[dev_build] platform  : ${platform}"
+echo "[dev_build] python    : ${python_bin}"
+echo "[dev_build] build dir : ${build_dir}"
+echo "[dev_build] install   : ${install_root}"
+echo "[dev_build] package   : ${install_dir}"
+echo "[dev_build] generator : ${generator}"
+echo "[dev_build] jobs      : build=${jobs} torch=${torch_jobs} binding=${binding_jobs}"
+
+cmake -S "${repo_root}" -B "${build_dir}" -G "${generator}" \
+    -DPython_EXECUTABLE="${python_bin}" \
+    -DWITH_CPU="${with_cpu}" \
+    -DWITH_TORCH="${with_torch}" \
+    -DWITH_NVIDIA="${with_nvidia}" \
+    -DWITH_ILUVATAR="${with_iluvatar}" \
+    -DWITH_HYGON="${with_hygon}" \
+    -DWITH_CAMBRICON="${with_cambricon}" \
+    -DWITH_METAX="${with_metax}" \
+    -DWITH_MOORE="${with_moore}" \
+    -DWITH_ASCEND="${with_ascend}" \
+    -DAUTO_DETECT_DEVICES=OFF \
+    -DAUTO_DETECT_BACKENDS=OFF \
+    -DGENERATE_PYTHON_BINDINGS=ON \
+    -DINFINIOPS_TORCH_COMPILE_JOBS="${torch_jobs}" \
+    -DINFINIOPS_BINDING_COMPILE_JOBS="${binding_jobs}"
+
+cmake --build "${build_dir}" --target ops -j "${jobs}"
+mkdir -p "${install_root}" "${install_dir}"
+rm -f \
+    "${install_root}/__init__.py" \
+    "${install_root}/libinfiniops.so" \
+    "${install_root}/torch_ops_metadata.json" \
+    "${install_root}"/ops*.so
+mkdir -p "${install_dir}"
+cmake --install "${build_dir}" --prefix "${install_dir}"
+
+cat <<EOF
+
+[dev_build] done
+
+Use this build in pytest with:
+  PYTHONPATH="${install_root}:\$PYTHONPATH" pytest ...
+
+For example:
+  PYTHONPATH="${install_root}:\$PYTHONPATH" pytest --devices ${platform}
+
+Installed files:
+  ${install_dir}/ops*.so
+  ${install_dir}/torch_ops_metadata.json
+EOF
diff --git a/scripts/dev_platforms.sh b/scripts/dev_platforms.sh
new file mode 100644
index 000000000..c92a7a2ea
--- /dev/null
+++ b/scripts/dev_platforms.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+
+INFINIOPS_SUPPORTED_PLATFORMS=(
+    cpu
+    nvidia
+    iluvatar
+    hygon
+    metax
+    moore
+    cambricon
+    ascend
+)
+
+infiniops_supported_platforms_usage() {
+    local IFS='|'
+    printf '%s|auto' "${INFINIOPS_SUPPORTED_PLATFORMS[*]}"
+}
+
+infiniops_supported_platforms_csv() {
+    local IFS=', '
+    printf '%s' "${INFINIOPS_SUPPORTED_PLATFORMS[*]}"
+}
+
+infiniops_is_supported_platform() {
+    local candidate="${1:-}"
+    local platform
+
+    for platform in "${INFINIOPS_SUPPORTED_PLATFORMS[@]}"; do
+        if [[ "${platform}" == "${candidate}" ]]; then
+            return 0
+        fi
+    done
+
+    return 1
+}
+
+_infiniops_has_glob_match() {
+    compgen -G "$1" > /dev/null
+}
+
+# Print the first directory under a DTK root that has an executable `bin/nvcc`.
+# Arguments:
+#   $1 - DTK root directory to search.
+# Outputs:
+#   The matching directory on stdout.
+# Returns:
+#   0 when a candidate was found, 1 otherwise.
+_infiniops_find_hygon_cuda_root() {
+    local dtk_root="$1"
+    local candidate
+
+    # Fixed locations are probed first, then versioned `cuda-*` directories.
+    # The glob is expanded inline: when nothing matches it stays literal and
+    # fails the `-x` test, so the caller's `nullglob` setting is left alone
+    # and no empty array is expanded under `set -u` (an error before Bash 4.4).
+    for candidate in \
+        "${dtk_root}/cuda" \
+        "${dtk_root}/cuda/cuda" \
+        "${dtk_root}"/cuda/cuda-*; do
+        if [[ -x "${candidate}/bin/nvcc" ]]; then
+            printf '%s\n' "${candidate}"
+            return 0
+        fi
+    done
+
+    return 1
+}
+
+_infiniops_detect_nvidia() {
+    _infiniops_has_glob_match "${INFINIOPS_NVIDIA_DEVICE_GLOB:-/dev/nvidia*}"
+}
+
+_infiniops_detect_iluvatar() {
+    _infiniops_has_glob_match "${INFINIOPS_ILUVATAR_DEVICE_GLOB:-/dev/iluvatar*}"
+}
+
+_infiniops_detect_hygon() {
+    local dtk_root="${DTK_ROOT:-${INFINIOPS_HYGON_DTK_ROOT:-/opt/dtk}}"
+
+    _infiniops_find_hygon_cuda_root "${dtk_root}" > /dev/null
+}
+
+_infiniops_detect_metax() {
+    if [[ -n "${MACA_PATH:-}" ]]; then
+        return 0
+    fi
+
+    grep -hqs 9999 ${INFINIOPS_METAX_PCI_VENDOR_GLOB:-/sys/bus/pci/devices/*/vendor} 2> /dev/null
+}
+
+_infiniops_detect_moore() {
+    [[ -n "${MUSA_ROOT:-}" || -n "${MUSA_HOME:-}" || -n "${MUSA_PATH:-}" ]]
+}
+
+_infiniops_detect_cambricon() {
+    [[ -n "${NEUWARE_HOME:-}" ]]
+}
+
+_infiniops_detect_ascend() {
+    [[ -n "${ASCEND_HOME_PATH:-}" ]] || \
+        _infiniops_has_glob_match "${INFINIOPS_ASCEND_DEVICE_GLOB:-/dev/davinci0}"
+}
+
+infiniops_detect_platforms() {
+    local detected=()
+
+    if _infiniops_detect_nvidia; then
+        detected+=("nvidia")
+    fi
+
+    if _infiniops_detect_iluvatar; then
+        detected+=("iluvatar")
+    fi
+
+    if _infiniops_detect_hygon; then
+        detected+=("hygon")
+    fi
+
+    if _infiniops_detect_metax; then
+        detected+=("metax")
+    fi
+
+    if _infiniops_detect_cambricon; then
+        detected+=("cambricon")
+    fi
+
+    if _infiniops_detect_moore; then
+        detected+=("moore")
+    fi
+
+    if _infiniops_detect_ascend; then
+        detected+=("ascend")
+    fi
+
+    if ((${#detected[@]} > 0)); then
+        printf '%s\n' "${detected[@]}"
+    fi
+}
diff --git a/scripts/dev_test.sh b/scripts/dev_test.sh
new file mode 100755
index 000000000..4fe20d4f5
--- /dev/null
+++ b/scripts/dev_test.sh
@@ -0,0 +1,144 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+repo_root="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+source "${repo_root}/scripts/dev_platforms.sh"
+
+usage() {
+    cat <<'EOF'
+Usage:
+  scripts/dev_test.sh [cpu|nvidia|iluvatar|hygon|metax|moore|cambricon|ascend|auto] [--report PATH] [--no-build] [-- pytest-args...]
+
+Examples:
+  scripts/dev_test.sh cambricon
+  scripts/dev_test.sh metax
+  scripts/dev_test.sh nvidia
+  scripts/dev_test.sh cambricon -- tests/test_torch_ops.py -k index
+
+Default behavior:
+  1. Run `scripts/dev_build.sh <platform>`
+  2. Run pytest with `--devices <platform> --op-report reports/<platform>.json`
+  3. Print case-level and operator-level skip stats
+
+Notes:
+  - `--no-build` skips the build step and reuses the last local install.
+  - Extra args after `--` are passed straight to pytest.
+EOF
+}
+
+platform="auto"
+report_path=""
+do_build=1
+pytest_args=()
+pytest_targets=()
+
+while (($#)); do
+    if infiniops_is_supported_platform "$1"; then
+        platform="$1"
+        shift
+        continue
+    fi
+    # `${2:?...}` reports a missing option value instead of an unbound `$2`.
+    case "$1" in
+        auto)
+            platform="$1"
+            shift
+            ;;
+        --report)
+            report_path="${2:?$1 requires a value}"
+            shift 2
+            ;;
+        --no-build)
+            do_build=0
+            shift
+            ;;
+        --)
+            shift
+            pytest_args=("$@")
+            break
+            ;;
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        *)
+            echo "Unknown argument: $1" >&2
+            usage >&2
+            exit 1
+            ;;
+    esac
+done
+
+if [[ "$platform" == "auto" ]]; then
+    mapfile -t detected_platforms < <(infiniops_detect_platforms)
+
+    case "${#detected_platforms[@]}" in
+        0)
+            platform="cpu"
+            echo "[dev_test] auto-detect: no accelerator platform found, using cpu"
+            ;;
+        1)
+            platform="${detected_platforms[0]}"
+            echo "[dev_test] auto-detect: using ${platform}"
+            ;;
+        *)
+            echo "Auto-detected multiple accelerator platforms: ${detected_platforms[*]}." >&2
+            echo "Pass one explicitly: $(infiniops_supported_platforms_csv)." >&2
+            exit 1
+            ;;
+    esac
+fi
+
+if [[ -z "$report_path" ]]; then
+    report_path="${repo_root}/reports/${platform}.json"
+fi
+
+install_dir="${repo_root}/build-${platform}/install"
+summary_script="${repo_root}/scripts/summarize_op_report.py"
+detail_path="${report_path%.json}.details.jsonl"
+text_path="${report_path%.json}.summary.txt"
+
+if [[ "$do_build" -eq 1 ]]; then
+    "${repo_root}/scripts/dev_build.sh" "$platform"
+fi
+
+if [[ ! -d "$install_dir" ]]; then
+    echo "Install dir not found: $install_dir" >&2
+    echo "Run scripts/dev_build.sh $platform first." >&2
+    exit 1
+fi
+
+mkdir -p "$(dirname "$report_path")"
+rm -f "$report_path" "$detail_path" "$text_path"
+
+for arg in "${pytest_args[@]}"; do
+    candidate="${arg%%::*}"
+
+    if [[ -e "$candidate" || -e "${repo_root}/$candidate" ]]; then
+        pytest_targets+=("$arg")
+    fi
+done
+# Default to the repo's tests by absolute path so any working directory works.
+if [[ ${#pytest_targets[@]} -eq 0 ]]; then
+    pytest_targets=("${repo_root}/tests")
+fi
+
+echo "[dev_test] platform : ${platform}"
+echo "[dev_test] report   : ${report_path}"
+echo "[dev_test] install  : ${install_dir}"
+echo "[dev_test] targets  : ${pytest_targets[*]}"
+
+set +e
+PYTHONPATH="${install_dir}${PYTHONPATH:+:${PYTHONPATH}}" \
+    python3 -m pytest "${pytest_targets[@]}" --devices "${platform}" --op-report "${report_path}" "${pytest_args[@]}"
+pytest_status=$?
+set -e
+
+if [[ -f "$report_path" ]]; then
+    echo ""
+    python3 "$summary_script" "$report_path"
+else
+    echo "[dev_test] report not found: ${report_path}" >&2
+fi
+
+exit "$pytest_status"
diff --git a/scripts/skip_stats.sh b/scripts/skip_stats.sh
new file mode 100755
index 000000000..680281678
--- /dev/null
+++ b/scripts/skip_stats.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+repo_root="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+source "${repo_root}/scripts/dev_platforms.sh"
+
+usage() {
+    cat <<'EOF'
+Usage:
+  scripts/skip_stats.sh [cpu|nvidia|iluvatar|hygon|metax|moore|cambricon|ascend|PATH-TO-REPORT] [--show-skip-only]
+
+Examples:
+  scripts/skip_stats.sh cambricon
+  scripts/skip_stats.sh metax
+  scripts/skip_stats.sh nvidia
+  scripts/skip_stats.sh reports/cambricon.json --show-skip-only
+EOF
+}
+
+if (($# == 0)); then
+    usage >&2
+    exit 1
+fi
+
+summary_script="${repo_root}/scripts/summarize_op_report.py"
+first_arg="$1"
+shift
+
+if infiniops_is_supported_platform "$first_arg"; then
+    report_path="${repo_root}/reports/${first_arg}.json"  # Per-platform report.
+else
+    report_path="$first_arg"
+fi
+
+if [[ "$report_path" != /* ]]; then
+    report_path="${repo_root}/${report_path}"
+fi
+
+if [[ ! -f "$report_path" ]]; then
+    echo "Report not found: $report_path" >&2
+    exit 1
+fi
+
+python3 "$summary_script" "$report_path" "$@"
diff --git a/scripts/summarize_op_report.py b/scripts/summarize_op_report.py
new file mode 100755
index 000000000..56a7b8a48
--- /dev/null
+++ b/scripts/summarize_op_report.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import argparse
+import json
+import pathlib
+
+
+_REPO_ROOT = pathlib.Path(__file__).resolve().parents[1]
+_BASE_DIR = _REPO_ROOT / "src" / "base"
+_TORCH_OPS_YAML = _REPO_ROOT / "scripts" / "torch_ops.yaml"
+
+_DISPLAY_NAMES = {
+    "ascend": "Ascend",
+    "cambricon": "Cambricon",
+    "cpu": "CPU",
+    "hygon": "Hygon",
+    "iluvatar": "Iluvatar",
+    "metax": "MetaX",
+    "moore": "Moore",
+    "nvidia": "Nvidia",
+}
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=(
+            "Print a compact summary for one or more pytest operator reports, "
+            "including both case-level skips and operator-level skip-only counts."
+        )
+    )
+    parser.add_argument(
+        "inputs", nargs="+", type=pathlib.Path, help="Report JSON path(s)"
+    )
+    parser.add_argument(
+        "--show-skip-only",
+        action="store_true",
+        help="Also list operator names whose status is SKIPPED_ONLY.",
+    )
+    args = parser.parse_args()
+
+    inventory = _load_inventory()
+    first = True
+
+    for path in args.inputs:
+        payload = json.loads(path.read_text(encoding="utf-8"))
+
+        for label, summary in _extract_summaries(path, payload):
+            if not first:
+                print("")
+            first = False
+            _print_summary(label, summary, inventory, args.show_skip_only)
+
+
+def _load_inventory():
+    inventory = {}
+
+    for path in sorted(_BASE_DIR.glob("*.h")):
+        inventory[path.stem] = {"operator": path.stem, "category": "native"}
+
+    for line in _TORCH_OPS_YAML.read_text(encoding="utf-8").splitlines():
+        stripped = line.strip()
+        if not stripped.startswith("- "):
+            continue
+
+        operator = _public_op_name(stripped[2:])
+        inventory.setdefault(
+            operator,
+            {"operator": operator, "category": "torch-generated"},
+        )
+
+    return [inventory[name] for name in sorted(inventory)]
+
+
+def _public_op_name(aten_name):
+    """Return the public operator name derived from `aten_name`."""
+    # Names starting with an underscore are prefixed with `aten`.
+    name = f"aten{aten_name}" if aten_name.startswith("_") else aten_name
+
+    # Exactly one trailing underscore is rewritten to `_inplace`.
+    if name.endswith("__") or not name.endswith("_"):
+        return name
+
+    return f"{name[:-1]}_inplace"
+
+
+def _extract_summaries(path, payload):
+    """Return `(label, summary)` pairs for a plain report or a diff report."""
+    diff_keys = {"left", "right", "operator_diff", "case_diff"}
+
+    # A plain report is its own summary, labelled after the file stem.
+    if not diff_keys.issubset(payload):
+        return [(_summary_label(payload, fallback=path.stem), payload)]
+
+    # A diff report embeds one summary per side.
+    pairs = []
+    for side in ("left", "right"):
+        side_summary = payload[side]["summary"]
+        pairs.append((_summary_label(side_summary, suffix=side), side_summary))
+    return pairs
+
+
+def _summary_label(summary, fallback=None, suffix=None):
+    """Build a display label from the first requested device or `fallback`."""
+    devices = summary.get("environment", {}).get("requested_devices")
+
+    # With no requested device, use `fallback`, then the literal "report".
+    key = devices[0] if devices else fallback or "report"
+    display = _DISPLAY_NAMES.get(key, key)
+
+    return display if suffix is None else f"{display} ({suffix})"
+
+
+def _print_summary(label, summary, inventory, show_skip_only):
+    env = summary.get("environment", {})
+    totals = summary.get("totals", {})
+    rows = _build_rows(summary, inventory)
+
+    tested = sum(1 for row in rows if row["status"] != "NO_PYTEST_RECORD")
+    pass_gt0 = sum(1 for row in rows if row["passed"] > 0)
+    any_skip = sum(1 for row in rows if row["skipped"] > 0)
+    skip_only = [row for row in rows if row["status"] == "SKIPPED_ONLY"]
+    failed = sum(1 for row in rows if row["status"] == "FAILED")
+    no_record = sum(1 for row in rows if row["status"] == "NO_PYTEST_RECORD")
+
+    print(f"{label}")
+    print(f"  torch={env.get('torch_version')}")
+    print(
+        "  case totals: "
+        f"collected={totals.get('collected')} "
+        f"passed={totals.get('passed')} "
+        f"skipped={totals.get('skipped')} "
+        f"failed={totals.get('failed')}"
+    )
+    print(
+        "  operator totals: "
+        f"total={len(rows)} "
+        f"tested={tested} "
+        f"pass>0={pass_gt0} "
+        f"any-skip={any_skip} "
+        f"skip-only={len(skip_only)} "
+        f"failed={failed} "
+        f"no-record={no_record}"
+    )
+
+    if show_skip_only and skip_only:
+        print("  skip-only operators:")
+        for row in skip_only:
+            print(
+                "    "
+                f"{row['operator']} "
+                f"(cases={row['cases']}, skipped={row['skipped']}, module={row['module']})"
+            )
+
+
+def _build_rows(summary, inventory):
+    """Return one status row per inventory operator.
+
+    A report has one entry per (operator, aten name, module, device) group,
+    so entries sharing an operator name are summed before deriving a status.
+    """
+    merged = {}
+
+    for entry in summary.get("operators", []):
+        totals = merged.setdefault(
+            entry["operator"],
+            {"cases": 0, "passed": 0, "skipped": 0, "failed": 0, "modules": {}},
+        )
+        totals["cases"] += entry.get("cases", 0)
+        # A dict keeps the modules unique and in first-seen order.
+        totals["modules"].setdefault(entry.get("module", "-"))
+
+        for outcome in ("passed", "skipped", "failed"):
+            totals[outcome] += entry["outcomes"].get(outcome, 0)
+
+    rows = []
+
+    for item in inventory:
+        totals = merged.get(item["operator"], {})
+        passed = totals.get("passed", 0)
+        skipped = totals.get("skipped", 0)
+        failed = totals.get("failed", 0)
+
+        if failed > 0:
+            status = "FAILED"
+        elif passed > 0:
+            status = "PASSED_WITH_SKIPS" if skipped > 0 else "PASSED"
+        elif skipped > 0:
+            status = "SKIPPED_ONLY"
+        else:
+            status = "NO_PYTEST_RECORD"
+
+        rows.append(
+            {
+                "operator": item["operator"],
+                "status": status,
+                "cases": totals.get("cases", 0),
+                "passed": passed,
+                "skipped": skipped,
+                "failed": failed,
+                "module": ",".join(map(str, totals.get("modules", ["-"]))),
+            }
+        )
+
+    return rows
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_dev_scripts.py b/tests/test_dev_scripts.py
new file mode 100644
index 000000000..0c9033176
--- /dev/null
+++ b/tests/test_dev_scripts.py
@@ -0,0 +1,84 @@
+import os
+import subprocess
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
+
+def _run_bash(script, *, env=None, check=True):
+    """Run `script` with Bash from the repo root in a minimal environment."""
+    base_env = {
+        "HOME": os.environ.get("HOME", str(REPO_ROOT)),
+        "PATH": os.environ.get("PATH", ""),
+        **(env or {}),
+    }
+    # Non-login shell: profile scripts must not re-export the probed variables.
+    return subprocess.run(
+        ["bash", "-c", script],
+        cwd=REPO_ROOT,
+        text=True,
+        capture_output=True,
+        check=check,
+        env=base_env,
+    )
+
+
+def _isolated_probe_env():
+    return {
+        "DTK_ROOT": str(REPO_ROOT / ".tmp-no-dtk"),
+        "INFINIOPS_NVIDIA_DEVICE_GLOB": str(REPO_ROOT / ".tmp-no-dev" / "nvidia*"),
+        "INFINIOPS_ILUVATAR_DEVICE_GLOB": str(REPO_ROOT / ".tmp-no-dev" / "iluvatar*"),
+        "INFINIOPS_ASCEND_DEVICE_GLOB": str(REPO_ROOT / ".tmp-no-dev" / "davinci0"),
+        "INFINIOPS_METAX_PCI_VENDOR_GLOB": str(REPO_ROOT / ".tmp-no-dev" / "vendor"),
+    }
+
+
+def test_dev_build_accepts_nvidia_platform():
+    result = _run_bash("bash scripts/dev_build.sh nvidia --help")
+
+    assert "Usage:" in result.stdout
+
+
+def test_dev_test_accepts_ascend_platform():
+    result = _run_bash("bash scripts/dev_test.sh ascend --help")
+
+    assert "Usage:" in result.stdout
+
+
+def test_detect_platforms_reports_fake_nvidia_probe(tmp_path):
+    fake_dev = tmp_path / "dev"
+    fake_dev.mkdir()
+    (fake_dev / "nvidia0").touch()
+
+    result = _run_bash(
+        "source scripts/dev_platforms.sh; infiniops_detect_platforms",
+        env={
+            **_isolated_probe_env(),
+            "INFINIOPS_NVIDIA_DEVICE_GLOB": str(fake_dev / "nvidia*"),
+        },
+    )
+
+    assert result.stdout.strip() == "nvidia"
+
+
+def test_detect_platforms_reports_env_backends_in_cmake_order():
+    result = _run_bash(
+        "source scripts/dev_platforms.sh; infiniops_detect_platforms",
+        env={
+            **_isolated_probe_env(),
+            "MACA_PATH": "/tmp/maca",
+            "NEUWARE_HOME": "/tmp/neuware",
+        },
+    )
+
+    assert result.stdout.strip().splitlines() == ["metax", "cambricon"]
+
+
+def test_detect_platforms_emits_nothing_when_no_backend_matches():
+    result = _run_bash(
+        "source scripts/dev_platforms.sh; infiniops_detect_platforms | sed -n l",
+        env=_isolated_probe_env(),
+    )
+
+    assert result.stdout == ""

From 987f576c5b327617dbe35dedef62b1775c6eca9e Mon Sep 17 00:00:00 2001
From: gongchensu <zhuyue_134@qq.com>
Date: Wed, 3 Jun 2026 07:57:32 +0000
Subject: [PATCH 3/4] Avoid rewriting unchanged generated build artifacts

---
 scripts/generate_torch_ops.py    | 85 +++++++++++++++++++++-------
 scripts/generate_wrappers.py     | 96 +++++++++++++++++++++++++-------
 src/CMakeLists.txt               | 70 ++++++++++++++++++++---
 tests/test_generate_torch_ops.py | 34 +++++++++++
 tests/test_generate_wrappers.py  | 34 +++++++++++
 5 files changed, 269 insertions(+), 50 deletions(-)

diff --git a/scripts/generate_torch_ops.py b/scripts/generate_torch_ops.py
index e1636591c..052641fbb 100644
--- a/scripts/generate_torch_ops.py
+++ b/scripts/generate_torch_ops.py
@@ -170,6 +170,40 @@ def _normalize_cpp_type(cpp_type: str) -> str:
     return text.rstrip("&").strip()
 
 
+def _write_text_if_changed(path: pathlib.Path, content: str) -> bool:
+    """Write `content` as UTF-8 unless unchanged; return whether a write happened."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    # Fixed encoding: the comparison must not depend on the host's locale.
+    if path.exists() and path.read_text(encoding="utf-8") == content:
+        return False
+
+    path.write_text(content, encoding="utf-8")
+    return True
+
+
+def _prune_empty_dirs(root: pathlib.Path) -> None:
+    """Remove empty directories under `root`, deepest first, then `root` if empty."""
+    if not root.exists():
+        return
+    subdirs = [p for p in sorted(root.iterdir(), reverse=True) if p.is_dir()]
+    for subdir in subdirs:
+        _prune_empty_dirs(subdir)
+
+    if root.is_dir() and next(root.iterdir(), None) is None:
+        root.rmdir()
+
+
+def _remove_stale_files(root: pathlib.Path, expected_files: set[pathlib.Path]) -> None:
+    """Delete files under `root` not in `expected_files`, then prune empty dirs."""
+    if root.exists():
+        for entry in sorted(root.rglob("*"), reverse=True):
+            is_stale = entry.is_file() and entry not in expected_files
+            if is_stale:
+                entry.unlink()
+
+        _prune_empty_dirs(root)
+
+
 @dataclasses.dataclass
 class Param:
     name: str
@@ -1424,24 +1458,31 @@ def _clang_format(text: str, path: pathlib.Path) -> str:
     ).stdout
 
 
-def _emit(name: str, ops: list[Op], *, emit_base: bool) -> None:
+def _emit(name: str, ops: list[Op], *, emit_base: bool) -> set[pathlib.Path]:
     base_path = _GENERATED_BASE_DIR / f"{name}.h"
     torch_dir = _GENERATED_TORCH_DIR / name
     torch_header_path = torch_dir / f"{name}.h"
     torch_source_path = torch_dir / f"{name}.cc"
+    emitted_paths: set[pathlib.Path] = set()
 
     if emit_base:
-        _GENERATED_BASE_DIR.mkdir(parents=True, exist_ok=True)
-        base_path.write_text(_clang_format(_generate_base_header(name, ops), base_path))
-
-    torch_dir.mkdir(parents=True, exist_ok=True)
+        _write_text_if_changed(
+            base_path, _clang_format(_generate_base_header(name, ops), base_path)
+        )
+        emitted_paths.add(base_path)
 
-    torch_header_path.write_text(
-        _clang_format(_generate_torch_header(name, ops), torch_header_path)
+    _write_text_if_changed(
+        torch_header_path,
+        _clang_format(_generate_torch_header(name, ops), torch_header_path),
     )
-    torch_source_path.write_text(
-        _clang_format(_generate_torch_source(name, ops), torch_source_path)
+    _write_text_if_changed(
+        torch_source_path,
+        _clang_format(_generate_torch_source(name, ops), torch_source_path),
     )
+    emitted_paths.add(torch_header_path)
+    emitted_paths.add(torch_source_path)
+    # Paths are reported whether or not the file had to be rewritten.
+    return emitted_paths
 
 
 def main() -> int:
@@ -1469,18 +1510,10 @@ def main() -> int:
     op_names = args.ops or yaml.safe_load(_OPS_YAML_PATH.read_text())
     aten_entries = yaml.safe_load(_load_aten_yaml(args.pytorch_version))
 
-    # Wipe previous outputs so files for ops that have since been removed,
-    # renamed, or rejected by `cpp_type` don't linger and get picked up by
-    # the CMake glob. Both `generated/base/` and `generated/torch/` are
-    # written exclusively by this script.
-    if _GENERATED_BASE_DIR.exists():
-        shutil.rmtree(_GENERATED_BASE_DIR)
-
-    if _GENERATED_TORCH_DIR.exists():
-        shutil.rmtree(_GENERATED_TORCH_DIR)
-
     skipped: list[tuple[str, str]] = []
     metadata: list[dict] = []
+    expected_base_files: set[pathlib.Path] = set()
+    expected_torch_files: set[pathlib.Path] = set()
     ops_by_public_name: dict[str, list[Op]] = collections.defaultdict(list)
 
     for name in op_names:
@@ -1565,7 +1598,13 @@ def main() -> int:
         # resolves through `src/` first). Signature mismatches surface as
         # compile errors with a clear message — drop the op from the YAML
         # to suppress.
-        _emit(public_name, usable, emit_base=not base_exists)
+        emitted_paths = _emit(public_name, usable, emit_base=not base_exists)
+        expected_base_files.update(
+            path for path in emitted_paths if path.is_relative_to(_GENERATED_BASE_DIR)
+        )
+        expected_torch_files.update(
+            path for path in emitted_paths if path.is_relative_to(_GENERATED_TORCH_DIR)
+        )
 
         for op in usable:
             metadata.append(
@@ -1586,7 +1625,11 @@ def main() -> int:
             )
 
     _GENERATED_DIR.mkdir(parents=True, exist_ok=True)
-    _METADATA_PATH.write_text(json.dumps({"ops": metadata}, indent=2) + "\n")
+    _write_text_if_changed(
+        _METADATA_PATH, json.dumps({"ops": metadata}, indent=2) + "\n"
+    )
+    _remove_stale_files(_GENERATED_BASE_DIR, expected_base_files)
+    _remove_stale_files(_GENERATED_TORCH_DIR, expected_torch_files)
 
     generated_names = sorted({m["name"] for m in metadata})
     print(
diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py
index e5ebbc0c9..6037bbbf3 100644
--- a/scripts/generate_wrappers.py
+++ b/scripts/generate_wrappers.py
@@ -83,6 +83,40 @@ def _op_relative_type(op_name):
     return "::".join(parts)
 
 
+def _write_text_if_changed(path: pathlib.Path, content: str) -> bool:
+    """Write `content` as UTF-8 unless unchanged; return whether a write happened."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    # Fixed encoding: the comparison must not depend on the host's locale.
+    if path.exists() and path.read_text(encoding="utf-8") == content:
+        return False
+
+    path.write_text(content, encoding="utf-8")
+    return True
+
+
+def _prune_empty_dirs(root: pathlib.Path) -> None:
+    """Remove empty directories under `root`, deepest first, then `root` if empty."""
+    if not root.exists():
+        return
+    subdirs = [p for p in sorted(root.iterdir(), reverse=True) if p.is_dir()]
+    for subdir in subdirs:
+        _prune_empty_dirs(subdir)
+
+    if root.is_dir() and next(root.iterdir(), None) is None:
+        root.rmdir()
+
+
+def _remove_stale_files(root: pathlib.Path, expected_files: set[pathlib.Path]) -> None:
+    """Delete files under `root` not in `expected_files`, then prune empty dirs."""
+    if root.exists():
+        for entry in sorted(root.rglob("*"), reverse=True):
+            is_stale = entry.is_file() and entry not in expected_files
+            if is_stale:
+                entry.unlink()
+
+        _prune_empty_dirs(root)
+
+
 @functools.lru_cache(maxsize=1)
 def _get_system_include_flags():
     """Probe the system C++ compiler for default include paths so libclang
@@ -1289,14 +1323,8 @@ def _dispatch_gen_batch_size():
 
     args = parser.parse_args()
 
-    # Wipe previous outputs so files for ops that have since been removed
-    # from the active set (e.g. when toggling `--with-torch`) do not linger
-    # and get globbed by a later build.
     for directory in (_BINDINGS_DIR, _GENERATED_SRC_DIR, _INCLUDE_DIR):
-        if directory.exists():
-            shutil.rmtree(directory)
-
-        directory.mkdir(parents=True)
+        directory.mkdir(parents=True, exist_ok=True)
 
     ops_json = pathlib.Path("ops.json")
 
@@ -1320,6 +1348,9 @@ def _dispatch_gen_batch_size():
             artifacts = list(executor.map(_generate_op_artifacts, ops.items()))
 
     op_names = [artifact["op_name"] for artifact in artifacts]
+    expected_binding_files: set[pathlib.Path] = set()
+    expected_generated_src_files: set[pathlib.Path] = set()
+    expected_include_files: set[pathlib.Path] = set()
     dispatch_declarations = [
         declaration
         for artifact in artifacts
@@ -1338,34 +1369,45 @@ def _dispatch_gen_batch_size():
         source_path = _GENERATED_SRC_DIR / op_name
         header_name = artifact["header_name"]
         bind_func_name = artifact["bind_func_name"]
+        binding_header_path = _BINDINGS_DIR / header_name
 
-        (_BINDINGS_DIR / header_name).write_text(artifact["pybind11"])
+        _write_text_if_changed(binding_header_path, artifact["pybind11"])
+        expected_binding_files.add(binding_header_path)
 
         if use_monolithic_bindings:
             op_includes.append(f'#include "{header_name}"')
         else:
-            (_BINDINGS_DIR / f"{op_name}.cc").write_text(artifact["binding_source"])
+            binding_source_path = _BINDINGS_DIR / f"{op_name}.cc"
+            _write_text_if_changed(binding_source_path, artifact["binding_source"])
+            expected_binding_files.add(binding_source_path)
 
         source_path.mkdir(exist_ok=True)
-        (_GENERATED_SRC_DIR / op_name / "operator.cc").write_text(
-            artifact["legacy_c_source"]
-        )
-        (_INCLUDE_DIR / header_name).write_text(artifact["legacy_c_header"])
+        legacy_source_path = _GENERATED_SRC_DIR / op_name / "operator.cc"
+        _write_text_if_changed(legacy_source_path, artifact["legacy_c_source"])
+        expected_generated_src_files.add(legacy_source_path)
+
+        include_path = _INCLUDE_DIR / header_name
+        _write_text_if_changed(include_path, artifact["legacy_c_header"])
+        expected_include_files.add(include_path)
 
         bind_func_names.append(bind_func_name)
 
     dispatch_header = _generate_generated_dispatch_header(
         op_names, args.devices, dispatch_declarations
     )
-    (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header)
+    dispatch_header_path = _BINDINGS_DIR / "generated_dispatch.h"
+    _write_text_if_changed(dispatch_header_path, dispatch_header)
+    expected_binding_files.add(dispatch_header_path)
 
     call_instantiation_header = _generate_operator_call_instantiation_header(
         op_names, call_instantiation_declarations
     )
     (_INCLUDE_DIR / "infini").mkdir(exist_ok=True)
-    (_INCLUDE_DIR / "infini" / "operator_call_instantiations.h").write_text(
-        call_instantiation_header
+    call_instantiation_header_path = (
+        _INCLUDE_DIR / "infini" / "operator_call_instantiations.h"
     )
+    _write_text_if_changed(call_instantiation_header_path, call_instantiation_header)
+    expected_include_files.add(call_instantiation_header_path)
 
     dispatch_batch_size = _dispatch_gen_batch_size()
 
@@ -1384,9 +1426,11 @@ def _dispatch_gen_batch_size():
             for definition in artifact["dispatch_definitions"]
         ]
         dispatch_source = _generate_generated_dispatch_source(impl_paths, definitions)
-        (_BINDINGS_DIR / f"generated_dispatch_{dispatch_batch_index}.cc").write_text(
-            dispatch_source
+        dispatch_source_path = (
+            _BINDINGS_DIR / f"generated_dispatch_{dispatch_batch_index}.cc"
         )
+        _write_text_if_changed(dispatch_source_path, dispatch_source)
+        expected_binding_files.add(dispatch_source_path)
 
         call_instantiation_definitions = [
             definition
@@ -1398,10 +1442,14 @@ def _dispatch_gen_batch_size():
             impl_paths,
             call_instantiation_definitions,
         )
-        (
+        call_instantiation_source_path = (
             _GENERATED_SRC_DIR
             / f"operator_call_instantiations_{dispatch_batch_index}.cc"
-        ).write_text(call_instantiation_source)
+        )
+        _write_text_if_changed(
+            call_instantiation_source_path, call_instantiation_source
+        )
+        expected_generated_src_files.add(call_instantiation_source_path)
 
     bind_func_calls = "\n".join(
         f"{bind_func_name}(m);" for bind_func_name in bind_func_names
@@ -1440,4 +1488,10 @@ def _dispatch_gen_batch_size():
 }}  // namespace infini::ops
 """
 
-    (_BINDINGS_DIR / "ops.cc").write_text(ops_source)
+    ops_source_path = _BINDINGS_DIR / "ops.cc"
+    _write_text_if_changed(ops_source_path, ops_source)
+    expected_binding_files.add(ops_source_path)
+
+    _remove_stale_files(_BINDINGS_DIR, expected_binding_files)
+    _remove_stale_files(_GENERATED_SRC_DIR, expected_generated_src_files)
+    _remove_stale_files(_INCLUDE_DIR, expected_include_files)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d3784e682..cfe97af10 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,5 +1,37 @@
 add_library(infiniops SHARED)
 
+function(_infiniops_write_if_different path content)
+    # Write `content` to `path` unless the file already holds exactly that text;
+    # an up-to-date file is left untouched so its timestamp does not change.
+    if(EXISTS "${path}")
+        file(READ "${path}" _current_content)
+        if(_current_content STREQUAL "${content}")
+            return()
+        endif()
+    endif()
+
+    # Reached only when the file is missing or its content differs.
+    get_filename_component(_parent_dir "${path}" DIRECTORY)
+    file(MAKE_DIRECTORY "${_parent_dir}")
+    file(WRITE "${path}" "${content}")
+endfunction()
+
+function(_infiniops_prune_stale_cc_files dir)
+    cmake_parse_arguments(PARSE_ARGV 1 _ARG "" "" "EXPECTED")
+    # Arguments after the EXPECTED keyword are collected into _ARG_EXPECTED.
+    if(NOT EXISTS "${dir}")
+        return()
+    endif()
+    # Delete every top-level *.cc in `dir` whose path is not listed in EXPECTED.
+    file(GLOB _existing_cc_files "${dir}/*.cc")
+    foreach(_path IN LISTS _existing_cc_files)
+        list(FIND _ARG_EXPECTED "${_path}" _index)
+        if(_index EQUAL -1)
+            file(REMOVE "${_path}")
+        endif()
+    endforeach()
+endfunction()
+
 include(GNUInstallDirs)
 
 file(GLOB BASE_SRCS CONFIGURE_DEPENDS "*.cc")
@@ -363,29 +395,40 @@ if(WITH_TORCH)
     set(TORCH_COMPILE_SOURCES ${TORCH_SOURCES})
     if(INFINIOPS_TORCH_UNITY_BATCH_SIZE GREATER 1)
         set(_torch_unity_dir "${CMAKE_CURRENT_BINARY_DIR}/torch_unity")
-        file(REMOVE_RECURSE "${_torch_unity_dir}")
         file(MAKE_DIRECTORY "${_torch_unity_dir}")
 
         set(TORCH_COMPILE_SOURCES)
+        set(_torch_unity_expected)
         set(_torch_unity_index 0)
         set(_torch_unity_count 0)
+        set(_torch_unity_content "")
         foreach(_src IN LISTS TORCH_SOURCES)
             if(_torch_unity_count EQUAL 0)
                 set(_torch_unity_src
                     "${_torch_unity_dir}/torch_unity_${_torch_unity_index}.cc")
-                file(WRITE "${_torch_unity_src}"
-                    "// Generated by CMake to batch ATen-heavy torch wrappers.\n")
                 list(APPEND TORCH_COMPILE_SOURCES "${_torch_unity_src}")
+                list(APPEND _torch_unity_expected "${_torch_unity_src}")
+                set(_torch_unity_content
+                    "// Generated by CMake to batch ATen-heavy torch wrappers.\n")
             endif()
 
-            file(APPEND "${_torch_unity_src}" "#include \"${_src}\"\n")
+            string(APPEND _torch_unity_content "#include \"${_src}\"\n")
 
             math(EXPR _torch_unity_count "${_torch_unity_count} + 1")
             if(_torch_unity_count GREATER_EQUAL INFINIOPS_TORCH_UNITY_BATCH_SIZE)
+                _infiniops_write_if_different(
+                    "${_torch_unity_src}" "${_torch_unity_content}")
                 math(EXPR _torch_unity_index "${_torch_unity_index} + 1")
                 set(_torch_unity_count 0)
+                set(_torch_unity_content "")
             endif()
         endforeach()
+        if(NOT _torch_unity_count EQUAL 0)
+            _infiniops_write_if_different(
+                "${_torch_unity_src}" "${_torch_unity_content}")
+        endif()
+        _infiniops_prune_stale_cc_files(
+            "${_torch_unity_dir}" EXPECTED ${_torch_unity_expected})
 
         list(LENGTH TORCH_SOURCES _torch_source_count)
         list(LENGTH TORCH_COMPILE_SOURCES _torch_unity_source_count)
@@ -625,29 +668,40 @@ if(GENERATE_PYTHON_BINDINGS)
             "Number of generated pybind11 sources to include in each unity translation unit; set to 1 to disable")
         if(INFINIOPS_BINDING_UNITY_BATCH_SIZE GREATER 1)
             set(_binding_unity_dir "${CMAKE_CURRENT_BINARY_DIR}/binding_unity")
-            file(REMOVE_RECURSE "${_binding_unity_dir}")
             file(MAKE_DIRECTORY "${_binding_unity_dir}")
 
             set(PYBIND11_COMPILE_SOURCES)
+            set(_binding_unity_expected)
             set(_binding_unity_index 0)
             set(_binding_unity_count 0)
+            set(_binding_unity_content "")
             foreach(_src IN LISTS PYBIND11_UNITY_SOURCES)
                 if(_binding_unity_count EQUAL 0)
                     set(_binding_unity_src
                         "${_binding_unity_dir}/binding_unity_${_binding_unity_index}.cc")
-                    file(WRITE "${_binding_unity_src}"
-                        "// Generated by CMake to batch pybind11 wrapper sources.\n")
                     list(APPEND PYBIND11_COMPILE_SOURCES "${_binding_unity_src}")
+                    list(APPEND _binding_unity_expected "${_binding_unity_src}")
+                    set(_binding_unity_content
+                        "// Generated by CMake to batch pybind11 wrapper sources.\n")
                 endif()
 
-                file(APPEND "${_binding_unity_src}" "#include \"${_src}\"\n")
+                string(APPEND _binding_unity_content "#include \"${_src}\"\n")
 
                 math(EXPR _binding_unity_count "${_binding_unity_count} + 1")
                 if(_binding_unity_count GREATER_EQUAL INFINIOPS_BINDING_UNITY_BATCH_SIZE)
+                    _infiniops_write_if_different(
+                        "${_binding_unity_src}" "${_binding_unity_content}")
                     math(EXPR _binding_unity_index "${_binding_unity_index} + 1")
                     set(_binding_unity_count 0)
+                    set(_binding_unity_content "")
                 endif()
             endforeach()
+            if(NOT _binding_unity_count EQUAL 0)
+                _infiniops_write_if_different(
+                    "${_binding_unity_src}" "${_binding_unity_content}")
+            endif()
+            _infiniops_prune_stale_cc_files(
+                "${_binding_unity_dir}" EXPECTED ${_binding_unity_expected})
 
             list(LENGTH PYBIND11_UNITY_SOURCES _binding_source_count)
             list(LENGTH PYBIND11_COMPILE_SOURCES _binding_unity_source_count)
diff --git a/tests/test_generate_torch_ops.py b/tests/test_generate_torch_ops.py
index 1ba7ea915..165511180 100644
--- a/tests/test_generate_torch_ops.py
+++ b/tests/test_generate_torch_ops.py
@@ -259,3 +259,37 @@ def test_existing_base_overload_matches_by_name_when_types_repeat():
 
     assert "c10::optional<at::IntArrayRef>{}, true, keepdim" in source
     assert "unbiased" not in source
+
+
+def test_write_text_if_changed_preserves_unchanged_mtime(tmp_path):
+    import os  # Local import so the test does not rely on module-level imports.
+    module = _load_generator_module()
+    path = tmp_path / "generated.cc"
+    path.write_text("same\n")
+    os.utime(path, (10**9, 10**9))  # Backdate so any rewrite changes the mtime.
+    before = path.stat().st_mtime_ns
+    assert module._write_text_if_changed(path, "same\n") is False
+    assert path.stat().st_mtime_ns == before
+    assert module._write_text_if_changed(path, "different\n") is True
+    assert path.read_text() == "different\n"
+    assert path.stat().st_mtime_ns > before
+
+
+def test_remove_stale_files_keeps_expected_outputs(tmp_path):
+    module = _load_generator_module()
+    root = tmp_path / "generated"
+    keep = root / "torch" / "keep.cc"
+    stale = root / "torch" / "stale.cc"
+    nested_stale = root / "base" / "stale.h"
+    keep.parent.mkdir(parents=True)
+    nested_stale.parent.mkdir(parents=True)
+    keep.write_text("keep\n")
+    stale.write_text("stale\n")
+    nested_stale.write_text("stale\n")
+    # Only `keep` is declared as an expected output.
+    module._remove_stale_files(root, {keep})
+    # Unexpected files are deleted, and `base/` goes too once it is empty.
+    assert keep.exists()
+    assert not stale.exists()
+    assert not nested_stale.exists()
+    assert not (root / "base").exists()
diff --git a/tests/test_generate_wrappers.py b/tests/test_generate_wrappers.py
index 92a1530b6..2f2c57afd 100644
--- a/tests/test_generate_wrappers.py
+++ b/tests/test_generate_wrappers.py
@@ -121,3 +121,37 @@ class Mul {
         "DefaultImplementationIndexForMul(DeviceFromPybind11Handle(input).type()))"
     ) in text
     assert 'py::arg("implementation_index") = py::none()' in text
+
+
+def test_write_text_if_changed_preserves_unchanged_mtime(tmp_path):
+    import os  # Local import so the test does not rely on module-level imports.
+    module = _load_generator_module()
+    path = tmp_path / "bindings.cc"
+    path.write_text("same\n")
+    os.utime(path, (10**9, 10**9))  # Backdate so any rewrite changes the mtime.
+    before = path.stat().st_mtime_ns
+    assert module._write_text_if_changed(path, "same\n") is False
+    assert path.stat().st_mtime_ns == before
+    assert module._write_text_if_changed(path, "different\n") is True
+    assert path.read_text() == "different\n"
+    assert path.stat().st_mtime_ns > before
+
+
+def test_remove_stale_files_keeps_expected_outputs(tmp_path):
+    module = _load_generator_module()
+    root = tmp_path / "generated"
+    keep = root / "bindings" / "keep.cc"
+    stale = root / "bindings" / "stale.cc"
+    nested_stale = root / "src" / "foo" / "operator.cc"
+    keep.parent.mkdir(parents=True)
+    nested_stale.parent.mkdir(parents=True)
+    keep.write_text("keep\n")
+    stale.write_text("stale\n")
+    nested_stale.write_text("stale\n")
+    # Only `keep` is declared as an expected output.
+    module._remove_stale_files(root, {keep})
+    # Unexpected files are deleted, and `src/foo/` goes too once it is empty.
+    assert keep.exists()
+    assert not stale.exists()
+    assert not nested_stale.exists()
+    assert not (root / "src" / "foo").exists()

From 16fa83c608ebaa9f9823fcb97e063eee499edac7 Mon Sep 17 00:00:00 2001
From: zhuyue <zhuyue@qiyuanlab.com>
Date: Fri, 12 Jun 2026 10:58:37 +0800
Subject: [PATCH 4/4] Stabilize dev script platform tests

---
 .gitignore                | 1 +
 tests/test_dev_scripts.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index c6501fb2d..17f472687 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ build/
 build-*/
 generated/
 reports/
+ci-results/
 
 # Prerequisites
 *.d
diff --git a/tests/test_dev_scripts.py b/tests/test_dev_scripts.py
index 0c9033176..abbef9652 100644
--- a/tests/test_dev_scripts.py
+++ b/tests/test_dev_scripts.py
@@ -14,8 +14,10 @@ def _run_bash(script, *, env=None, check=True):
     if env:
         base_env.update(env)
 
+    # Use a non-login shell so host startup files cannot reintroduce real
+    # accelerator env vars into the mocked detection environment.
     return subprocess.run(
-        ["bash", "-lc", script],
+        ["bash", "-c", script],
         cwd=REPO_ROOT,
         text=True,
         capture_output=True,