From b1e8b7571beacfc7983667a95210c8aefc9cebca Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 20:28:21 +0000 Subject: [PATCH 1/7] feat(scanner): add v1 public API + shields.io badge generators Introduces a stable, versioned API surface that the registry-server proxy (and any third-party badge embed) can depend on. Shipped as a new 'scanner build-api-v1' subcommand that takes a generated latest.json and writes the full v1 tree under an output directory: api/v1/skills.json Compact index (verdict + risk_score + source_sha per skill). api/v1/skills/<namespace>/<slug>.json Per-skill detail with reasons, findings by severity/rule, and a 'links' block pointing at the badge endpoints and the immutable source-tree URL. api/v1/skills/<namespace>/<slug>/badge/ verdict.json Shields.io endpoint verdict.svg Inline two-rect SVG risk.json Shields.io endpoint risk.svg Inline two-rect SVG api/v1/history.json Reshape of history/index.json with absolute report URLs. The source-tree URL deliberately pins to source_sha (not source_ref) so links into upstream skills survive branch movement. The badge JSON shape is shields.io's documented endpoint contract so README embeds can use https://img.shields.io/endpoint?url=<badge-json-url> directly. 12 new pytest cases cover the index/detail/history shapes, the source_sha pinning, badge colour bands, and SVG well-formedness. 
--- scanner/api.py | 207 +++++++++++++++++++++++++++++++++++++++++++ scanner/badges.py | 138 +++++++++++++++++++++++++++++ scanner/cli.py | 60 ++++++++++++- tests/test_api.py | 168 +++++++++++++++++++++++++++++++++++ tests/test_badges.py | 76 ++++++++++++++++ 5 files changed, 648 insertions(+), 1 deletion(-) create mode 100644 scanner/api.py create mode 100644 scanner/badges.py create mode 100644 tests/test_api.py create mode 100644 tests/test_badges.py diff --git a/scanner/api.py b/scanner/api.py new file mode 100644 index 0000000..63ea740 --- /dev/null +++ b/scanner/api.py @@ -0,0 +1,207 @@ +"""Build the v1 public API surface from a generated ``latest.json``. + +The v1 contract: + +- ``api/v1/skills.json`` Compact index of every skill in the most + recent scan: namespace, slug, verdict, + risk_score, source_repo, source_sha, + scanned_at. Lightweight enough to fetch + on every page render. +- ``api/v1/skills//.json`` Per-skill detail with the same fields + plus reasons, findings by severity and + rule, and a ``links`` object pointing + at the badge endpoints and the + immutable source-tree URL. +- ``api/v1/history.json`` Reshape of ``history/index.json`` into + a versioned shape with absolute + ``report_url`` fields so consumers do + not have to know the Pages layout. + +Stability: once shipped, ``v1`` field names and shapes do not change. New +optional fields are allowed. Removed or renamed fields require a ``v2`` prefix +with a deprecation window on ``v1``. +""" + +from __future__ import annotations + +import json +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from . import badges + +API_SCHEMA_VERSION = 1 + + +def _source_tree_url(skill: dict[str, Any]) -> str: + """Return the immutable ``github.com//tree//`` link. + + Always uses ``source_sha`` (not ``source_ref``) so the link survives + upstream branch movement. Falls back to the bare repo URL when the SHA is + missing. 
+ """ + repo = skill.get("source_repo") + sha = skill.get("source_sha") + path = (skill.get("skill_path") or "").lstrip("/") + if not repo: + return "" + if not sha: + return f"https://github.com/{repo}" + suffix = f"/{path}" if path else "" + return f"https://github.com/{repo}/tree/{sha}{suffix}" + + +def _badge_links(public_base_url: str, namespace: str, slug: str) -> dict[str, str]: + """Return the four badge URLs for a single skill.""" + root = public_base_url.rstrip("/") + return { + "verdict_badge_json": f"{root}/api/v1/skills/{namespace}/{slug}/badge/verdict.json", + "verdict_badge_svg": f"{root}/api/v1/skills/{namespace}/{slug}/badge/verdict.svg", + "risk_badge_json": f"{root}/api/v1/skills/{namespace}/{slug}/badge/risk.json", + "risk_badge_svg": f"{root}/api/v1/skills/{namespace}/{slug}/badge/risk.svg", + } + + +def _skill_index_entry(skill: dict[str, Any]) -> dict[str, Any]: + """Compact shape for the index.""" + ss = (skill.get("scanners") or {}).get("skillspector") or {} + return { + "namespace": skill["namespace"], + "slug": skill["slug"], + "verdict": skill["verdict"], + "risk_score": ss.get("risk_score", 0), + "source_repo": skill.get("source_repo", ""), + "source_sha": skill.get("source_sha", ""), + "scanned_at": skill.get("scanned_at", ""), + } + + +def build_skills_index( + report: dict[str, Any], + *, + generated_at: str | None = None, +) -> dict[str, Any]: + """Build the ``api/v1/skills.json`` payload.""" + skills = sorted( + (_skill_index_entry(s) for s in report.get("skills", [])), + key=lambda r: (r["namespace"], r["slug"]), + ) + return { + "schema_version": API_SCHEMA_VERSION, + "generated_at": generated_at or report.get("generated_at", ""), + "summary": report.get("summary", {}), + "skills": skills, + } + + +def build_skill_detail( + skill: dict[str, Any], + *, + public_base_url: str, + report_url: str, +) -> dict[str, Any]: + """Build a single ``api/v1/skills//.json`` payload.""" + ss = (skill.get("scanners") or {}).get("skillspector") or 
{} + return { + "schema_version": API_SCHEMA_VERSION, + "namespace": skill["namespace"], + "slug": skill["slug"], + "verdict": skill["verdict"], + "risk_score": ss.get("risk_score", 0), + "risk_severity": ss.get("risk_severity", "unknown"), + "source_repo": skill.get("source_repo", ""), + "source_ref": skill.get("source_ref", ""), + "source_sha": skill.get("source_sha", ""), + "skill_path": skill.get("skill_path", ""), + "scanned_at": skill.get("scanned_at", ""), + "reasons": skill.get("reasons", []), + "findings_by_severity": ss.get("findings_by_severity", {}), + "findings_by_rule": ss.get("findings_by_rule", []), + "links": { + "report": report_url, + "source_tree": _source_tree_url(skill), + **_badge_links(public_base_url, skill["namespace"], skill["slug"]), + }, + } + + +def build_history_index( + history_manifest: dict[str, Any], + *, + public_base_url: str, + generated_at: str | None = None, +) -> dict[str, Any]: + """Reshape ``history/index.json`` into the versioned API shape.""" + root = public_base_url.rstrip("/") + entries = [] + for entry in history_manifest.get("entries", []): + rel = entry.get("path", "").lstrip("/") + entries.append( + { + "stamp": entry.get("stamp", ""), + "generated_at": entry.get("generated_at", ""), + "summary": entry.get("summary", {}), + "report_url": f"{root}/{rel}" if rel else "", + } + ) + return { + "schema_version": API_SCHEMA_VERSION, + "generated_at": generated_at or datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ"), + "entries": entries, + } + + +def write_api_v1( + report: dict[str, Any], + *, + output_dir: Path, + public_base_url: str, + history_manifest: dict[str, Any] | None = None, +) -> list[Path]: + """Write the full v1 API tree under ``output_dir`` and return paths written.""" + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + written: list[Path] = [] + + report_url = f"{public_base_url.rstrip('/')}/latest.json" + + skills_index = build_skills_index(report) + skills_index_path = 
output_dir / "skills.json" + skills_index_path.write_text(json.dumps(skills_index, indent=2) + "\n", encoding="utf-8") + written.append(skills_index_path) + + for skill in report.get("skills", []): + ns = skill["namespace"] + slug = skill["slug"] + detail = build_skill_detail(skill, public_base_url=public_base_url, report_url=report_url) + detail_path = output_dir / "skills" / ns / f"{slug}.json" + detail_path.parent.mkdir(parents=True, exist_ok=True) + detail_path.write_text(json.dumps(detail, indent=2) + "\n", encoding="utf-8") + written.append(detail_path) + + badge_dir = detail_path.parent / slug / "badge" + badge_dir.mkdir(parents=True, exist_ok=True) + verdict = skill["verdict"] + ss = (skill.get("scanners") or {}).get("skillspector") or {} + risk = int(ss.get("risk_score", 0)) + v_json = badge_dir / "verdict.json" + v_json.write_text(json.dumps(badges.verdict_badge_json(verdict), indent=2) + "\n", encoding="utf-8") + written.append(v_json) + v_svg = badge_dir / "verdict.svg" + v_svg.write_text(badges.verdict_badge_svg(verdict), encoding="utf-8") + written.append(v_svg) + r_json = badge_dir / "risk.json" + r_json.write_text(json.dumps(badges.risk_badge_json(risk), indent=2) + "\n", encoding="utf-8") + written.append(r_json) + r_svg = badge_dir / "risk.svg" + r_svg.write_text(badges.risk_badge_svg(risk), encoding="utf-8") + written.append(r_svg) + + if history_manifest is not None: + history_api = build_history_index(history_manifest, public_base_url=public_base_url) + history_path = output_dir / "history.json" + history_path.write_text(json.dumps(history_api, indent=2) + "\n", encoding="utf-8") + written.append(history_path) + + return written diff --git a/scanner/badges.py b/scanner/badges.py new file mode 100644 index 0000000..a67817c --- /dev/null +++ b/scanner/badges.py @@ -0,0 +1,138 @@ +"""SVG and shields.io-endpoint JSON badge generators. 
+ +The JSON shape matches shields.io's ``endpoint`` badge contract so a consumer +can embed ``https://img.shields.io/endpoint?url=`` directly in +a README. The SVG endpoint generates a flat-style badge inline (no shields.io +dependency, no network hop) for sites that want the badge served from the +same origin as the rest of the report. + +Stability: ``schemaVersion``, ``label``, ``message``, ``color``, and +``cacheSeconds`` are part of the v1 contract. The SVG layout (two-rect flat +badge, 11px Verdana) is the contract for ``.svg`` consumers. +""" + +from __future__ import annotations + +from typing import Any + +SHIELDS_SCHEMA_VERSION = 1 + +# Cache hint for shields.io: 5 minutes lines up with the registry-server +# proxy's cache TTL and the catalogue refresh cadence. +DEFAULT_CACHE_SECONDS = 300 + +# Shields.io color names. +_VERDICT_COLORS: dict[str, str] = { + "clean": "brightgreen", + "suspicious": "yellow", + "malicious": "red", + "unknown": "lightgrey", +} + +# Bands for the risk score badge color. Matches the verdict cutoffs in +# ``config.yaml`` (51 = HIGH/suspicious, 81 = CRITICAL/malicious). +def _risk_color(score: int) -> str: + if score >= 81: + return "red" + if score >= 51: + return "yellow" + if score >= 21: + return "yellowgreen" + return "brightgreen" + + +# Hex equivalents used for the SVG renderer. Shields.io's `?color=` accepts +# names; the inline SVG renderer needs raw hex. 
+_NAMED_HEX: dict[str, str] = { + "brightgreen": "#4c1", + "green": "#97ca00", + "yellowgreen": "#a4a61d", + "yellow": "#dfb317", + "orange": "#fe7d37", + "red": "#e05d44", + "lightgrey": "#9f9f9f", + "blue": "#007ec6", +} + + +def verdict_badge_json(verdict: str) -> dict[str, Any]: + """Build the shields.io-endpoint payload for a verdict badge.""" + return { + "schemaVersion": SHIELDS_SCHEMA_VERSION, + "label": "skill scan", + "message": verdict, + "color": _VERDICT_COLORS.get(verdict, "lightgrey"), + "cacheSeconds": DEFAULT_CACHE_SECONDS, + } + + +def risk_badge_json(risk_score: int) -> dict[str, Any]: + """Build the shields.io-endpoint payload for a risk-score badge.""" + return { + "schemaVersion": SHIELDS_SCHEMA_VERSION, + "label": "risk score", + "message": f"{risk_score}/100", + "color": _risk_color(risk_score), + "cacheSeconds": DEFAULT_CACHE_SECONDS, + } + + +def _estimate_text_width(text: str) -> int: + """Conservative estimate of rendered text width in pixels for 11px Verdana. + + Shields.io measures glyphs precisely; we just need stable, deterministic + output. ~7px/char rounded up with a small padding works well in practice + across short labels and numeric scores. 
+ """ + return max(8, int(len(text) * 7) + 10) + + +def _flat_badge_svg(label: str, message: str, color_hex: str) -> str: + """Render a two-rect flat-style badge as inline SVG.""" + label_w = _estimate_text_width(label) + message_w = _estimate_text_width(message) + total_w = label_w + message_w + label_mid = label_w / 2 + message_mid = label_w + message_w / 2 + return ( + f'' + f"{label}: {message}" + '' + '' + '' + "" + f'' + '' + f'' + f'' + f'' + "" + '' + f'' + f'{label}' + f'' + f'{message}' + "" + ) + + +def verdict_badge_svg(verdict: str) -> str: + """Render the verdict badge as a self-contained SVG string.""" + color = _NAMED_HEX[_VERDICT_COLORS.get(verdict, "lightgrey")] + return _flat_badge_svg("skill scan", verdict, color) + + +def risk_badge_svg(risk_score: int) -> str: + """Render the risk-score badge as a self-contained SVG string.""" + color = _NAMED_HEX[_risk_color(risk_score)] + return _flat_badge_svg("risk score", f"{risk_score}/100", color) diff --git a/scanner/cli.py b/scanner/cli.py index 078c702..ded485e 100644 --- a/scanner/cli.py +++ b/scanner/cli.py @@ -21,7 +21,7 @@ import click import yaml -from . import __version__, aggregate, combine, history +from . import __version__, aggregate, api, combine, history from . import enumerate as enumerate_mod REPO_ROOT = Path(__file__).resolve().parent.parent @@ -312,5 +312,63 @@ def index_history_cmd(history_dir: Path, output: Path | None) -> None: click.echo(f"wrote {target} ({len(manifest['entries'])} snapshots)") +@main.command("build-api-v1") +@click.argument( + "report_path", + type=click.Path(path_type=Path, exists=True, dir_okay=False), +) +@click.option( + "--output", + "output_dir", + type=click.Path(path_type=Path, file_okay=False), + required=True, + help="Directory to write the api/v1 tree into. Created if missing.", +) +@click.option( + "--public-base-url", + required=True, + help=( + "Public URL the v1 API is served from (e.g. " + "https://coder.github.io/coder-skill-scanner). 
Used to build the " + "absolute report_url, source_tree, and badge links inside the JSON " + "payloads. No trailing /api/v1." + ), +) +@click.option( + "--history-index", + type=click.Path(path_type=Path, exists=True, dir_okay=False), + default=None, + help=( + "Path to history/index.json. When provided, the command also writes " + "api/v1/history.json reshaped from that manifest." + ), +) +def build_api_v1_cmd( + report_path: Path, + output_dir: Path, + public_base_url: str, + history_index: Path | None, +) -> None: + """Build the v1 public API tree from REPORT_PATH (typically latest.json). + + Writes skills.json, per-skill detail JSONs, and four badge endpoints per + skill (verdict.json/svg, risk.json/svg). When --history-index is given, + also writes history.json reshaped into the API-v1 contract. + """ + with report_path.open(encoding="utf-8") as fh: + report = json.load(fh) + history_manifest = None + if history_index is not None: + with history_index.open(encoding="utf-8") as fh: + history_manifest = json.load(fh) + written = api.write_api_v1( + report, + output_dir=output_dir, + public_base_url=public_base_url, + history_manifest=history_manifest, + ) + click.echo(f"wrote {len(written)} files under {output_dir}") + + if __name__ == "__main__": main() diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..3af1d21 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,168 @@ +"""Tests for the v1 public API builder.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from scanner import api + + +def _skill(slug: str, *, verdict: str = "clean", risk: int = 5) -> dict: + return { + "namespace": "coder", + "slug": slug, + "source_repo": "coder/skills", + "source_ref": "main", + "source_sha": "a" * 40, + "skill_path": f"skills/{slug}", + "scanned_at": "2026-01-02T03:04:05Z", + "verdict": verdict, + "reasons": [] if verdict == "clean" else [f"risk_score={risk}"], + "scanners": { + "skillspector": { + 
"crashed": False, + "json_missing": False, + "risk_score": risk, + "risk_severity": "low" if verdict == "clean" else "high", + "risk_recommendation": "", + "findings_by_severity": {"medium": 1} if risk else {}, + "findings_by_rule": [{"id": "LP3", "severity": "medium", "count": 1}] if risk else [], + } + }, + "artifacts": {}, + } + + +def _report(skills: list[dict]) -> dict: + return { + "schema_version": "1.0.0", + "generated_at": "2026-01-02T03:04:00Z", + "summary": { + "namespaces": 1, + "sources": 1, + "skills_scanned": len(skills), + "verdicts": {"clean": 0, "suspicious": 0, "malicious": 0, "unknown": 0}, + }, + "skills": skills, + } + + +def test_skills_index_compact_shape(): + report = _report([_skill("modules", risk=21), _skill("setup", verdict="malicious", risk=100)]) + out = api.build_skills_index(report) + + assert out["schema_version"] == 1 + assert out["generated_at"] == "2026-01-02T03:04:00Z" + assert [s["slug"] for s in out["skills"]] == ["modules", "setup"] + + first = out["skills"][0] + assert set(first.keys()) == { + "namespace", + "slug", + "verdict", + "risk_score", + "source_repo", + "source_sha", + "scanned_at", + } + assert first["risk_score"] == 21 + assert first["source_sha"] == "a" * 40 + assert "source_ref" not in first, "index payload must not leak mutable source_ref" + + +def test_skill_detail_uses_source_sha_for_tree_url(): + skill = _skill("setup", verdict="malicious", risk=87) + detail = api.build_skill_detail( + skill, + public_base_url="https://example.com/scanner", + report_url="https://example.com/scanner/latest.json", + ) + + assert detail["schema_version"] == 1 + assert detail["verdict"] == "malicious" + assert detail["risk_severity"] == "high" + expected_tree = f"https://github.com/coder/skills/tree/{'a' * 40}/skills/setup" + assert detail["links"]["source_tree"] == expected_tree + assert "main" not in detail["links"]["source_tree"], "source_tree must not pin to a mutable ref" + + badges = detail["links"] + assert 
badges["verdict_badge_json"].endswith("/api/v1/skills/coder/setup/badge/verdict.json") + assert badges["verdict_badge_svg"].endswith("/api/v1/skills/coder/setup/badge/verdict.svg") + assert badges["risk_badge_json"].endswith("/api/v1/skills/coder/setup/badge/risk.json") + assert badges["risk_badge_svg"].endswith("/api/v1/skills/coder/setup/badge/risk.svg") + assert detail["links"]["report"] == "https://example.com/scanner/latest.json" + + +def test_history_index_attaches_absolute_report_urls(): + manifest = { + "entries": [ + { + "stamp": "2026-01-02T03-04Z", + "generated_at": "2026-01-02T03:04:00Z", + "summary": {"skills_scanned": 3}, + "path": "history/2026-01-02/2026-01-02T03-04Z.json", + }, + ], + } + out = api.build_history_index(manifest, public_base_url="https://example.com/scanner") + assert out["schema_version"] == 1 + assert len(out["entries"]) == 1 + assert ( + out["entries"][0]["report_url"] + == "https://example.com/scanner/history/2026-01-02/2026-01-02T03-04Z.json" + ) + + +def test_write_api_v1_writes_full_tree(tmp_path: Path): + report = _report( + [ + _skill("modules", risk=21), + _skill("setup", verdict="malicious", risk=100), + _skill("templates", risk=0), + ] + ) + written = api.write_api_v1( + report, + output_dir=tmp_path, + public_base_url="https://example.com/scanner", + history_manifest={"entries": []}, + ) + + # 1 (skills.json) + 3 skills * (1 detail + 4 badge files) + 1 (history.json) = 17. + assert len(written) == 17 + + # Index validates as parseable JSON, has the right schema_version. + idx = json.loads((tmp_path / "skills.json").read_text()) + assert idx["schema_version"] == 1 + assert {s["slug"] for s in idx["skills"]} == {"modules", "setup", "templates"} + + # Per-skill JSON exists and has badge link pointing back at our base URL. + setup_detail = json.loads((tmp_path / "skills" / "coder" / "setup.json").read_text()) + assert setup_detail["verdict"] == "malicious" + + # Badge files exist and the SVGs are well-formed strings. 
+ verdict_svg = (tmp_path / "skills" / "coder" / "setup" / "badge" / "verdict.svg").read_text() + assert verdict_svg.startswith("") + assert "malicious" in verdict_svg + + risk_json = json.loads( + (tmp_path / "skills" / "coder" / "setup" / "badge" / "risk.json").read_text() + ) + assert risk_json["message"] == "100/100" + assert risk_json["color"] == "red" + + # history.json got written when a manifest was passed. + hist = json.loads((tmp_path / "history.json").read_text()) + assert hist["schema_version"] == 1 + + +def test_write_api_v1_skips_history_when_not_provided(tmp_path: Path): + report = _report([_skill("modules")]) + written = api.write_api_v1( + report, output_dir=tmp_path, public_base_url="https://example.com/scanner" + ) + # 1 (skills.json) + 1 * (1 detail + 4 badge files) = 6 + assert len(written) == 6 + assert not (tmp_path / "history.json").exists() diff --git a/tests/test_badges.py b/tests/test_badges.py new file mode 100644 index 0000000..c743f71 --- /dev/null +++ b/tests/test_badges.py @@ -0,0 +1,76 @@ +"""Tests for the v1 badge generators (shields.io endpoint JSON + inline SVG).""" + +from __future__ import annotations + +import re + +from scanner import badges + + +def test_verdict_badge_json_colors_by_state(): + assert badges.verdict_badge_json("clean")["color"] == "brightgreen" + assert badges.verdict_badge_json("suspicious")["color"] == "yellow" + assert badges.verdict_badge_json("malicious")["color"] == "red" + assert badges.verdict_badge_json("unknown")["color"] == "lightgrey" + # Unrecognised verdict falls back to lightgrey rather than crashing. 
+ assert badges.verdict_badge_json("not-a-real-verdict")["color"] == "lightgrey" + + +def test_verdict_badge_json_carries_shields_contract(): + payload = badges.verdict_badge_json("clean") + assert payload["schemaVersion"] == 1 + assert payload["label"] == "skill scan" + assert payload["message"] == "clean" + assert payload["cacheSeconds"] == 300 + + +def test_risk_badge_color_bands(): + # Bands aligned to the 21/51/81 cutoffs the verdict policy uses. + assert badges.risk_badge_json(0)["color"] == "brightgreen" + assert badges.risk_badge_json(20)["color"] == "brightgreen" + assert badges.risk_badge_json(21)["color"] == "yellowgreen" + assert badges.risk_badge_json(50)["color"] == "yellowgreen" + assert badges.risk_badge_json(51)["color"] == "yellow" + assert badges.risk_badge_json(80)["color"] == "yellow" + assert badges.risk_badge_json(81)["color"] == "red" + assert badges.risk_badge_json(100)["color"] == "red" + + +def test_risk_badge_json_carries_shields_contract(): + payload = badges.risk_badge_json(42) + assert payload["schemaVersion"] == 1 + assert payload["label"] == "risk score" + assert payload["message"] == "42/100" + + +def test_verdict_badge_svg_is_well_formed(): + svg = badges.verdict_badge_svg("clean") + assert svg.startswith("") + # Two rects (label background + message background). + assert svg.count("= 2 + # The verdict text appears in both the title and one of the nodes. + assert "clean" in svg + # Verdict colour bleeds through as a hex fill. + assert "#4c1" in svg, "clean verdict should use the brightgreen hex" + + +def test_risk_badge_svg_color_threshold(): + high = badges.risk_badge_svg(95) + low = badges.risk_badge_svg(5) + # Red vs brightgreen hex. + assert "#e05d44" in high + assert "#4c1" in low + # Both have the score string. 
+ assert "95/100" in high + assert "5/100" in low + + +def test_svg_width_grows_with_message_length(): + """Width estimation has to widen for longer text or the badge clips.""" + short = badges.verdict_badge_svg("clean") + long_ = badges.verdict_badge_svg("suspicious") + # Pull the width attribute from the opening tag. + sw = int(re.search(r'width="(\d+)"', short).group(1)) + lw = int(re.search(r'width="(\d+)"', long_).group(1)) + assert lw > sw From 5e237dcc70d9af28c2079e420d67c8572ae34169 Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 20:28:21 +0000 Subject: [PATCH 2/7] feat(scan.yaml): wire scanner build-api-v1 into Pages publish job Runs after index-history so the v1 history.json can mirror the same manifest. Derives the public base URL from $GITHUB_REPOSITORY_OWNER and the repo short name so forks get the right prefix automatically (mirrors the Vite build's GITHUB_REPOSITORY-derived base path from PR #11). --- .github/workflows/scan.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/scan.yaml b/.github/workflows/scan.yaml index f0ebf3d..f8c9eda 100644 --- a/.github/workflows/scan.yaml +++ b/.github/workflows/scan.yaml @@ -322,6 +322,17 @@ jobs: # Regenerate the manifest so the React app sees every retained run. scanner index-history pages/history --output pages/history/index.json + + # Build the stable v1 API surface (skills.json, per-skill detail, + # badge endpoints, history.json). Derive the public base URL from + # the runtime publishing context so forks get the right prefix + # automatically; the Vite build uses the same logic. 
+ repo_short="${GITHUB_REPOSITORY#*/}" + public_base_url="https://${GITHUB_REPOSITORY_OWNER}.github.io/${repo_short}" + scanner build-api-v1 latest.json \ + --output pages/api/v1 \ + --public-base-url "${public_base_url}" \ + --history-index pages/history/index.json - name: Upload Pages artifact uses: actions/upload-pages-artifact@fc324d3547104276b827a68afc52ff2a11cc49c9 # v5.0.0 with: From db91dc95fa05f1b8b36ba18a6a1b409b8e634a2a Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 20:28:21 +0000 Subject: [PATCH 3/7] fix(site): pin 'open in upstream' links to source_sha, not source_ref SkillTable and SkillDetailPage built the 'open this skill at the scan revision' link with source_ref (e.g. 'main'), which is a moving target -- clicking the link a week after the scan can land on a different tree. Use source_sha, the immutable commit the scan was actually run against. --- site/src/components/SkillTable/SkillTable.tsx | 2 +- site/src/pages/SkillDetailPage.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/site/src/components/SkillTable/SkillTable.tsx b/site/src/components/SkillTable/SkillTable.tsx index 7fe1f9c..be0431f 100644 --- a/site/src/components/SkillTable/SkillTable.tsx +++ b/site/src/components/SkillTable/SkillTable.tsx @@ -92,7 +92,7 @@ export const SkillTable: FC = ({ ).reduce((a, b) => a + b, 0); const srcHref = sourceRepoUrl( s.source_repo, - s.source_ref, + s.source_sha, s.skill_path, ); const detailHref = `${detailLinkBase}/${s.namespace}/${s.slug}`; diff --git a/site/src/pages/SkillDetailPage.tsx b/site/src/pages/SkillDetailPage.tsx index c7db750..085b1c5 100644 --- a/site/src/pages/SkillDetailPage.tsx +++ b/site/src/pages/SkillDetailPage.tsx @@ -105,7 +105,7 @@ export const SkillDetailPage: FC = () => { ); const srcHref = sourceRepoUrl( skill.source_repo, - skill.source_ref, + skill.source_sha, skill.skill_path, ); From 66a397c0fa3778ec050a39b65a6034f809c45e2e Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 
2026 20:32:07 +0000 Subject: [PATCH 4/7] style: split long lines flagged by ruff E501 scanner/api.py write_text calls and tests/test_api.py findings_by_rule literal were >100 cols; reformatted with no behaviour change. All 49 tests still pass. --- scanner/api.py | 10 ++++++++-- tests/test_api.py | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scanner/api.py b/scanner/api.py index 63ea740..886c0bb 100644 --- a/scanner/api.py +++ b/scanner/api.py @@ -186,13 +186,19 @@ def write_api_v1( ss = (skill.get("scanners") or {}).get("skillspector") or {} risk = int(ss.get("risk_score", 0)) v_json = badge_dir / "verdict.json" - v_json.write_text(json.dumps(badges.verdict_badge_json(verdict), indent=2) + "\n", encoding="utf-8") + v_json.write_text( + json.dumps(badges.verdict_badge_json(verdict), indent=2) + "\n", + encoding="utf-8", + ) written.append(v_json) v_svg = badge_dir / "verdict.svg" v_svg.write_text(badges.verdict_badge_svg(verdict), encoding="utf-8") written.append(v_svg) r_json = badge_dir / "risk.json" - r_json.write_text(json.dumps(badges.risk_badge_json(risk), indent=2) + "\n", encoding="utf-8") + r_json.write_text( + json.dumps(badges.risk_badge_json(risk), indent=2) + "\n", + encoding="utf-8", + ) written.append(r_json) r_svg = badge_dir / "risk.svg" r_svg.write_text(badges.risk_badge_svg(risk), encoding="utf-8") diff --git a/tests/test_api.py b/tests/test_api.py index 3af1d21..a92f7f2 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -27,7 +27,9 @@ def _skill(slug: str, *, verdict: str = "clean", risk: int = 5) -> dict: "risk_severity": "low" if verdict == "clean" else "high", "risk_recommendation": "", "findings_by_severity": {"medium": 1} if risk else {}, - "findings_by_rule": [{"id": "LP3", "severity": "medium", "count": 1}] if risk else [], + "findings_by_rule": ( + [{"id": "LP3", "severity": "medium", "count": 1}] if risk else [] + ), } }, "artifacts": {}, From a7ab62bb7dc33b10eed5089d318da95513a9060c Mon Sep 17 00:00:00 
2001 From: DevCats Date: Wed, 24 Jun 2026 20:37:31 +0000 Subject: [PATCH 5/7] security(api,badges): XML-escape SVG inputs + validate path segments Two defence-in-depth fixes for Copilot review feedback on PR #12. 1. scanner/badges.py: _flat_badge_svg now XML-escapes label and message before interpolating into SVG attribute and text contexts. Verdict and risk inputs never contain markup characters today, but escaping removes any SVG-injection or malformed-output path if the input shape ever drifts. 2. scanner/api.py: write_api_v1 now validates skill namespace and slug against ^[A-Za-z0-9][A-Za-z0-9._-]*$ before using them as filesystem path components. Rejects path traversal (../) and absolute paths in a malformed latest.json. Plus two regression-guard tests each. 52/52 pytest, ruff clean. Real-payload smoke run produces byte-identical output (no real input has markup characters). --- scanner/api.py | 18 ++++++++++++++++-- scanner/badges.py | 27 +++++++++++++++++++++------ tests/test_api.py | 20 ++++++++++++++++++++ tests/test_badges.py | 15 +++++++++++++++ 4 files changed, 72 insertions(+), 8 deletions(-) diff --git a/scanner/api.py b/scanner/api.py index 886c0bb..b0e30f3 100644 --- a/scanner/api.py +++ b/scanner/api.py @@ -25,6 +25,7 @@ from __future__ import annotations import json +import re from datetime import UTC, datetime from pathlib import Path from typing import Any @@ -33,6 +34,19 @@ API_SCHEMA_VERSION = 1 +# Filesystem-safe identifier shape for ``namespace`` and ``slug`` segments. +# Skill IDs in the registry are kebab-case ASCII; rejecting anything else here +# is defence-in-depth against a malformed report writing outside ``output_dir`` +# via ``../`` or absolute paths. 
+_SAFE_SEGMENT = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") + + +def _check_safe_segment(kind: str, value: str) -> str: + """Return ``value`` unchanged if it is a safe path segment, else raise.""" + if not isinstance(value, str) or not _SAFE_SEGMENT.fullmatch(value): + raise ValueError(f"unsafe {kind} segment: {value!r}") + return value + def _source_tree_url(skill: dict[str, Any]) -> str: """Return the immutable ``github.com//tree//`` link. @@ -172,8 +186,8 @@ def write_api_v1( written.append(skills_index_path) for skill in report.get("skills", []): - ns = skill["namespace"] - slug = skill["slug"] + ns = _check_safe_segment("namespace", skill["namespace"]) + slug = _check_safe_segment("slug", skill["slug"]) detail = build_skill_detail(skill, public_base_url=public_base_url, report_url=report_url) detail_path = output_dir / "skills" / ns / f"{slug}.json" detail_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/scanner/badges.py b/scanner/badges.py index a67817c..af4ed94 100644 --- a/scanner/badges.py +++ b/scanner/badges.py @@ -14,6 +14,7 @@ from __future__ import annotations from typing import Any +from xml.sax.saxutils import escape as _xml_escape SHIELDS_SCHEMA_VERSION = 1 @@ -87,6 +88,18 @@ def _estimate_text_width(text: str) -> int: return max(8, int(len(text) * 7) + 10) +def _xml_safe(s: str) -> str: + """Escape a string for both SVG attribute and text contexts. + + The badge inputs (verdict labels, risk-score strings) are produced by the + scanner and currently never contain markup characters, but the SVG is + served to third-party README consumers; defending against ``&``, ``<``, + ``>``, ``\"`` keeps the output well-formed and removes any path to SVG + injection if the input shape ever drifts. 
+ """ + return _xml_escape(s, {'"': """}) + + def _flat_badge_svg(label: str, message: str, color_hex: str) -> str: """Render a two-rect flat-style badge as inline SVG.""" label_w = _estimate_text_width(label) @@ -94,11 +107,13 @@ def _flat_badge_svg(label: str, message: str, color_hex: str) -> str: total_w = label_w + message_w label_mid = label_w / 2 message_mid = label_w + message_w / 2 + label_xml = _xml_safe(label) + message_xml = _xml_safe(message) return ( f'' - f"{label}: {message}" + f'aria-label="{label_xml}: {message_xml}">' + f"{label_xml}: {message_xml}" '' '' '' @@ -114,14 +129,14 @@ def _flat_badge_svg(label: str, message: str, color_hex: str) -> str: 'text-rendering="geometricPrecision" font-size="110">' f'' + f'{(label_w - 10) * 10:.0f}">{label_xml}' f'{label}' + f'fill="#fff" textLength="{(label_w - 10) * 10:.0f}">{label_xml}' f'' + f'{(message_w - 10) * 10:.0f}">{message_xml}' f'{message}' + f'fill="#fff" textLength="{(message_w - 10) * 10:.0f}">{message_xml}' "" ) diff --git a/tests/test_api.py b/tests/test_api.py index a92f7f2..ee62799 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -5,6 +5,8 @@ import json from pathlib import Path +import pytest + from scanner import api @@ -168,3 +170,21 @@ def test_write_api_v1_skips_history_when_not_provided(tmp_path: Path): # 1 (skills.json) + 1 * (1 detail + 4 badge files) = 6 assert len(written) == 6 assert not (tmp_path / "history.json").exists() + + +def test_write_api_v1_rejects_path_traversal_namespace(tmp_path: Path): + report = _report([_skill("modules")]) + report["skills"][0]["namespace"] = "../evil" + with pytest.raises(ValueError, match="unsafe namespace"): + api.write_api_v1( + report, output_dir=tmp_path, public_base_url="https://example.com/x" + ) + + +def test_write_api_v1_rejects_path_traversal_slug(tmp_path: Path): + report = _report([_skill("modules")]) + report["skills"][0]["slug"] = "ok/../escape" + with pytest.raises(ValueError, match="unsafe slug"): + api.write_api_v1( + 
report, output_dir=tmp_path, public_base_url="https://example.com/x" + ) diff --git a/tests/test_badges.py b/tests/test_badges.py index c743f71..2d4b8e2 100644 --- a/tests/test_badges.py +++ b/tests/test_badges.py @@ -74,3 +74,18 @@ def test_svg_width_grows_with_message_length(): sw = int(re.search(r'width="(\d+)"', short).group(1)) lw = int(re.search(r'width="(\d+)"', long_).group(1)) assert lw > sw + + +def test_svg_escapes_markup_in_label_and_message(): + """The SVG renderer must escape XML special chars; the public surface only + ever passes shape-constrained inputs, but defense in depth keeps the badge + well-formed if the input shape ever drifts.""" + raw = badges._flat_badge_svg('