diff --git a/.github/workflows/scan.yaml b/.github/workflows/scan.yaml index f0ebf3d..f8c9eda 100644 --- a/.github/workflows/scan.yaml +++ b/.github/workflows/scan.yaml @@ -322,6 +322,17 @@ jobs: # Regenerate the manifest so the React app sees every retained run. scanner index-history pages/history --output pages/history/index.json + + # Build the stable v1 API surface (skills.json, per-skill detail, + # badge endpoints, history.json). Derive the public base URL from + # the runtime publishing context so forks get the right prefix + # automatically; the Vite build uses the same logic. + repo_short="${GITHUB_REPOSITORY#*/}" + public_base_url="https://${GITHUB_REPOSITORY_OWNER}.github.io/${repo_short}" + scanner build-api-v1 latest.json \ + --output pages/api/v1 \ + --public-base-url "${public_base_url}" \ + --history-index pages/history/index.json - name: Upload Pages artifact uses: actions/upload-pages-artifact@fc324d3547104276b827a68afc52ff2a11cc49c9 # v5.0.0 with: diff --git a/README.md b/README.md index 99d198e..629cde0 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,49 @@ Stable URLs, no auth required: + Schema: `https://coder.github.io/coder-skill-scanner/schema.json` (v1) + Per-scan history (JSON): `https://coder.github.io/coder-skill-scanner/history/index.json` +## Public API (v1) + +Under `/api/v1/`, every URL is constructible from `(namespace, slug)` alone +— no lookup against the index is required to render a badge or read a +single skill. Field names and URL shapes are committed to the `v1` +stability contract; breaking changes move to a `v2` prefix. 
+ + | URL | Shape | Use | +| --- | --- | --- | +| `/api/v1/index.json` | discovery manifest: URL templates + current `(ns, slug)` pairs | bootstrap a third-party consumer | +| `/api/v1/skills.json` | compact index of every skill | listing / cache warmer | +| `/api/v1/skills/<namespace>/<slug>.json` | per-skill detail (reasons, findings, `links` block) | per-skill consumer | +| `/api/v1/skills/<namespace>/<slug>/badge/status.json` | shields.io endpoint payload | `img.shields.io/endpoint?url=...` | +| `/api/v1/skills/<namespace>/<slug>/badge/status.svg` | inline SVG | direct embed | +| `/api/v1/skills/<namespace>/<slug>/badge/score.json` | shields.io endpoint payload | same | +| `/api/v1/skills/<namespace>/<slug>/badge/score.svg` | inline SVG | direct embed | +| `/api/v1/history.json` | reshape of history with absolute report URLs | history consumer | + +Two badges per skill: + ++ **`status`** — the categorical scan outcome (`clean`, `suspicious`, + `malicious`, `unknown`). Colour follows the verdict 1:1. ++ **`score`** — the numeric SkillSpector risk score (`0/100` … `100/100`). + Colour is banded at the same 21 / 51 / 81 cutoffs the verdict policy + uses. + +Embed a status badge in a README: + +```markdown +![skill scan](https://coder.github.io/coder-skill-scanner/api/v1/skills/coder/setup/badge/status.svg) +``` + +Or via shields.io if you want their renderer: + +```markdown +![skill scan](https://img.shields.io/endpoint?url=https://coder.github.io/coder-skill-scanner/api/v1/skills/coder/setup/badge/status.json) +``` + +For a fork, swap the host: `https://<owner>.github.io/<repo>/api/v1/...`. +The scanner derives the public base URL from `$GITHUB_REPOSITORY` at +publish time, so the same URL pattern is correct for any fork without +config changes. + ## Running locally Requires Python 3.12+, Node 22+ (via `mise`), pnpm, and `git`. diff --git a/scanner/api.py b/scanner/api.py new file mode 100644 index 0000000..94b8c48 --- /dev/null +++ b/scanner/api.py @@ -0,0 +1,292 @@ +"""Build the v1 public API surface from a generated ``latest.json``. 
+ +The v1 contract: + +- ``api/v1/index.json`` Discovery manifest. Lists the URL + templates (with ``{namespace}`` and + ``{slug}`` placeholders) needed to + address every other endpoint, plus the + current ``(namespace, slug)`` pairs. + Bootstrap entry point for third-party + consumers - no other fetch required to + learn the URL conventions. +- ``api/v1/skills.json`` Compact index of every skill in the most + recent scan: namespace, slug, verdict, + risk_score, source_repo, source_sha, + scanned_at. Lightweight enough to fetch + on every page render. +- ``api/v1/skills/<namespace>/<slug>.json`` Per-skill detail with the same fields + plus reasons, findings by severity and + rule, and a ``links`` object pointing + at the badge endpoints and the + immutable source-tree URL. +- ``api/v1/skills/<namespace>/<slug>/badge/status.{json,svg}`` + Categorical scan-outcome badge + (clean/suspicious/malicious/unknown). + Directly addressable from the + ``(namespace, slug)`` pair - no detail + fetch required. +- ``api/v1/skills/<namespace>/<slug>/badge/score.{json,svg}`` + Numeric risk-score badge (0-100, + colour-banded). Same direct-addressing + contract as ``status``. +- ``api/v1/history.json`` Reshape of ``history/index.json`` into + a versioned shape with absolute + ``report_url`` fields so consumers do + not have to know the Pages layout. + +Stability: once shipped, ``v1`` field names and shapes do not change. New +optional fields are allowed. Removed or renamed fields require a ``v2`` prefix +with a deprecation window on ``v1``. +""" + +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from . import badges + +API_SCHEMA_VERSION = 1 + +# Filesystem-safe identifier shape for ``namespace`` and ``slug`` segments. +# Skill IDs in the registry are kebab-case ASCII; rejecting anything else here +# is defence-in-depth against a malformed report writing outside ``output_dir`` +# via ``../`` or absolute paths. 
+_SAFE_SEGMENT = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") + + +def _check_safe_segment(kind: str, value: str) -> str: + """Return ``value`` unchanged if it is a safe path segment, else raise.""" + if not isinstance(value, str) or not _SAFE_SEGMENT.fullmatch(value): + raise ValueError(f"unsafe {kind} segment: {value!r}") + return value + + +def _source_tree_url(skill: dict[str, Any]) -> str: + """Return the immutable ``github.com//tree//`` link. + + Always uses ``source_sha`` (not ``source_ref``) so the link survives + upstream branch movement. Falls back to the bare repo URL when the SHA is + missing. + """ + repo = skill.get("source_repo") + sha = skill.get("source_sha") + path = (skill.get("skill_path") or "").lstrip("/") + if not repo: + return "" + if not sha: + return f"https://github.com/{repo}" + suffix = f"/{path}" if path else "" + return f"https://github.com/{repo}/tree/{sha}{suffix}" + + +def _badge_links(public_base_url: str, namespace: str, slug: str) -> dict[str, str]: + """Return the four badge URLs for a single skill.""" + root = public_base_url.rstrip("/") + return { + "status_badge_json": f"{root}/api/v1/skills/{namespace}/{slug}/badge/status.json", + "status_badge_svg": f"{root}/api/v1/skills/{namespace}/{slug}/badge/status.svg", + "score_badge_json": f"{root}/api/v1/skills/{namespace}/{slug}/badge/score.json", + "score_badge_svg": f"{root}/api/v1/skills/{namespace}/{slug}/badge/score.svg", + } + + +def _skill_index_entry(skill: dict[str, Any]) -> dict[str, Any]: + """Compact shape for the index.""" + ss = (skill.get("scanners") or {}).get("skillspector") or {} + return { + "namespace": skill["namespace"], + "slug": skill["slug"], + "verdict": skill["verdict"], + "risk_score": ss.get("risk_score", 0), + "source_repo": skill.get("source_repo", ""), + "source_sha": skill.get("source_sha", ""), + "scanned_at": skill.get("scanned_at", ""), + } + + +def build_skills_index( + report: dict[str, Any], + *, + generated_at: str | None = None, +) -> 
dict[str, Any]: + """Build the ``api/v1/skills.json`` payload.""" + skills = sorted( + (_skill_index_entry(s) for s in report.get("skills", [])), + key=lambda r: (r["namespace"], r["slug"]), + ) + return { + "schema_version": API_SCHEMA_VERSION, + "generated_at": generated_at or report.get("generated_at", ""), + "summary": report.get("summary", {}), + "skills": skills, + } + + +def build_skill_detail( + skill: dict[str, Any], + *, + public_base_url: str, + report_url: str, +) -> dict[str, Any]: + """Build a single ``api/v1/skills//.json`` payload.""" + ss = (skill.get("scanners") or {}).get("skillspector") or {} + return { + "schema_version": API_SCHEMA_VERSION, + "namespace": skill["namespace"], + "slug": skill["slug"], + "verdict": skill["verdict"], + "risk_score": ss.get("risk_score", 0), + "risk_severity": ss.get("risk_severity", "unknown"), + "source_repo": skill.get("source_repo", ""), + "source_ref": skill.get("source_ref", ""), + "source_sha": skill.get("source_sha", ""), + "skill_path": skill.get("skill_path", ""), + "scanned_at": skill.get("scanned_at", ""), + "reasons": skill.get("reasons", []), + "findings_by_severity": ss.get("findings_by_severity", {}), + "findings_by_rule": ss.get("findings_by_rule", []), + "links": { + "report": report_url, + "source_tree": _source_tree_url(skill), + **_badge_links(public_base_url, skill["namespace"], skill["slug"]), + }, + } + + +def build_history_index( + history_manifest: dict[str, Any], + *, + public_base_url: str, + generated_at: str | None = None, +) -> dict[str, Any]: + """Reshape ``history/index.json`` into the versioned API shape.""" + root = public_base_url.rstrip("/") + entries = [] + for entry in history_manifest.get("entries", []): + rel = entry.get("path", "").lstrip("/") + entries.append( + { + "stamp": entry.get("stamp", ""), + "generated_at": entry.get("generated_at", ""), + "summary": entry.get("summary", {}), + "report_url": f"{root}/{rel}" if rel else "", + } + ) + return { + "schema_version": 
API_SCHEMA_VERSION, + "generated_at": generated_at or datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ"), + "entries": entries, + } + + +def build_v1_index( + report: dict[str, Any], + *, + public_base_url: str, + has_history: bool, + generated_at: str | None = None, +) -> dict[str, Any]: + """Build the ``api/v1/index.json`` discovery manifest. + + The manifest lists the URL templates a third-party consumer needs to + address every endpoint without first parsing ``skills.json``, plus the + current ``(namespace, slug)`` pairs so a programmatic consumer can iterate + without a second fetch. Field shapes are part of the v1 contract. + """ + root = public_base_url.rstrip("/") + api_root = f"{root}/api/v1" + skills = sorted( + ({"namespace": s["namespace"], "slug": s["slug"]} for s in report.get("skills", [])), + key=lambda s: (s["namespace"], s["slug"]), + ) + urls = { + "skills_index": f"{api_root}/skills.json", + "skill_detail": f"{api_root}/skills/{{namespace}}/{{slug}}.json", + "status_badge_json": f"{api_root}/skills/{{namespace}}/{{slug}}/badge/status.json", + "status_badge_svg": f"{api_root}/skills/{{namespace}}/{{slug}}/badge/status.svg", + "score_badge_json": f"{api_root}/skills/{{namespace}}/{{slug}}/badge/score.json", + "score_badge_svg": f"{api_root}/skills/{{namespace}}/{{slug}}/badge/score.svg", + } + if has_history: + urls["history"] = f"{api_root}/history.json" + return { + "schema_version": API_SCHEMA_VERSION, + "generated_at": generated_at or report.get("generated_at", ""), + "urls": urls, + "skills": skills, + } + + +def write_api_v1( + report: dict[str, Any], + *, + output_dir: Path, + public_base_url: str, + history_manifest: dict[str, Any] | None = None, +) -> list[Path]: + """Write the full v1 API tree under ``output_dir`` and return paths written.""" + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + written: list[Path] = [] + + report_url = f"{public_base_url.rstrip('/')}/latest.json" + + skills_index = 
build_skills_index(report) + skills_index_path = output_dir / "skills.json" + skills_index_path.write_text(json.dumps(skills_index, indent=2) + "\n", encoding="utf-8") + written.append(skills_index_path) + + discovery = build_v1_index( + report, + public_base_url=public_base_url, + has_history=history_manifest is not None, + ) + discovery_path = output_dir / "index.json" + discovery_path.write_text(json.dumps(discovery, indent=2) + "\n", encoding="utf-8") + written.append(discovery_path) + + for skill in report.get("skills", []): + ns = _check_safe_segment("namespace", skill["namespace"]) + slug = _check_safe_segment("slug", skill["slug"]) + detail = build_skill_detail(skill, public_base_url=public_base_url, report_url=report_url) + detail_path = output_dir / "skills" / ns / f"{slug}.json" + detail_path.parent.mkdir(parents=True, exist_ok=True) + detail_path.write_text(json.dumps(detail, indent=2) + "\n", encoding="utf-8") + written.append(detail_path) + + badge_dir = detail_path.parent / slug / "badge" + badge_dir.mkdir(parents=True, exist_ok=True) + verdict = skill["verdict"] + ss = (skill.get("scanners") or {}).get("skillspector") or {} + risk = int(ss.get("risk_score", 0)) + v_json = badge_dir / "status.json" + v_json.write_text( + json.dumps(badges.status_badge_json(verdict), indent=2) + "\n", + encoding="utf-8", + ) + written.append(v_json) + v_svg = badge_dir / "status.svg" + v_svg.write_text(badges.status_badge_svg(verdict), encoding="utf-8") + written.append(v_svg) + r_json = badge_dir / "score.json" + r_json.write_text( + json.dumps(badges.score_badge_json(risk), indent=2) + "\n", + encoding="utf-8", + ) + written.append(r_json) + r_svg = badge_dir / "score.svg" + r_svg.write_text(badges.score_badge_svg(risk), encoding="utf-8") + written.append(r_svg) + + if history_manifest is not None: + history_api = build_history_index(history_manifest, public_base_url=public_base_url) + history_path = output_dir / "history.json" + 
history_path.write_text(json.dumps(history_api, indent=2) + "\n", encoding="utf-8") + written.append(history_path) + + return written diff --git a/scanner/badges.py b/scanner/badges.py new file mode 100644 index 0000000..06bd189 --- /dev/null +++ b/scanner/badges.py @@ -0,0 +1,163 @@ +"""SVG and shields.io-endpoint JSON badge generators. + +The JSON shape matches shields.io's ``endpoint`` badge contract so a consumer +can embed ``https://img.shields.io/endpoint?url=`` directly in +a README. The SVG endpoint generates a flat-style badge inline (no shields.io +dependency, no network hop) for sites that want the badge served from the +same origin as the rest of the report. + +Stability: ``schemaVersion``, ``label``, ``message``, ``color``, and +``cacheSeconds`` are part of the v1 contract. The SVG layout (two-rect flat +badge, 11px Verdana) is the contract for ``.svg`` consumers. +""" + +from __future__ import annotations + +from typing import Any +from xml.sax.saxutils import escape as _xml_escape + +SHIELDS_SCHEMA_VERSION = 1 + +# Cache hint for shields.io: 5 minutes lines up with the registry-server +# proxy's cache TTL and the catalogue refresh cadence. +DEFAULT_CACHE_SECONDS = 300 + +# Shields.io color names. +_VERDICT_COLORS: dict[str, str] = { + "clean": "brightgreen", + "suspicious": "yellow", + "malicious": "red", + "unknown": "lightgrey", +} + +# Bands for the risk score badge color. Matches the verdict cutoffs in +# ``config.yaml`` (51 = HIGH/suspicious, 81 = CRITICAL/malicious). +def _risk_color(score: int) -> str: + if score >= 81: + return "red" + if score >= 51: + return "yellow" + if score >= 21: + return "yellowgreen" + return "brightgreen" + + +# Hex equivalents used for the SVG renderer. Shields.io's `?color=` accepts +# names; the inline SVG renderer needs raw hex. 
+_NAMED_HEX: dict[str, str] = { + "brightgreen": "#4c1", + "green": "#97ca00", + "yellowgreen": "#a4a61d", + "yellow": "#dfb317", + "orange": "#fe7d37", + "red": "#e05d44", + "lightgrey": "#9f9f9f", + "blue": "#007ec6", +} + + +def status_badge_json(verdict: str) -> dict[str, Any]: + """Build the shields.io-endpoint payload for the status badge. + + The categorical scan outcome: "clean", "suspicious", "malicious", or + "unknown". Color follows the verdict 1:1 (brightgreen, yellow, red, + lightgrey). Pair with :func:`score_badge_json` for the numeric variant. + """ + return { + "schemaVersion": SHIELDS_SCHEMA_VERSION, + "label": "skill scan", + "message": verdict, + "color": _VERDICT_COLORS.get(verdict, "lightgrey"), + "cacheSeconds": DEFAULT_CACHE_SECONDS, + } + + +def score_badge_json(risk_score: int) -> dict[str, Any]: + """Build the shields.io-endpoint payload for the score badge. + + The numeric SkillSpector risk score (0-100). Color is banded at the + 21 / 51 / 81 cutoffs that drive the verdict policy. Pair with + :func:`status_badge_json` for the categorical variant. + """ + return { + "schemaVersion": SHIELDS_SCHEMA_VERSION, + "label": "risk score", + "message": f"{risk_score}/100", + "color": _risk_color(risk_score), + "cacheSeconds": DEFAULT_CACHE_SECONDS, + } + + +def _estimate_text_width(text: str) -> int: + """Conservative estimate of rendered text width in pixels for 11px Verdana. + + Shields.io measures glyphs precisely; we just need stable, deterministic + output. ~7px/char rounded up with a small padding works well in practice + across short labels and numeric scores. + """ + return max(8, int(len(text) * 7) + 10) + + +def _xml_safe(s: str) -> str: + """Escape a string for both SVG attribute and text contexts. 
+ + The badge inputs (verdict labels, risk-score strings) are produced by the + scanner and currently never contain markup characters, but the SVG is + served to third-party README consumers; defending against ``&``, ``<``, + ``>``, ``\"`` keeps the output well-formed and removes any path to SVG + injection if the input shape ever drifts. + """ + return _xml_escape(s, {'"': """}) + + +def _flat_badge_svg(label: str, message: str, color_hex: str) -> str: + """Render a two-rect flat-style badge as inline SVG.""" + label_w = _estimate_text_width(label) + message_w = _estimate_text_width(message) + total_w = label_w + message_w + label_mid = label_w / 2 + message_mid = label_w + message_w / 2 + label_xml = _xml_safe(label) + message_xml = _xml_safe(message) + return ( + f'' + f"{label_xml}: {message_xml}" + '' + '' + '' + "" + f'' + '' + f'' + f'' + f'' + "" + '' + f'' + f'{label_xml}' + f'' + f'{message_xml}' + "" + ) + + +def status_badge_svg(verdict: str) -> str: + """Render the status badge (categorical scan outcome) as inline SVG.""" + color = _NAMED_HEX[_VERDICT_COLORS.get(verdict, "lightgrey")] + return _flat_badge_svg("skill scan", verdict, color) + + +def score_badge_svg(risk_score: int) -> str: + """Render the score badge (numeric risk score) as inline SVG.""" + color = _NAMED_HEX[_risk_color(risk_score)] + return _flat_badge_svg("risk score", f"{risk_score}/100", color) diff --git a/scanner/cli.py b/scanner/cli.py index 078c702..ded485e 100644 --- a/scanner/cli.py +++ b/scanner/cli.py @@ -21,7 +21,7 @@ import click import yaml -from . import __version__, aggregate, combine, history +from . import __version__, aggregate, api, combine, history from . 
import enumerate as enumerate_mod REPO_ROOT = Path(__file__).resolve().parent.parent @@ -312,5 +312,63 @@ def index_history_cmd(history_dir: Path, output: Path | None) -> None: click.echo(f"wrote {target} ({len(manifest['entries'])} snapshots)") +@main.command("build-api-v1") +@click.argument( + "report_path", + type=click.Path(path_type=Path, exists=True, dir_okay=False), +) +@click.option( + "--output", + "output_dir", + type=click.Path(path_type=Path, file_okay=False), + required=True, + help="Directory to write the api/v1 tree into. Created if missing.", +) +@click.option( + "--public-base-url", + required=True, + help=( + "Public URL the v1 API is served from (e.g. " + "https://coder.github.io/coder-skill-scanner). Used to build the " + "absolute report_url, source_tree, and badge links inside the JSON " + "payloads. No trailing /api/v1." + ), +) +@click.option( + "--history-index", + type=click.Path(path_type=Path, exists=True, dir_okay=False), + default=None, + help=( + "Path to history/index.json. When provided, the command also writes " + "api/v1/history.json reshaped from that manifest." + ), +) +def build_api_v1_cmd( + report_path: Path, + output_dir: Path, + public_base_url: str, + history_index: Path | None, +) -> None: + """Build the v1 public API tree from REPORT_PATH (typically latest.json). + + Writes index.json, skills.json, per-skill detail JSONs, and four badge + endpoints per skill (status.json/svg, score.json/svg). When --history-index + is given, also writes history.json reshaped into the API-v1 contract. 
+ """ + with report_path.open(encoding="utf-8") as fh: + report = json.load(fh) + history_manifest = None + if history_index is not None: + with history_index.open(encoding="utf-8") as fh: + history_manifest = json.load(fh) + written = api.write_api_v1( + report, + output_dir=output_dir, + public_base_url=public_base_url, + history_manifest=history_manifest, + ) + click.echo(f"wrote {len(written)} files under {output_dir}") + + if __name__ == "__main__": main() diff --git a/site/src/components/SkillTable/SkillTable.tsx b/site/src/components/SkillTable/SkillTable.tsx index 7fe1f9c..be0431f 100644 --- a/site/src/components/SkillTable/SkillTable.tsx +++ b/site/src/components/SkillTable/SkillTable.tsx @@ -92,7 +92,7 @@ export const SkillTable: FC = ({ ).reduce((a, b) => a + b, 0); const srcHref = sourceRepoUrl( s.source_repo, - s.source_ref, + s.source_sha, s.skill_path, ); const detailHref = `${detailLinkBase}/${s.namespace}/${s.slug}`; diff --git a/site/src/pages/SkillDetailPage.tsx b/site/src/pages/SkillDetailPage.tsx index c7db750..085b1c5 100644 --- a/site/src/pages/SkillDetailPage.tsx +++ b/site/src/pages/SkillDetailPage.tsx @@ -105,7 +105,7 @@ export const SkillDetailPage: FC = () => { ); const srcHref = sourceRepoUrl( skill.source_repo, - skill.source_ref, + skill.source_sha, skill.skill_path, ); diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..420805f --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,237 @@ +"""Tests for the v1 public API builder.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from scanner import api + + +def _skill(slug: str, *, verdict: str = "clean", risk: int = 5) -> dict: + return { + "namespace": "coder", + "slug": slug, + "source_repo": "coder/skills", + "source_ref": "main", + "source_sha": "a" * 40, + "skill_path": f"skills/{slug}", + "scanned_at": "2026-01-02T03:04:05Z", + "verdict": verdict, + "reasons": [] if verdict == "clean" else 
[f"risk_score={risk}"], + "scanners": { + "skillspector": { + "crashed": False, + "json_missing": False, + "risk_score": risk, + "risk_severity": "low" if verdict == "clean" else "high", + "risk_recommendation": "", + "findings_by_severity": {"medium": 1} if risk else {}, + "findings_by_rule": ( + [{"id": "LP3", "severity": "medium", "count": 1}] if risk else [] + ), + } + }, + "artifacts": {}, + } + + +def _report(skills: list[dict]) -> dict: + return { + "schema_version": "1.0.0", + "generated_at": "2026-01-02T03:04:00Z", + "summary": { + "namespaces": 1, + "sources": 1, + "skills_scanned": len(skills), + "verdicts": {"clean": 0, "suspicious": 0, "malicious": 0, "unknown": 0}, + }, + "skills": skills, + } + + +def test_skills_index_compact_shape(): + report = _report([_skill("modules", risk=21), _skill("setup", verdict="malicious", risk=100)]) + out = api.build_skills_index(report) + + assert out["schema_version"] == 1 + assert out["generated_at"] == "2026-01-02T03:04:00Z" + assert [s["slug"] for s in out["skills"]] == ["modules", "setup"] + + first = out["skills"][0] + assert set(first.keys()) == { + "namespace", + "slug", + "verdict", + "risk_score", + "source_repo", + "source_sha", + "scanned_at", + } + assert first["risk_score"] == 21 + assert first["source_sha"] == "a" * 40 + assert "source_ref" not in first, "index payload must not leak mutable source_ref" + + +def test_skill_detail_uses_source_sha_for_tree_url(): + skill = _skill("setup", verdict="malicious", risk=87) + detail = api.build_skill_detail( + skill, + public_base_url="https://example.com/scanner", + report_url="https://example.com/scanner/latest.json", + ) + + assert detail["schema_version"] == 1 + assert detail["verdict"] == "malicious" + assert detail["risk_severity"] == "high" + expected_tree = f"https://github.com/coder/skills/tree/{'a' * 40}/skills/setup" + assert detail["links"]["source_tree"] == expected_tree + assert "main" not in detail["links"]["source_tree"], "source_tree must not 
pin to a mutable ref" + + badges = detail["links"] + assert badges["status_badge_json"].endswith("/api/v1/skills/coder/setup/badge/status.json") + assert badges["status_badge_svg"].endswith("/api/v1/skills/coder/setup/badge/status.svg") + assert badges["score_badge_json"].endswith("/api/v1/skills/coder/setup/badge/score.json") + assert badges["score_badge_svg"].endswith("/api/v1/skills/coder/setup/badge/score.svg") + assert detail["links"]["report"] == "https://example.com/scanner/latest.json" + + +def test_history_index_attaches_absolute_report_urls(): + manifest = { + "entries": [ + { + "stamp": "2026-01-02T03-04Z", + "generated_at": "2026-01-02T03:04:00Z", + "summary": {"skills_scanned": 3}, + "path": "history/2026-01-02/2026-01-02T03-04Z.json", + }, + ], + } + out = api.build_history_index(manifest, public_base_url="https://example.com/scanner") + assert out["schema_version"] == 1 + assert len(out["entries"]) == 1 + assert ( + out["entries"][0]["report_url"] + == "https://example.com/scanner/history/2026-01-02/2026-01-02T03-04Z.json" + ) + + +def test_write_api_v1_writes_full_tree(tmp_path: Path): + report = _report( + [ + _skill("modules", risk=21), + _skill("setup", verdict="malicious", risk=100), + _skill("templates", risk=0), + ] + ) + written = api.write_api_v1( + report, + output_dir=tmp_path, + public_base_url="https://example.com/scanner", + history_manifest={"entries": []}, + ) + + # 1 (skills.json) + 1 (index.json) + 3 skills * (1 detail + 4 badge files) + + # 1 (history.json) = 18. + assert len(written) == 18 + + # Index validates as parseable JSON, has the right schema_version. + idx = json.loads((tmp_path / "skills.json").read_text()) + assert idx["schema_version"] == 1 + assert {s["slug"] for s in idx["skills"]} == {"modules", "setup", "templates"} + + # Per-skill JSON exists and has badge link pointing back at our base URL. 
+ setup_detail = json.loads((tmp_path / "skills" / "coder" / "setup.json").read_text()) + assert setup_detail["verdict"] == "malicious" + + # Badge files exist and the SVGs are well-formed strings. + verdict_svg = (tmp_path / "skills" / "coder" / "setup" / "badge" / "status.svg").read_text() + assert verdict_svg.startswith("") + assert "malicious" in verdict_svg + + risk_json = json.loads( + (tmp_path / "skills" / "coder" / "setup" / "badge" / "score.json").read_text() + ) + assert risk_json["message"] == "100/100" + assert risk_json["color"] == "red" + + # history.json got written when a manifest was passed. + hist = json.loads((tmp_path / "history.json").read_text()) + assert hist["schema_version"] == 1 + + +def test_write_api_v1_skips_history_when_not_provided(tmp_path: Path): + report = _report([_skill("modules")]) + written = api.write_api_v1( + report, output_dir=tmp_path, public_base_url="https://example.com/scanner" + ) + # 1 (skills.json) + 1 (index.json) + 1 * (1 detail + 4 badge files) = 7 + assert len(written) == 7 + assert not (tmp_path / "history.json").exists() + + +def test_write_api_v1_rejects_path_traversal_namespace(tmp_path: Path): + report = _report([_skill("modules")]) + report["skills"][0]["namespace"] = "../evil" + with pytest.raises(ValueError, match="unsafe namespace"): + api.write_api_v1( + report, output_dir=tmp_path, public_base_url="https://example.com/x" + ) + + +def test_write_api_v1_rejects_path_traversal_slug(tmp_path: Path): + report = _report([_skill("modules")]) + report["skills"][0]["slug"] = "ok/../escape" + with pytest.raises(ValueError, match="unsafe slug"): + api.write_api_v1( + report, output_dir=tmp_path, public_base_url="https://example.com/x" + ) + + +def test_build_v1_index_lists_url_templates_and_skills(): + """The discovery manifest at /api/v1/index.json must let a third-party + consumer address every endpoint without first fetching skills.json.""" + report = _report( + [ + _skill("templates"), + _skill("modules"), + 
_skill("setup", verdict="malicious", risk=100), + ] + ) + manifest = api.build_v1_index( + report, + public_base_url="https://example.com/scanner", + has_history=True, + ) + + assert manifest["schema_version"] == 1 + # URL templates use {namespace} and {slug} placeholders, anchored at the + # caller-supplied base; consumers can substitute without knowing layout. + urls = manifest["urls"] + assert urls["skills_index"] == "https://example.com/scanner/api/v1/skills.json" + assert urls["skill_detail"].endswith("/api/v1/skills/{namespace}/{slug}.json") + assert urls["status_badge_svg"].endswith( + "/api/v1/skills/{namespace}/{slug}/badge/status.svg" + ) + assert urls["score_badge_json"].endswith( + "/api/v1/skills/{namespace}/{slug}/badge/score.json" + ) + # history URL is included only when a history manifest was supplied. + assert urls["history"].endswith("/api/v1/history.json") + # Skill list is sorted and contains only namespace/slug pairs. + assert manifest["skills"] == [ + {"namespace": "coder", "slug": "modules"}, + {"namespace": "coder", "slug": "setup"}, + {"namespace": "coder", "slug": "templates"}, + ] + + +def test_build_v1_index_omits_history_url_when_no_history(): + report = _report([_skill("modules")]) + manifest = api.build_v1_index( + report, public_base_url="https://example.com/scanner", has_history=False + ) + assert "history" not in manifest["urls"] diff --git a/tests/test_badges.py b/tests/test_badges.py new file mode 100644 index 0000000..81735b4 --- /dev/null +++ b/tests/test_badges.py @@ -0,0 +1,91 @@ +"""Tests for the v1 badge generators (shields.io endpoint JSON + inline SVG).""" + +from __future__ import annotations + +import re + +from scanner import badges + + +def test_status_badge_json_colors_by_state(): + assert badges.status_badge_json("clean")["color"] == "brightgreen" + assert badges.status_badge_json("suspicious")["color"] == "yellow" + assert badges.status_badge_json("malicious")["color"] == "red" + assert 
badges.status_badge_json("unknown")["color"] == "lightgrey" + # Unrecognised verdict falls back to lightgrey rather than crashing. + assert badges.status_badge_json("not-a-real-verdict")["color"] == "lightgrey" + + +def test_status_badge_json_carries_shields_contract(): + payload = badges.status_badge_json("clean") + assert payload["schemaVersion"] == 1 + assert payload["label"] == "skill scan" + assert payload["message"] == "clean" + assert payload["cacheSeconds"] == 300 + + +def test_score_badge_color_bands(): + # Bands aligned to the 21/51/81 cutoffs the verdict policy uses. + assert badges.score_badge_json(0)["color"] == "brightgreen" + assert badges.score_badge_json(20)["color"] == "brightgreen" + assert badges.score_badge_json(21)["color"] == "yellowgreen" + assert badges.score_badge_json(50)["color"] == "yellowgreen" + assert badges.score_badge_json(51)["color"] == "yellow" + assert badges.score_badge_json(80)["color"] == "yellow" + assert badges.score_badge_json(81)["color"] == "red" + assert badges.score_badge_json(100)["color"] == "red" + + +def test_score_badge_json_carries_shields_contract(): + payload = badges.score_badge_json(42) + assert payload["schemaVersion"] == 1 + assert payload["label"] == "risk score" + assert payload["message"] == "42/100" + + +def test_status_badge_svg_is_well_formed(): + svg = badges.status_badge_svg("clean") + assert svg.startswith("") + # Two rects (label background + message background). + assert svg.count("= 2 + # The verdict text appears in both the title and one of the nodes. + assert "clean" in svg + # Verdict colour bleeds through as a hex fill. + assert "#4c1" in svg, "clean verdict should use the brightgreen hex" + + +def test_score_badge_svg_color_threshold(): + high = badges.score_badge_svg(95) + low = badges.score_badge_svg(5) + # Red vs brightgreen hex. + assert "#e05d44" in high + assert "#4c1" in low + # Both have the score string. 
+ assert "95/100" in high + assert "5/100" in low + + +def test_svg_width_grows_with_message_length(): + """Width estimation has to widen for longer text or the badge clips.""" + short = badges.status_badge_svg("clean") + long_ = badges.status_badge_svg("suspicious") + # Pull the width attribute from the opening tag. + sw = int(re.search(r'width="(\d+)"', short).group(1)) + lw = int(re.search(r'width="(\d+)"', long_).group(1)) + assert lw > sw + + +def test_svg_escapes_markup_in_label_and_message(): + """The SVG renderer must escape XML special chars; the public surface only + ever passes shape-constrained inputs, but defense in depth keeps the badge + well-formed if the input shape ever drifts.""" + raw = badges._flat_badge_svg('