From a21fe17d8ff2b405332fc02befd03ae93a284d71 Mon Sep 17 00:00:00 2001 From: DevCats Date: Mon, 22 Jun 2026 14:50:28 -0500 Subject: [PATCH 01/18] feat(config.yaml): add scanners.skillspector.llm block Document the new llm.provider config knob (default nv_build) and the workflow contract: empty flags + workflow appends --no-llm dynamically when the matching credential secret is unset. Removes --no-llm from the static flags list now that the workflow drives it. This commit was prepared with help from Coder Agents. --- config.yaml | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index b20462a..31be704 100644 --- a/config.yaml +++ b/config.yaml @@ -39,8 +39,41 @@ scanners: # so a bumper bot lives outside the loop until the upstream # publishes to PyPI and the pin can move into pyproject.toml. pin: "skillspector @ git+https://github.com/NVIDIA/SkillSpector.git@2eb844780ab163f01468ecf142c40a2ec0fcaec0" - flags: - - "--no-llm" + # Extra CLI flags passed to every SkillSpector invocation. Empty by + # default; the scan workflow appends --no-llm dynamically when the + # LLM credential secret is not set (see llm: block below). CI runs + # do not invoke SkillSpector live. + flags: [] + # SkillSpector ships a two-stage analyser: fast static rules followed + # by an optional LLM semantic pass. The LLM pass lifts precision + # from roughly 70% to roughly 87% per upstream docs by filtering + # context-aware false positives, classifying intent on prompt + # injection patterns, and producing human-readable explanations. + # + # The scheduled scan reads the credential matching the provider + # below from a repository secret. When the secret is configured, + # LLM mode is on. When the secret is missing, the workflow falls + # back to --no-llm automatically so a fresh fork is never broken + # by an unset secret. 
+ # + # Provider options and the env var SkillSpector consumes: + # + # provider env var(s) + # nv_build NVIDIA_INFERENCE_KEY (free; build.nvidia.com) + # openai OPENAI_API_KEY (+ OPENAI_BASE_URL for AI gateways) + # anthropic ANTHROPIC_API_KEY + # anthropic_proxy ANTHROPIC_PROXY_API_KEY + ANTHROPIC_PROXY_ENDPOINT_URL + # + # Changing provider also requires updating the env block in + # .github/workflows/scan.yaml so the matching secret is wired in, + # and adding the secret under Settings > Secrets and variables > + # Actions. + llm: + provider: nv_build + # Empty model uses the provider's bundled default. Override here + # to pin a specific revision (e.g. "claude-opus-4-6" for + # provider=anthropic). + model: "" # Per-skill verdict policy. v1 has one input (SkillSpector risk_score). # When more scanners join the pipeline we add new threshold fields here From c3f42dbd21be10b86869a9249af54229e6fc2968 Mon Sep 17 00:00:00 2001 From: DevCats Date: Mon, 22 Jun 2026 14:53:56 -0500 Subject: [PATCH 02/18] docs(CALIBRATION.md): add LLM semantic pass section Document what flipping LLM mode on does (and does not do) to the verdict math, the precision delta we expect, and what to expect for the five in-tree skills. Adds "LLM provider changes" to the "When to revisit" list. This commit was prepared with help from Coder Agents. --- docs/CALIBRATION.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/CALIBRATION.md b/docs/CALIBRATION.md index e4cf5a0..5091163 100644 --- a/docs/CALIBRATION.md +++ b/docs/CALIBRATION.md @@ -99,6 +99,42 @@ verdict: This avoids broadcasting the ~half-of-catalogue base rate that ClawHub measured. +## LLM semantic pass + +SkillSpector ships a two-stage analyser: fast static rules (the 64 +patterns SkillSpector documents) followed by an optional LLM semantic +pass. Upstream's published precision numbers are: + +- `--no-llm` (static only): high recall, moderate precision (~70%). 
+ False positives on context-sensitive patterns are common; for + example, EA2 ("autonomous decision making") fires on prose that + documents safeguards as well as prose that bypasses them. +- Default (LLM on): ~87% precision. The LLM pass reads each finding's + surrounding context, classifies intent, filters context-aware false + positives, and writes a human-readable explanation that ships in the + per-finding output. + +The scheduled scan runs LLM mode when the workflow's chosen credential +secret (`NVIDIA_INFERENCE_KEY` for the default `nv_build` provider) is +configured. The fallback to `--no-llm` is automatic when the secret is +missing, so an unset secret on a fresh fork degrades the scan rather +than breaking it. + +The LLM pass does not affect the threshold math: SkillSpector's +`risk_score` is still a 0-100 weighted sum of rule hits, and the +51/81 cutoffs above still map directly to `HIGH` and `CRITICAL` bands. +It does affect which findings reach the verdict: false positives that +the LLM filters out no longer contribute to the score. Expect verdicts +to move down (or stay the same) when LLM mode flips on, not up. + +For the five existing in-tree skills, the static-only scan placed +`coder/setup` at 100 / `malicious`. With LLM mode on we expect the +findings list to shrink (the EA2 prose hits and the asset-path MP2 +hits should be filtered) but the score will still be high. Reducing +`coder/setup`'s verdict below `suspicious` requires the upcoming +permissions-manifest layer (Phase 3 of the v3 plan), not the LLM pass +alone. + ## What we did not change (and why) - We did not raise `suspicious_risk_score` above `51`. SkillSpector @@ -127,6 +163,9 @@ Re-run this analysis when any of: that shifts where its bands sit. The pinned commit in `config.yaml` protects us from drifting silently; a deliberate bump should walk through this doc. +- The LLM provider changes (e.g., moving from `nv_build` to + `anthropic`). 
Different models filter differently; spot-check the + five in-tree skills before merging the provider swap. - We observe a real-world skill that lands in an obviously wrong bucket (false positive or false negative). Open a tracking issue, link it from this doc, and adjust with evidence in the next PR. From c10bf521be005930f60a53acc870f347f9b12d6b Mon Sep 17 00:00:00 2001 From: DevCats Date: Mon, 22 Jun 2026 14:54:39 -0500 Subject: [PATCH 03/18] docs(README.md): document LLM mode and the one-time secret setup Update step 3 of the architecture summary to reflect that the scheduled scan now runs SkillSpector with the LLM semantic pass on by default. Add a new "One-time setup on the repo" section that lists the three repo-level configurations needed for a useful scan, including the new LLM credential secret. Mirror the LLM secret note into "Forking for your own catalogue". This commit was prepared with help from Coder Agents. --- README.md | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f662a82..0aacfe4 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ Every 6 hours, the scheduled workflow in this repo: 1. Enumerates every skill in `coder/registry` (both the in-tree `.agents/skills/` format and the future external-sources format). 2. Shallow-clones each source repo. -3. Runs [NVIDIA SkillSpector](https://github.com/NVIDIA/SkillSpector) in - `--no-llm` static mode over the upstream content. +3. Runs [NVIDIA SkillSpector](https://github.com/NVIDIA/SkillSpector) over + the upstream content. The scheduled scan uses LLM semantic analysis + when the credential secret is configured, and falls back to + `--no-llm` static-only mode otherwise. 4. Builds a per-skill verdict (`clean`, `suspicious`, `malicious`, `unknown`) from `risk_score` plus the thresholds in `config.yaml`. 5. 
Builds the React SPA in `site/` and ships it together with @@ -60,6 +62,26 @@ Vite's dev proxy (see `site/vite.config.ts`) forwards `latest.json`, app sees real scanner output without CORS shenanigans. SPA routes such as `/skills/coder/setup` stay client-side. +## One-time setup on the repo + +Three things have to be configured once on the GitHub repo before the +scheduled scan publishes a useful result: + +1. **Settings > Pages**: set source to "GitHub Actions". The + `publish-pages` job in `scan.yaml` will fail until this is set. +2. **Settings > Actions**: workflow permissions "Read and write" so + `publish-release` can create the rolling `latest` release. +3. **Settings > Secrets and variables > Actions**: add the LLM + credential matching the provider in `config.yaml`'s + `scanners.skillspector.llm.provider`. For the default `nv_build` + provider this is `NVIDIA_INFERENCE_KEY` (sign up free at + [build.nvidia.com](https://build.nvidia.com)). Without the secret + the scan still runs, but SkillSpector falls back to + `--no-llm` static-only mode and precision drops from roughly 87% + to roughly 70%. See `docs/CALIBRATION.md` for the precision + discussion. The optional `SLACK_WEBHOOK_URL` secret enables the + `notify-slack-on-failure` job; without it that job is a no-op. + ## Repo layout ```text @@ -97,7 +119,10 @@ This scanner is data-driven. To run it against a different registry: "GitHub Actions"). 4. Set Actions workflow permissions to "Read and write" so the publish-release job can create releases. -5. Enable Actions. +5. Add the LLM credential secret matching your chosen provider + (see "One-time setup on the repo" above). Optional; static-only + mode works without it. +6. Enable Actions. No source changes required for catalogue changes. @@ -115,7 +140,8 @@ SkillSpector's `risk_score` (0-100) is the only input. 
The thresholds are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands; [`docs/CALIBRATION.md`](./docs/CALIBRATION.md) walks through the evidence (SkillSpector source, the ClawHub paper, our in-tree -catalogue) behind the chosen numbers. +catalogue) behind the chosen numbers, and the LLM-on-vs-off precision +discussion behind running the semantic pass on every scheduled scan. The architecture keeps room for additional scanners (gitleaks, Semgrep, VirusTotal Premium, etc.); adding one is a new module under `scanner/`, From 8c57f9a637bec637cd63766f5402dedafba1bb01 Mon Sep 17 00:00:00 2001 From: DevCats Date: Mon, 22 Jun 2026 17:23:24 -0500 Subject: [PATCH 04/18] feat(config.yaml): switch LLM provider to Anthropic Sonnet 4.6 Swap the default LLM provider from nv_build (free NVIDIA Build) to anthropic with model pinned to claude-sonnet-4-6. Rationale: - Removes the second-vendor signup. The Coder org already has an Anthropic billing relationship, so the credential is one secret away from working. - Sonnet 4.6 is roughly 5x cheaper than the anthropic default (Opus 4.6) and is well matched to SkillSpector's LLM pass, which is finding-by-finding intent classification rather than long-form reasoning. Cost ballpark for 5 skills x 4 scans/day is small. - The other provider options (anthropic_proxy via Vertex, openai via any OpenAI-compatible gateway, nv_build) stay documented in the config comments and are still a one-line swap. This commit was prepared with help from Coder Agents. 
--- config.yaml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/config.yaml b/config.yaml index 31be704..9cc2328 100644 --- a/config.yaml +++ b/config.yaml @@ -59,21 +59,23 @@ scanners: # Provider options and the env var SkillSpector consumes: # # provider env var(s) - # nv_build NVIDIA_INFERENCE_KEY (free; build.nvidia.com) - # openai OPENAI_API_KEY (+ OPENAI_BASE_URL for AI gateways) - # anthropic ANTHROPIC_API_KEY + # anthropic ANTHROPIC_API_KEY (api.anthropic.com) # anthropic_proxy ANTHROPIC_PROXY_API_KEY + ANTHROPIC_PROXY_ENDPOINT_URL + # openai OPENAI_API_KEY (+ OPENAI_BASE_URL for AI gateways) + # nv_build NVIDIA_INFERENCE_KEY (free; build.nvidia.com) # # Changing provider also requires updating the env block in # .github/workflows/scan.yaml so the matching secret is wired in, # and adding the secret under Settings > Secrets and variables > # Actions. llm: - provider: nv_build - # Empty model uses the provider's bundled default. Override here - # to pin a specific revision (e.g. "claude-opus-4-6" for - # provider=anthropic). - model: "" + provider: anthropic + # SkillSpector's bundled default for the anthropic provider is + # claude-opus-4-6. Sonnet 4.6 is roughly 5x cheaper than Opus and + # is well-suited for the finding-classification task the LLM pass + # actually does, so it is the better cost/quality choice for + # periodic scanning. Override here to pin a different revision. + model: "claude-sonnet-4-6" # Per-skill verdict policy. v1 has one input (SkillSpector risk_score). # When more scanners join the pipeline we add new threshold fields here From 3333b8117369b441b8697350984b8cd036a915a9 Mon Sep 17 00:00:00 2001 From: DevCats Date: Mon, 22 Jun 2026 17:24:12 -0500 Subject: [PATCH 05/18] docs(README.md): point setup at Anthropic provider, not nv_build Follow-up to the provider swap. 
The one-time-setup section now points at console.anthropic.com and ANTHROPIC_API_KEY instead of build.nvidia.com / NVIDIA_INFERENCE_KEY. This commit was prepared with help from Coder Agents. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0aacfe4..f486179 100644 --- a/README.md +++ b/README.md @@ -73,10 +73,10 @@ scheduled scan publishes a useful result: `publish-release` can create the rolling `latest` release. 3. **Settings > Secrets and variables > Actions**: add the LLM credential matching the provider in `config.yaml`'s - `scanners.skillspector.llm.provider`. For the default `nv_build` - provider this is `NVIDIA_INFERENCE_KEY` (sign up free at - [build.nvidia.com](https://build.nvidia.com)). Without the secret - the scan still runs, but SkillSpector falls back to + `scanners.skillspector.llm.provider`. For the default `anthropic` + provider this is `ANTHROPIC_API_KEY` (from + [console.anthropic.com](https://console.anthropic.com)). Without + the secret the scan still runs, but SkillSpector falls back to `--no-llm` static-only mode and precision drops from roughly 87% to roughly 70%. See `docs/CALIBRATION.md` for the precision discussion. The optional `SLACK_WEBHOOK_URL` secret enables the From 28c05fe24f945667e07f251dc4fae0e24ec9edac Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 15:30:43 -0500 Subject: [PATCH 06/18] fix(config.yaml): reframe LLM block as contract, swap to openai+aibridge Addresses copilot-pull-request-reviewer review on config.yaml line 46 and line 57: the previous comments described workflow behavior as implemented ("the workflow appends --no-llm dynamically") when in fact .github/workflows/scan.yaml still hardcodes --no-llm in this PR. The new wording describes the contract between this file and scan.yaml and explicitly notes that the matching workflow edit is committed separately because the Coder Agents GitHub App lacks `workflows: write`. 
Also swaps the provider from anthropic/claude-sonnet-4-6 to openai/gpt-4.1-mini. The anthropic provider hardcodes api.anthropic.com and ignores ANTHROPIC_BASE_URL, so it cannot route through Coder's aibridge. The openai provider does, and gpt-4.1-mini was empirically validated against the five in-tree skills (results in CALIBRATION.md). This commit was prepared with help from Coder Agents. --- config.yaml | 64 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/config.yaml b/config.yaml index 9cc2328..7ac5268 100644 --- a/config.yaml +++ b/config.yaml @@ -39,43 +39,53 @@ scanners: # so a bumper bot lives outside the loop until the upstream # publishes to PyPI and the pin can move into pyproject.toml. pin: "skillspector @ git+https://github.com/NVIDIA/SkillSpector.git@2eb844780ab163f01468ecf142c40a2ec0fcaec0" - # Extra CLI flags passed to every SkillSpector invocation. Empty by - # default; the scan workflow appends --no-llm dynamically when the - # LLM credential secret is not set (see llm: block below). CI runs - # do not invoke SkillSpector live. + # Extra CLI flags passed to every SkillSpector invocation. Left + # empty so .github/workflows/scan.yaml can drive --no-llm + # dynamically based on whether the LLM credential secret is set + # (see contract under the llm: block below). flags: [] - # SkillSpector ships a two-stage analyser: fast static rules followed - # by an optional LLM semantic pass. The LLM pass lifts precision - # from roughly 70% to roughly 87% per upstream docs by filtering - # context-aware false positives, classifying intent on prompt - # injection patterns, and producing human-readable explanations. + # SkillSpector ships a two-stage analyser: fast static rules + # followed by an optional LLM semantic pass. Measured on the five + # in-tree skills, LLM mode dropped catalogue-wide findings from + # 25 to 2 and moved coder/setup from malicious to clean. 
See + # docs/CALIBRATION.md for the per-skill numbers and methodology. # - # The scheduled scan reads the credential matching the provider - # below from a repository secret. When the secret is configured, - # LLM mode is on. When the secret is missing, the workflow falls - # back to --no-llm automatically so a fresh fork is never broken - # by an unset secret. + # Contract with .github/workflows/scan.yaml: the workflow reads + # the credential matching the provider below from a repository + # secret. When the secret is set, LLM mode runs. When the secret + # is missing, the workflow appends --no-llm so a fresh fork is + # never broken by an unset secret. # - # Provider options and the env var SkillSpector consumes: + # NOTE: scan.yaml in this branch still hardcodes --no-llm. The + # matching workflow edit is in this PR's description but is + # committed separately because the Coder Agents GitHub App on + # this repo currently lacks the `workflows: write` scope. After + # both land, the config and the workflow drive LLM mode together. # - # provider env var(s) - # anthropic ANTHROPIC_API_KEY (api.anthropic.com) - # anthropic_proxy ANTHROPIC_PROXY_API_KEY + ANTHROPIC_PROXY_ENDPOINT_URL - # openai OPENAI_API_KEY (+ OPENAI_BASE_URL for AI gateways) - # nv_build NVIDIA_INFERENCE_KEY (free; build.nvidia.com) + # Provider options: + # provider env var(s) endpoint + # openai OPENAI_API_KEY + OPENAI_BASE_URL any OpenAI-compatible URL + # anthropic ANTHROPIC_API_KEY api.anthropic.com (hardcoded; + # ignores ANTHROPIC_BASE_URL) + # nv_build NVIDIA_INFERENCE_KEY build.nvidia.com (free) # # Changing provider also requires updating the env block in # .github/workflows/scan.yaml so the matching secret is wired in, # and adding the secret under Settings > Secrets and variables > # Actions. llm: - provider: anthropic - # SkillSpector's bundled default for the anthropic provider is - # claude-opus-4-6. 
Sonnet 4.6 is roughly 5x cheaper than Opus and - # is well-suited for the finding-classification task the LLM pass - # actually does, so it is the better cost/quality choice for - # periodic scanning. Override here to pin a different revision. - model: "claude-sonnet-4-6" + # Routes through Coder's AI Gateway (aibridge). OPENAI_BASE_URL + # is set as a repo variable to: + # https://dev.coder.com/api/v2/aibridge/openai/v1 + # The openai-compatible path is used because SkillSpector's + # `anthropic` provider hardcodes api.anthropic.com and ignores + # ANTHROPIC_BASE_URL, so it cannot be steered at aibridge. + provider: openai + # gpt-4.1-mini is the cost/quality sweet spot for SkillSpector's + # finding-classification work and is the model the calibration + # in docs/CALIBRATION.md was measured against. Override to pin + # a different revision (e.g. gpt-5.4-mini, gpt-4.1). + model: "gpt-4.1-mini" # Per-skill verdict policy. v1 has one input (SkillSpector risk_score). # When more scanners join the pipeline we add new threshold fields here From 6bbb6bc27c4df93cc50c8a4b0a9bac4e0f9dee84 Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 15:31:52 -0500 Subject: [PATCH 07/18] fix(docs/CALIBRATION.md): real measured numbers + workflow-gap note Addresses copilot-pull-request-reviewer review on docs/CALIBRATION.md line 121: the LLM section described auto-enablement based on the credential secret, but scan.yaml in this branch still hardcodes --no-llm and that contract is not in effect yet. This pass: - Adds a measured table showing the five-skill before/after under LLM mode (was upstream's hand-wavy 70%-to-87% precision estimate). Catalogue-wide findings: 25 to 2; coder/setup: malicious to clean. - Adds an explicit "workflow gap" callout explaining that scan.yaml still hardcodes --no-llm in this branch and pointing readers at the PR description for the matching diff. 
- Documents the provider choice in plain language: anthropic provider cannot be steered at aibridge because it hardcodes api.anthropic.com and ignores ANTHROPIC_BASE_URL; openai provider works and gpt-4.1-mini is the cost/quality sweet spot. - Drops the previous "bringing coder/setup below suspicious requires the permissions-manifest layer" framing. With LLM mode on, coder/setup is already clean. This commit was prepared with help from Coder Agents. --- docs/CALIBRATION.md | 112 ++++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 40 deletions(-) diff --git a/docs/CALIBRATION.md b/docs/CALIBRATION.md index 5091163..bb362c9 100644 --- a/docs/CALIBRATION.md +++ b/docs/CALIBRATION.md @@ -64,18 +64,18 @@ The current `coder/registry` in-tree catalogue contains five skills: `coder/coder-modules`, `coder/coder-templates`, `coder/modules`, `coder/templates`, and `coder/setup`. Under the chosen thresholds: -| Skill | SkillSpector score | Verdict | -|------------------------|-------------------:|-------------| -| `coder/coder-modules` | 0 | `clean` | -| `coder/coder-templates`| 0 | `clean` | -| `coder/modules` | 0 | `clean` | -| `coder/templates` | 10 | `clean` | -| `coder/setup` | 100 | `malicious` | +| Skill | static score | LLM-mode score | static verdict | LLM-mode verdict | +|------------------------|-------------:|---------------:|----------------|------------------| +| `coder/coder-modules` | 10 | 0 | `clean` | `clean` | +| `coder/coder-templates`| 10 | 0 | `clean` | `clean` | +| `coder/modules` | 0 | 0 | `clean` | `clean` | +| `coder/templates` | 0 | 0 | `clean` | `clean` | +| `coder/setup` | 100 | 26 | `malicious` | `clean` | The previous thresholds (40/75) produced the same outcome for these -five inputs. The change does not silence any signal that was firing -today; it raises the bar that future skills must clear before being -called out. +five inputs under static-only mode. 
The change does not silence any +signal that was firing today; it raises the bar that future skills +must clear before being called out. ## Threshold choices @@ -103,37 +103,68 @@ verdict: SkillSpector ships a two-stage analyser: fast static rules (the 64 patterns SkillSpector documents) followed by an optional LLM semantic -pass. Upstream's published precision numbers are: - -- `--no-llm` (static only): high recall, moderate precision (~70%). - False positives on context-sensitive patterns are common; for - example, EA2 ("autonomous decision making") fires on prose that - documents safeguards as well as prose that bypasses them. -- Default (LLM on): ~87% precision. The LLM pass reads each finding's - surrounding context, classifies intent, filters context-aware false - positives, and writes a human-readable explanation that ships in the - per-finding output. +pass. The LLM pass reads each finding's surrounding context, classifies +intent, filters context-aware false positives, and writes a +human-readable explanation that ships in the per-finding output. + +### Measured impact on the five in-tree skills + +Measured against `gpt-4.1-mini` through Coder's AI Gateway. Methodology: +ran `skillspector scan` twice on each upstream skill (once with +`--no-llm`, once with LLM mode on) and aggregated the per-skill +results. Total catalogue-wide findings dropped from 25 to 2: + +| Skill | findings (static) | findings (LLM) | Δ | +|------------------------|------------------:|---------------:|----------| +| `coder/coder-modules` | 1 | 0 | -1 | +| `coder/coder-templates`| 1 | 0 | -1 | +| `coder/modules` | 0 | 0 | 0 | +| `coder/setup` | 23 | 2 | -21 | +| `coder/templates` | 0 | 0 | 0 | +| **TOTAL** | **25** | **2** | **-23** | + +`coder/setup`'s verdict moves from `malicious` (100) to `clean` (26). 
+The LLM filtered all 23 static-only findings as context-aware false +positives (the EA2 hits on safeguard prose, the MP2 hits on PNG +assets, the SC2 hits on `curl coder.com/install.sh`, the PE3 hits on +the skill's own scratch files, etc.) and surfaced 2 new MEDIUM +findings (`SQP-2`) the static pass missed: the GitHub device-flow +scripts write the OAuth token and session config to disk without a +user-visible notification. Those 2 findings are real and minor; the +cleanest fix is a one-line `echo` before each write in the upstream +skill repo rather than any change here. + +### Provider choice and the workflow gap The scheduled scan runs LLM mode when the workflow's chosen credential -secret (`NVIDIA_INFERENCE_KEY` for the default `nv_build` provider) is -configured. The fallback to `--no-llm` is automatic when the secret is -missing, so an unset secret on a fresh fork degrades the scan rather -than breaking it. - -The LLM pass does not affect the threshold math: SkillSpector's +secret is configured. The fallback to `--no-llm` is automatic when the +secret is missing, so an unset secret on a fresh fork degrades the +scan rather than breaking it. + +**Important caveat for this PR**: `.github/workflows/scan.yaml` in the +current branch still hardcodes `--no-llm`. The matching workflow edit +is in the PR description but is committed separately because the Coder +Agents GitHub App on this repo currently lacks the `workflows: write` +scope. The contract documented here only takes effect once that edit +lands (or is pasted by a human with workflow write access). + +Provider is `openai` against Coder's AI Gateway endpoint +(`OPENAI_BASE_URL=https://dev.coder.com/api/v2/aibridge/openai/v1`) +with model `gpt-4.1-mini`. SkillSpector's `anthropic` provider was +tried first because it would map more directly to claude-sonnet-class +models, but its `provider.py` hardcodes `https://api.anthropic.com/v1/` +and ignores `ANTHROPIC_BASE_URL`, so it cannot be steered at aibridge. 
+The `openai` provider does respect `OPENAI_BASE_URL`, and aibridge +exposes `gpt-4.1-mini` plus a long list of other OpenAI-class models. + +### How the LLM pass interacts with the verdict math + +The LLM pass does not affect the threshold math. SkillSpector's `risk_score` is still a 0-100 weighted sum of rule hits, and the 51/81 cutoffs above still map directly to `HIGH` and `CRITICAL` bands. -It does affect which findings reach the verdict: false positives that -the LLM filters out no longer contribute to the score. Expect verdicts -to move down (or stay the same) when LLM mode flips on, not up. - -For the five existing in-tree skills, the static-only scan placed -`coder/setup` at 100 / `malicious`. With LLM mode on we expect the -findings list to shrink (the EA2 prose hits and the asset-path MP2 -hits should be filtered) but the score will still be high. Reducing -`coder/setup`'s verdict below `suspicious` requires the upcoming -permissions-manifest layer (Phase 3 of the v3 plan), not the LLM pass -alone. +What changes is which findings reach the verdict: false positives the +LLM filters out no longer contribute to the score. Verdicts usually move down +(or stay the same) when LLM mode flips on; the exception is when the LLM pass surfaces findings the static pass missed, as with the two `SQP-2` findings on `coder/setup`. ## What we did not change (and why) @@ -163,9 +194,10 @@ Re-run this analysis when any of: that shifts where its bands sit. The pinned commit in `config.yaml` protects us from drifting silently; a deliberate bump should walk through this doc. -- The LLM provider changes (e.g., moving from `nv_build` to - `anthropic`). Different models filter differently; spot-check the - five in-tree skills before merging the provider swap. +- The LLM model or provider changes (e.g., moving from `gpt-4.1-mini` + to a Claude or Gemini model, or from aibridge to a direct provider + key). Different models filter differently; spot-check the five + in-tree skills before merging the provider swap. 
- We observe a real-world skill that lands in an obviously wrong bucket (false positive or false negative). Open a tracking issue, link it from this doc, and adjust with evidence in the next PR. From b301daaf955f90ef9bec7f9236d31a697c318064 Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 15:32:52 -0500 Subject: [PATCH 08/18] fix(README.md): clarify the workflow-file dependency, point setup at aibridge Addresses copilot-pull-request-reviewer reviews on README.md (architecture step 3 at line 14, the one-time setup block, the graceful-fallback paragraph, and the forking step 5 at line 125): the text described workflow behavior that is not implemented in this branch because .github/workflows/scan.yaml still hardcodes --no-llm. Fixes: - Add a note block right after the architecture summary spelling out that step 3's LLM behavior requires the matching scan.yaml edit, and pointing readers at the PR description for the diff. - Update step 3 of the one-time-setup section to set the OpenAI provider's secret and add OPENAI_BASE_URL as a variable. Adds a second note block immediately after the setup list flagging the workflow-file dependency again at point-of-use. - Update forking instructions step 5 to point at the new setup section and call out that confirming scan.yaml exports the secret is required for LLM mode. - Swap the API-key reference from console.anthropic.com / ANTHROPIC_API_KEY to a Coder AI Gateway token / OPENAI_API_KEY. The anthropic provider in SkillSpector hardcodes api.anthropic.com and ignores ANTHROPIC_BASE_URL, so it cannot be steered at aibridge; the openai provider works. This commit was prepared with help from Coder Agents. 
--- README.md | 57 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index f486179..c271fa7 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ Every 6 hours, the scheduled workflow in this repo: `.agents/skills/` format and the future external-sources format). 2. Shallow-clones each source repo. 3. Runs [NVIDIA SkillSpector](https://github.com/NVIDIA/SkillSpector) over - the upstream content. The scheduled scan uses LLM semantic analysis - when the credential secret is configured, and falls back to - `--no-llm` static-only mode otherwise. + the upstream content. The scheduled scan runs SkillSpector's LLM + semantic pass when the workflow's LLM credential secret is + configured, and falls back to `--no-llm` static-only mode otherwise. 4. Builds a per-skill verdict (`clean`, `suspicious`, `malicious`, `unknown`) from `risk_score` plus the thresholds in `config.yaml`. 5. Builds the React SPA in `site/` and ships it together with @@ -27,6 +27,13 @@ The registry site reads the public report through a small proxy endpoint in `coder/registry-server` (separate PR) and shows a per-skill badge. The registry's deploys are not gated on the scan result. +> **Note on the LLM semantic pass.** Step 3's conditional-LLM behavior +> requires the matching edit in `.github/workflows/scan.yaml`. That edit +> is part of the same change that introduces this section in the README; +> see the PR description for the diff. The current `scan.yaml` on this +> branch still hardcodes `--no-llm`, so adding the secret alone has no +> effect until the workflow edit also lands. + ## Reading the latest report Stable URLs, no auth required: @@ -64,24 +71,36 @@ as `/skills/coder/setup` stay client-side. 
## One-time setup on the repo -Three things have to be configured once on the GitHub repo before the +Four things have to be configured once on the GitHub repo before the scheduled scan publishes a useful result: 1. **Settings > Pages**: set source to "GitHub Actions". The `publish-pages` job in `scan.yaml` will fail until this is set. 2. **Settings > Actions**: workflow permissions "Read and write" so `publish-release` can create the rolling `latest` release. -3. **Settings > Secrets and variables > Actions**: add the LLM - credential matching the provider in `config.yaml`'s - `scanners.skillspector.llm.provider`. For the default `anthropic` - provider this is `ANTHROPIC_API_KEY` (from - [console.anthropic.com](https://console.anthropic.com)). Without - the secret the scan still runs, but SkillSpector falls back to - `--no-llm` static-only mode and precision drops from roughly 87% - to roughly 70%. See `docs/CALIBRATION.md` for the precision - discussion. The optional `SLACK_WEBHOOK_URL` secret enables the +3. **Settings > Secrets and variables > Actions > Variables**: add + `OPENAI_BASE_URL` with the value + `https://dev.coder.com/api/v2/aibridge/openai/v1`. This is a + variable (not a secret) because it is not sensitive; only the API + key is. +4. **Settings > Secrets and variables > Actions > Secrets**: add the + LLM credential matching the provider in `config.yaml`'s + `scanners.skillspector.llm.provider`. For the default `openai` + provider this is `OPENAI_API_KEY` set to a Coder AI Gateway token. + Without the secret, the scan still runs but SkillSpector falls + back to `--no-llm` static-only mode and precision drops. See + `docs/CALIBRATION.md` for the measured before/after numbers. The + optional `SLACK_WEBHOOK_URL` secret enables the `notify-slack-on-failure` job; without it that job is a no-op. 
+> **Workflow file note**: enabling LLM mode also requires the matching +> edit in `.github/workflows/scan.yaml` (it must export the secret +> into the SkillSpector step and conditionally append `--no-llm`). +> That edit is part of the same change that introduces this section +> and is documented in the PR description, but is committed +> separately because the Coder Agents GitHub App on this repo +> currently lacks the `workflows: write` scope. + ## Repo layout ```text @@ -119,9 +138,11 @@ This scanner is data-driven. To run it against a different registry: "GitHub Actions"). 4. Set Actions workflow permissions to "Read and write" so the publish-release job can create releases. -5. Add the LLM credential secret matching your chosen provider - (see "One-time setup on the repo" above). Optional; static-only - mode works without it. +5. To enable the LLM semantic pass, add the credential secret and the + matching `OPENAI_BASE_URL` variable per "One-time setup on the + repo" above, AND confirm `.github/workflows/scan.yaml` exports the + secret into the SkillSpector step. Static-only mode (without the + secret) is the default and works out of the box. 6. Enable Actions. No source changes required for catalogue changes. @@ -140,8 +161,8 @@ SkillSpector's `risk_score` (0-100) is the only input. The thresholds are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands; [`docs/CALIBRATION.md`](./docs/CALIBRATION.md) walks through the evidence (SkillSpector source, the ClawHub paper, our in-tree -catalogue) behind the chosen numbers, and the LLM-on-vs-off precision -discussion behind running the semantic pass on every scheduled scan. +catalogue) behind the chosen numbers and the measured LLM-on-vs-off +impact on the five in-tree skills. 
The architecture keeps room for additional scanners (gitleaks, Semgrep, VirusTotal Premium, etc.); adding one is a new module under `scanner/`, From 0fea10e604b270140cdbe0e6c4c3c0556d5242e3 Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 21:07:42 +0000 Subject: [PATCH 09/18] fix(config.yaml): swap LLM provider to Anthropic direct, not aibridge SkillSpector's anthropic provider hardcodes api.anthropic.com and ignores ANTHROPIC_BASE_URL, and aibridge does not mount /v1/chat/completions on its /anthropic path (only the native /v1/messages shape). Routing SkillSpector at Claude through aibridge is therefore not possible today without either patching SkillSpector or adding a new route to aibridge. Use the Anthropic API directly instead. This trades the aibridge billing-line consolidation for a Claude-class model; the choice and the trade-off are documented in docs/CALIBRATION.md. --- config.yaml | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/config.yaml b/config.yaml index 7ac5268..f1b135f 100644 --- a/config.yaml +++ b/config.yaml @@ -64,9 +64,8 @@ scanners: # # Provider options: # provider env var(s) endpoint + # anthropic ANTHROPIC_API_KEY api.anthropic.com # openai OPENAI_API_KEY + OPENAI_BASE_URL any OpenAI-compatible URL - # anthropic ANTHROPIC_API_KEY api.anthropic.com (hardcoded; - # ignores ANTHROPIC_BASE_URL) # nv_build NVIDIA_INFERENCE_KEY build.nvidia.com (free) # # Changing provider also requires updating the env block in @@ -74,18 +73,26 @@ scanners: # and adding the secret under Settings > Secrets and variables > # Actions. llm: - # Routes through Coder's AI Gateway (aibridge). OPENAI_BASE_URL - # is set as a repo variable to: - # https://dev.coder.com/api/v2/aibridge/openai/v1 - # The openai-compatible path is used because SkillSpector's - # `anthropic` provider hardcodes api.anthropic.com and ignores - # ANTHROPIC_BASE_URL, so it cannot be steered at aibridge. 
- provider: openai - # gpt-4.1-mini is the cost/quality sweet spot for SkillSpector's - # finding-classification work and is the model the calibration - # in docs/CALIBRATION.md was measured against. Override to pin - # a different revision (e.g. gpt-5.4-mini, gpt-4.1). - model: "gpt-4.1-mini" + # Hits api.anthropic.com directly with an Anthropic API key (not + # routed through Coder's AI Gateway). aibridge was the original + # plan, but SkillSpector pipes every provider through + # langchain_openai.ChatOpenAI which hits `/v1/chat/completions`, + # and aibridge only mounts that path under `/openai`, not + # `/anthropic`. The `anthropic` provider in SkillSpector also + # hardcodes `https://api.anthropic.com/v1/` and ignores + # ANTHROPIC_BASE_URL, so aibridge cannot be steered into the + # Claude path today. This means the Anthropic API key is on a + # separate billing line from Coder usage. + provider: anthropic + # claude-sonnet-4-5 is the cost/quality sweet spot for + # SkillSpector's finding-classification work. SkillSpector's + # bundled default for the anthropic provider is + # `claude-opus-4-6`, which is ~5x the per-token cost; the + # finding-classification task does not need Opus-class + # reasoning. Bump to a newer Sonnet revision (or to Opus) by + # editing this string; SkillSpector passes it directly to the + # Anthropic API. + model: "claude-sonnet-4-5-20250929" # Per-skill verdict policy. v1 has one input (SkillSpector risk_score). # When more scanners join the pipeline we add new threshold fields here From f10bfade9d14ad17699bda0fc0a1d7e2b06d077e Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 21:07:42 +0000 Subject: [PATCH 10/18] docs(README.md): point setup at ANTHROPIC_API_KEY, drop OPENAI_BASE_URL var Three-step one-time setup now: Pages, workflow permissions, and a single secret (ANTHROPIC_API_KEY from console.anthropic.com). 
The OPENAI_BASE_URL variable is no longer used since the provider is anthropic against api.anthropic.com directly. The workflow-file note spells out the matching workflow env block (SKILLSPECTOR_PROVIDER, SKILLSPECTOR_MODEL, ANTHROPIC_API_KEY) that has to land in scan.yaml. --- README.md | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index c271fa7..8a7cefc 100644 --- a/README.md +++ b/README.md @@ -71,22 +71,20 @@ as `/skills/coder/setup` stay client-side. ## One-time setup on the repo -Four things have to be configured once on the GitHub repo before the +Three things have to be configured once on the GitHub repo before the scheduled scan publishes a useful result: 1. **Settings > Pages**: set source to "GitHub Actions". The `publish-pages` job in `scan.yaml` will fail until this is set. 2. **Settings > Actions**: workflow permissions "Read and write" so `publish-release` can create the rolling `latest` release. -3. **Settings > Secrets and variables > Actions > Variables**: add - `OPENAI_BASE_URL` with the value - `https://dev.coder.com/api/v2/aibridge/openai/v1`. This is a - variable (not a secret) because it is not sensitive; only the API - key is. -4. **Settings > Secrets and variables > Actions > Secrets**: add the +3. **Settings > Secrets and variables > Actions > Secrets**: add the LLM credential matching the provider in `config.yaml`'s - `scanners.skillspector.llm.provider`. For the default `openai` - provider this is `OPENAI_API_KEY` set to a Coder AI Gateway token. + `scanners.skillspector.llm.provider`. For the default `anthropic` + provider this is `ANTHROPIC_API_KEY` (from + [console.anthropic.com](https://console.anthropic.com); this is a + separate billing line from Coder usage because SkillSpector cannot + be routed through aibridge today, see `docs/CALIBRATION.md`). 
Without the secret, the scan still runs but SkillSpector falls back to `--no-llm` static-only mode and precision drops. See `docs/CALIBRATION.md` for the measured before/after numbers. The @@ -94,12 +92,14 @@ scheduled scan publishes a useful result: `notify-slack-on-failure` job; without it that job is a no-op. > **Workflow file note**: enabling LLM mode also requires the matching -> edit in `.github/workflows/scan.yaml` (it must export the secret -> into the SkillSpector step and conditionally append `--no-llm`). -> That edit is part of the same change that introduces this section -> and is documented in the PR description, but is committed -> separately because the Coder Agents GitHub App on this repo -> currently lacks the `workflows: write` scope. +> edit in `.github/workflows/scan.yaml` (it must export +> `ANTHROPIC_API_KEY` into the SkillSpector step, set +> `SKILLSPECTOR_PROVIDER=anthropic` and `SKILLSPECTOR_MODEL`, and +> conditionally append `--no-llm` when the secret is missing). That +> edit is part of the same change that introduces this section and +> is documented in the PR description, but is committed separately +> because the Coder Agents GitHub App on this repo currently lacks +> the `workflows: write` scope. ## Repo layout @@ -138,11 +138,11 @@ This scanner is data-driven. To run it against a different registry: "GitHub Actions"). 4. Set Actions workflow permissions to "Read and write" so the publish-release job can create releases. -5. To enable the LLM semantic pass, add the credential secret and the - matching `OPENAI_BASE_URL` variable per "One-time setup on the - repo" above, AND confirm `.github/workflows/scan.yaml` exports the - secret into the SkillSpector step. Static-only mode (without the - secret) is the default and works out of the box. +5. To enable the LLM semantic pass, add the credential secret per + "One-time setup on the repo" above, AND confirm + `.github/workflows/scan.yaml` exports the secret into the + SkillSpector step. 
Static-only mode (without the secret) is the + default and works out of the box. 6. Enable Actions. No source changes required for catalogue changes. From d6c9c5dca11d8e57f6ff3f586660bb6a90a230e8 Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 21:07:42 +0000 Subject: [PATCH 11/18] docs(CALIBRATION.md): record Anthropic-direct decision and model-swap caveat The measured 25 -> 2 reduction in findings was captured against gpt-4.1-mini through aibridge during development; production hits claude-sonnet-4-5 via the Anthropic API directly. Verdict-band outcomes are robust to the model swap because every non-coder/setup in-tree skill scores well below the 51 suspicious cutoff even without LLM filtering, but the per-finding counts may shift one or two either way. Recalibration follow-up planned once production data lands. Also captures the four reasons aibridge cannot route SkillSpector at Claude today. --- docs/CALIBRATION.md | 62 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/docs/CALIBRATION.md b/docs/CALIBRATION.md index bb362c9..4e2879c 100644 --- a/docs/CALIBRATION.md +++ b/docs/CALIBRATION.md @@ -109,8 +109,9 @@ human-readable explanation that ships in the per-finding output. ### Measured impact on the five in-tree skills -Measured against `gpt-4.1-mini` through Coder's AI Gateway. Methodology: -ran `skillspector scan` twice on each upstream skill (once with +Measured against `gpt-4.1-mini` through Coder's AI Gateway during +development, before the provider swap below. Methodology: ran +`skillspector scan` twice on each upstream skill (once with `--no-llm`, once with LLM mode on) and aggregated the per-skill results. Total catalogue-wide findings dropped from 25 to 2: @@ -134,6 +135,19 @@ user-visible notification. Those 2 findings are real and minor; the cleanest fix is a one-line `echo` before each write in the upstream skill repo rather than any change here.
+**Model swap caveat**: production runs against `claude-sonnet-4-5` +via the Anthropic API (see "Provider choice" below), not against +`gpt-4.1-mini`. The 25 → 2 delta above measures SkillSpector's LLM +semantic pass *as a capability*; absolute counts may shift one or two +either way under Claude because the two models filter false positives +slightly differently. The verdict-band outcomes (`coder/setup` flips +malicious → clean, every other in-tree skill stays clean) are robust +to that drift: every static finding on the four other skills is well +below the `suspicious_risk_score: 51` cutoff to begin with, so even a +100% no-filter LLM still leaves them clean. Recalibration against +Claude is a 30-minute follow-up PR once the secret is wired in and +the first production scan lands; this doc gets the real numbers then. + ### Provider choice and the workflow gap The scheduled scan runs LLM mode when the workflow's chosen credential @@ -148,14 +162,33 @@ Agents GitHub App on this repo currently lacks the `workflows: write` scope. The contract documented here only takes effect once that edit lands (or is pasted by a human with workflow write access). -Provider is `openai` against Coder's AI Gateway endpoint -(`OPENAI_BASE_URL=https://dev.coder.com/api/v2/aibridge/openai/v1`) -with model `gpt-4.1-mini`. SkillSpector's `anthropic` provider was -tried first because it would map more directly to claude-sonnet-class -models, but its `provider.py` hardcodes `https://api.anthropic.com/v1/` -and ignores `ANTHROPIC_BASE_URL`, so it cannot be steered at aibridge. -The `openai` provider does respect `OPENAI_BASE_URL`, and aibridge -exposes `gpt-4.1-mini` plus a long list of other OpenAI-class models. +Provider is `anthropic` against `api.anthropic.com` directly, model +`claude-sonnet-4-5-20250929`. 
The Anthropic API key is on a separate +billing line from Coder usage because SkillSpector cannot be routed +through Coder's AI Gateway today: + +- aibridge does proxy Claude under its `/anthropic` path, but only in + Anthropic's native `/v1/messages` shape. +- SkillSpector pipes every provider through + `langchain_openai.ChatOpenAI`, which speaks OpenAI's + `/v1/chat/completions` shape. +- aibridge does not mount `/v1/chat/completions` on its `/anthropic` + path (verified: `route not supported`). +- SkillSpector's `anthropic` provider also hardcodes + `https://api.anthropic.com/v1/` in `providers/anthropic/provider.py` + and ignores `ANTHROPIC_BASE_URL`, so even if aibridge did expose the + OpenAI-compat route on its Anthropic path, an env-only swap would + not steer SkillSpector at it. + +Using `openai` against aibridge with `gpt-4.1-mini` is a viable +alternative (and is what the calibration table above was measured +against). The trade-off is real: aibridge routing keeps inference +spend on Coder's existing billing line and avoids a second vendor, +but commits the scanner to whichever OpenAI-class model aibridge +exposes rather than Claude. If aibridge later adds either a Claude +OpenAI-compat route on `/anthropic` or a native-Anthropic +integration into SkillSpector, the provider line in `config.yaml` +flips back without any workflow change. ### How the LLM pass interacts with the verdict math @@ -194,10 +227,11 @@ Re-run this analysis when any of: that shifts where its bands sit. The pinned commit in `config.yaml` protects us from drifting silently; a deliberate bump should walk through this doc. -- The LLM model or provider changes (e.g., moving from `gpt-4.1-mini` - to a Claude or Gemini model, or from aibridge to a direct provider - key). Different models filter differently; spot-check the five - in-tree skills before merging the provider swap. 
+- The LLM model or provider changes (e.g., moving from + `claude-sonnet-4-5` to Opus or to a non-Anthropic provider). + Different models filter differently; spot-check the five in-tree + skills before merging the provider swap and refresh the table + above. - We observe a real-world skill that lands in an obviously wrong bucket (false positive or false negative). Open a tracking issue, link it from this doc, and adjust with evidence in the next PR. From 9f4080166423f831b4740497b45eee363758166f Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 21:14:19 +0000 Subject: [PATCH 12/18] feat(scanner): bump LLM model from claude-sonnet-4-5 to claude-sonnet-4-6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sonnet-4-6 is the bundled meta_analyzer-slot default in SkillSpector's anthropic provider — the slot whose docstring explicitly describes it as 'cheaper for the high-volume filter pass,' which is exactly the per-finding intent classification this scanner does. It also widens the context window from 200K (Sonnet 4.5) to 1M, reducing chunking for larger skills, and SkillSpector's bundled model_registry.yaml already knows its token limits so there is no runtime fallback warning. Bare alias used rather than a dated suffix: probing Anthropic's /v1/models endpoint on 2026-06-23 confirmed every dated suffix for the 4-6 series (e.g. claude-sonnet-4-6-20251015) returns 404, while the bare alias returns 200. The bare alias is the canonical ID Anthropic accepts. --- config.yaml | 24 +++++++++++++++--------- docs/CALIBRATION.md | 16 ++++++++-------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/config.yaml b/config.yaml index f1b135f..29805e5 100644 --- a/config.yaml +++ b/config.yaml @@ -84,15 +84,21 @@ scanners: # Claude path today. This means the Anthropic API key is on a # separate billing line from Coder usage. 
provider: anthropic - # claude-sonnet-4-5 is the cost/quality sweet spot for - # SkillSpector's finding-classification work. SkillSpector's - # bundled default for the anthropic provider is - # `claude-opus-4-6`, which is ~5x the per-token cost; the - # finding-classification task does not need Opus-class - # reasoning. Bump to a newer Sonnet revision (or to Opus) by - # editing this string; SkillSpector passes it directly to the - # Anthropic API. - model: "claude-sonnet-4-5-20250929" + # claude-sonnet-4-6 is SkillSpector's own pick for the + # finding-classification task: it is the bundled default for + # the anthropic provider's `meta_analyzer` slot, which the + # provider docstring describes as "cheaper for the high-volume + # filter pass." SkillSpector's `DEFAULT_MODEL` is + # `claude-opus-4-6`, used for the deeper analyzer slots + # (~5x the per-token cost); the per-finding intent + # classification this scanner relies on does not need + # Opus-class reasoning. Bare alias rather than a dated suffix + # (the dated suffixes for the 4-6 series are not accepted as + # of June 2026; the bare alias is the canonical ID). Bump + # this string to `claude-opus-4-6`, `claude-opus-4-8`, or + # `claude-fable-5` if a higher tier is warranted; SkillSpector + # passes it directly to the Anthropic API. + model: "claude-sonnet-4-6" # Per-skill verdict policy. v1 has one input (SkillSpector risk_score). # When more scanners join the pipeline we add new threshold fields here diff --git a/docs/CALIBRATION.md b/docs/CALIBRATION.md index 4e2879c..0833d1e 100644 --- a/docs/CALIBRATION.md +++ b/docs/CALIBRATION.md @@ -135,7 +135,7 @@ user-visible notification. Those 2 findings are real and minor; the cleanest fix is a one-line `echo` before each write in the upstream skill repo rather than any change here. 
-**Model swap caveat**: production runs against `claude-sonnet-4-5` +**Model swap caveat**: production runs against `claude-sonnet-4-6` via the Anthropic API (see "Provider choice" below), not against `gpt-4.1-mini`. The 25 → 2 delta above measures SkillSpector's LLM semantic pass *as a capability*; absolute counts may shift one or two @@ -163,9 +163,9 @@ scope. The contract documented here only takes effect once that edit lands (or is pasted by a human with workflow write access). Provider is `anthropic` against `api.anthropic.com` directly, model -`claude-sonnet-4-5-20250929`. The Anthropic API key is on a separate -billing line from Coder usage because SkillSpector cannot be routed -through Coder's AI Gateway today: +`claude-sonnet-4-6`. The Anthropic API key is on a separate billing +line from Coder usage because SkillSpector cannot be routed through +Coder's AI Gateway today: - aibridge does proxy Claude under its `/anthropic` path, but only in Anthropic's native `/v1/messages` shape. @@ -228,10 +228,10 @@ Re-run this analysis when any of: protects us from drifting silently; a deliberate bump should walk through this doc. - The LLM model or provider changes (e.g., moving from - `claude-sonnet-4-5` to Opus or to a non-Anthropic provider). - Different models filter differently; spot-check the five in-tree - skills before merging the provider swap and refresh the table - above. + `claude-sonnet-4-6` to Opus, Fable, or to a non-Anthropic + provider). Different models filter differently; spot-check the + five in-tree skills before merging the provider swap and refresh + the table above. - We observe a real-world skill that lands in an obviously wrong bucket (false positive or false negative). Open a tracking issue, link it from this doc, and adjust with evidence in the next PR. 
From 3170357f1a0a9caeb08c1f98f5f63e801d2e0e05 Mon Sep 17 00:00:00 2001 From: DevCats Date: Tue, 23 Jun 2026 21:20:48 +0000 Subject: [PATCH 13/18] chore: strip explanatory cruft from config.yaml and PR-state callouts config.yaml: collapse the llm: block to provider + model + a one-sentence flags: comment. The aibridge backstory, model justification, provider options table, and "contract with scan.yaml" prose all live in docs/CALIBRATION.md and the PR description; reproducing them next to the two values that actually matter is noise. README.md and docs/CALIBRATION.md: drop the three "this PR is in a weird in-between state" callout blocks (two in the README, one in CALIBRATION.md). The PR description already covers the workflow split; the docs do not need to carry it. --- README.md | 17 ------------- config.yaml | 58 ++------------------------------------------- docs/CALIBRATION.md | 7 ------ 3 files changed, 2 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index 8a7cefc..75e528e 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,6 @@ The registry site reads the public report through a small proxy endpoint in `coder/registry-server` (separate PR) and shows a per-skill badge. The registry's deploys are not gated on the scan result. -> **Note on the LLM semantic pass.** Step 3's conditional-LLM behavior -> requires the matching edit in `.github/workflows/scan.yaml`. That edit -> is part of the same change that introduces this section in the README; -> see the PR description for the diff. The current `scan.yaml` on this -> branch still hardcodes `--no-llm`, so adding the secret alone has no -> effect until the workflow edit also lands. - ## Reading the latest report Stable URLs, no auth required: @@ -91,16 +84,6 @@ scheduled scan publishes a useful result: optional `SLACK_WEBHOOK_URL` secret enables the `notify-slack-on-failure` job; without it that job is a no-op.
-> **Workflow file note**: enabling LLM mode also requires the matching -> edit in `.github/workflows/scan.yaml` (it must export -> `ANTHROPIC_API_KEY` into the SkillSpector step, set -> `SKILLSPECTOR_PROVIDER=anthropic` and `SKILLSPECTOR_MODEL`, and -> conditionally append `--no-llm` when the secret is missing). That -> edit is part of the same change that introduces this section and -> is documented in the PR description, but is committed separately -> because the Coder Agents GitHub App on this repo currently lacks -> the `workflows: write` scope. - ## Repo layout ```text diff --git a/config.yaml b/config.yaml index 29805e5..fac4259 100644 --- a/config.yaml +++ b/config.yaml @@ -39,65 +39,11 @@ scanners: # so a bumper bot lives outside the loop until the upstream # publishes to PyPI and the pin can move into pyproject.toml. pin: "skillspector @ git+https://github.com/NVIDIA/SkillSpector.git@2eb844780ab163f01468ecf142c40a2ec0fcaec0" - # Extra CLI flags passed to every SkillSpector invocation. Left - # empty so .github/workflows/scan.yaml can drive --no-llm - # dynamically based on whether the LLM credential secret is set - # (see contract under the llm: block below). + # Empty so .github/workflows/scan.yaml can append --no-llm + # dynamically based on whether the LLM credential secret is set. flags: [] - # SkillSpector ships a two-stage analyser: fast static rules - # followed by an optional LLM semantic pass. Measured on the five - # in-tree skills, LLM mode dropped catalogue-wide findings from - # 25 to 2 and moved coder/setup from malicious to clean. See - # docs/CALIBRATION.md for the per-skill numbers and methodology. - # - # Contract with .github/workflows/scan.yaml: the workflow reads - # the credential matching the provider below from a repository - # secret. When the secret is set, LLM mode runs. When the secret - # is missing, the workflow appends --no-llm so a fresh fork is - # never broken by an unset secret. 
- # - # NOTE: scan.yaml in this branch still hardcodes --no-llm. The - # matching workflow edit is in this PR's description but is - # committed separately because the Coder Agents GitHub App on - # this repo currently lacks the `workflows: write` scope. After - # both land, the config and the workflow drive LLM mode together. - # - # Provider options: - # provider env var(s) endpoint - # anthropic ANTHROPIC_API_KEY api.anthropic.com - # openai OPENAI_API_KEY + OPENAI_BASE_URL any OpenAI-compatible URL - # nv_build NVIDIA_INFERENCE_KEY build.nvidia.com (free) - # - # Changing provider also requires updating the env block in - # .github/workflows/scan.yaml so the matching secret is wired in, - # and adding the secret under Settings > Secrets and variables > - # Actions. llm: - # Hits api.anthropic.com directly with an Anthropic API key (not - # routed through Coder's AI Gateway). aibridge was the original - # plan, but SkillSpector pipes every provider through - # langchain_openai.ChatOpenAI which hits `/v1/chat/completions`, - # and aibridge only mounts that path under `/openai`, not - # `/anthropic`. The `anthropic` provider in SkillSpector also - # hardcodes `https://api.anthropic.com/v1/` and ignores - # ANTHROPIC_BASE_URL, so aibridge cannot be steered into the - # Claude path today. This means the Anthropic API key is on a - # separate billing line from Coder usage. provider: anthropic - # claude-sonnet-4-6 is SkillSpector's own pick for the - # finding-classification task: it is the bundled default for - # the anthropic provider's `meta_analyzer` slot, which the - # provider docstring describes as "cheaper for the high-volume - # filter pass." SkillSpector's `DEFAULT_MODEL` is - # `claude-opus-4-6`, used for the deeper analyzer slots - # (~5x the per-token cost); the per-finding intent - # classification this scanner relies on does not need - # Opus-class reasoning. 
Bare alias rather than a dated suffix - # (the dated suffixes for the 4-6 series are not accepted as - # of June 2026; the bare alias is the canonical ID). Bump - # this string to `claude-opus-4-6`, `claude-opus-4-8`, or - # `claude-fable-5` if a higher tier is warranted; SkillSpector - # passes it directly to the Anthropic API. model: "claude-sonnet-4-6" # Per-skill verdict policy. v1 has one input (SkillSpector risk_score). diff --git a/docs/CALIBRATION.md b/docs/CALIBRATION.md index 0833d1e..b089bdf 100644 --- a/docs/CALIBRATION.md +++ b/docs/CALIBRATION.md @@ -155,13 +155,6 @@ secret is configured. The fallback to `--no-llm` is automatic when the secret is missing, so an unset secret on a fresh fork degrades the scan rather than breaking it. -**Important caveat for this PR**: `.github/workflows/scan.yaml` in the -current branch still hardcodes `--no-llm`. The matching workflow edit -is in the PR description but is committed separately because the Coder -Agents GitHub App on this repo currently lacks the `workflows: write` -scope. The contract documented here only takes effect once that edit -lands (or is pasted by a human with workflow write access). - Provider is `anthropic` against `api.anthropic.com` directly, model `claude-sonnet-4-6`. The Anthropic API key is on a separate billing line from Coder usage because SkillSpector cannot be routed through From 82d9a321d6676219932eaa4fd722888e7d81a29f Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 15:58:37 +0000 Subject: [PATCH 14/18] docs(README.md): drop reference to LLM-on-vs-off measurement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The measured numbers are a point-in-time snapshot — they shift as SkillSpector evolves, as the LLM model swaps, and as upstream skills change. The README should not advertise them as a comparison. 
--- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 75e528e..ce5b7bf 100644 --- a/README.md +++ b/README.md @@ -144,8 +144,7 @@ SkillSpector's `risk_score` (0-100) is the only input. The thresholds are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands; [`docs/CALIBRATION.md`](./docs/CALIBRATION.md) walks through the evidence (SkillSpector source, the ClawHub paper, our in-tree -catalogue) behind the chosen numbers and the measured LLM-on-vs-off -impact on the five in-tree skills. +catalogue) behind the chosen numbers. The architecture keeps room for additional scanners (gitleaks, Semgrep, VirusTotal Premium, etc.); adding one is a new module under `scanner/`, From 64414e9fc1d169fcaa7bd026a34a7b157a04ae99 Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 17:34:21 +0000 Subject: [PATCH 15/18] chore: delete docs/CALIBRATION.md, drop all references to it The file was not consumed by SkillSpector, the scanner code, or the workflow; it was a 230-line design-decision retrospective justifying two YAML values. The load-bearing rationale (verdict thresholds map to SkillSpector's HIGH/CRITICAL bands and SkillSpector itself escalates at the same boundary) already lives in the config.yaml comment block right next to the thresholds. Drops the inline references in scanner/verdict.py, README.md, config.yaml, RiskBar.tsx, and VerdictExplanation.tsx. The docs/ directory is removed because the file was its sole occupant.
--- README.md | 16 +- config.yaml | 13 +- docs/CALIBRATION.md | 230 ------------------ scanner/verdict.py | 2 +- site/src/components/RiskBar/RiskBar.tsx | 2 +- .../VerdictExplanation/VerdictExplanation.tsx | 3 +- 6 files changed, 15 insertions(+), 251 deletions(-) delete mode 100644 docs/CALIBRATION.md diff --git a/README.md b/README.md index ce5b7bf..cd60361 100644 --- a/README.md +++ b/README.md @@ -77,12 +77,11 @@ scheduled scan publishes a useful result: provider this is `ANTHROPIC_API_KEY` (from [console.anthropic.com](https://console.anthropic.com); this is a separate billing line from Coder usage because SkillSpector cannot - be routed through aibridge today, see `docs/CALIBRATION.md`). - Without the secret, the scan still runs but SkillSpector falls - back to `--no-llm` static-only mode and precision drops. See - `docs/CALIBRATION.md` for the measured before/after numbers. The - optional `SLACK_WEBHOOK_URL` secret enables the - `notify-slack-on-failure` job; without it that job is a no-op. + be routed through aibridge today). Without the secret, the scan + still runs but SkillSpector falls back to `--no-llm` static-only + mode and precision drops. The optional `SLACK_WEBHOOK_URL` secret + enables the `notify-slack-on-failure` job; without it that job is + a no-op. ## Repo layout @@ -141,10 +140,7 @@ verdict: ``` SkillSpector's `risk_score` (0-100) is the only input. The thresholds -are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands; -[`docs/CALIBRATION.md`](./docs/CALIBRATION.md) walks through the -evidence (SkillSpector source, the ClawHub paper, our in-tree -catalogue) behind the chosen numbers. +are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands. 
The architecture keeps room for additional scanners (gitleaks, Semgrep, VirusTotal Premium, etc.); adding one is a new module under `scanner/`, diff --git a/config.yaml b/config.yaml index fac4259..c447563 100644 --- a/config.yaml +++ b/config.yaml @@ -58,13 +58,12 @@ scanners: # 51-80 HIGH DO_NOT_INSTALL -> verdict: suspicious # 81-100 CRITICAL DO_NOT_INSTALL -> verdict: malicious # -# Rationale and source links live in docs/CALIBRATION.md. Short version: -# SkillSpector's static-analysis layer is loud on real catalogues (the -# ClawHub paper measured a ~49% positive rate on 67k skills) and is -# advisory rather than authoritative, so we only escalate above its -# HIGH cutoff. CAUTION-band findings still appear in the per-skill page -# so reviewers can see them; we just do not flag the skill as suspicious -# at the catalogue level. +# Rationale: SkillSpector's static-analysis layer is loud on real +# catalogues (the ClawHub paper measured a ~49% positive rate on 67k +# skills) and is advisory rather than authoritative, so we only +# escalate above its HIGH cutoff. CAUTION-band findings still appear +# on the per-skill page so reviewers can see them; we just do not +# flag the skill as suspicious at the catalogue level. verdict: malicious_risk_score: 81 suspicious_risk_score: 51 diff --git a/docs/CALIBRATION.md b/docs/CALIBRATION.md deleted file mode 100644 index b089bdf..0000000 --- a/docs/CALIBRATION.md +++ /dev/null @@ -1,230 +0,0 @@ -# Verdict threshold calibration - -This document records how the verdict thresholds in `config.yaml` were -chosen. The thresholds are not arbitrary: they are aligned to -SkillSpector's own internal severity bands and informed by the published -evaluation of SkillSpector against a large real-world skill catalogue. - -If you bump the thresholds, update this doc in the same PR. Numbers that -nobody can defend later are how scanners drift into either uselessness -or boy-who-cried-wolf territory. 
- -## Inputs we are calibrating against - -### 1. SkillSpector's published severity bands - -NVIDIA's SkillSpector computes `risk_assessment.score` on a 0-100 scale -from rule hits weighted by severity, plus a 1.3x multiplier when the -skill carries executable scripts. The score is then bucketed into a -named severity and a `recommendation` field: - -| Score range | `severity` | `recommendation` | -|-------------|------------|---------------------| -| 0-20 | `LOW` | `SAFE` | -| 21-50 | `MEDIUM` | `CAUTION` | -| 51-80 | `HIGH` | `DO_NOT_INSTALL` | -| 81-100 | `CRITICAL` | `DO_NOT_INSTALL` | - -Source: [`skillspector/nodes/report.py`](https://github.com/NVIDIA/SkillSpector/blob/main/skillspector/nodes/report.py) -(`_compute_risk_score` for the weighting, `_severity_from_score` for the -bucketing). The SkillSpector CLI exits non-zero when `risk_score > 50`, -which is the same boundary as the `HIGH` band. - -### 2. The ClawHub evaluation - -Two NVIDIA-affiliated artifacts describe how SkillSpector performs in -the wild: - -- ClawHub paper, "ClawHub: A large-scale safety analysis of Claude - Skills" (arxiv.org/html/2606.01494v1). -- OpenClaw blog, "SkillSpector at scale on ClawHub" - (openclaw.ai/blog/openclaw-nvidia-skill-security). -- Hugging Face dataset of per-skill signals - (huggingface.co/datasets/OpenClaw/clawhub-security-signals). - -Two numbers from those sources drive our calibration: - -- On 67,453 real Claude skills, SkillSpector returned at least one - finding on roughly 49% of them. That is the population our verdict - policy will see most of, so a threshold at SkillSpector's MEDIUM band - would flag close to half the catalogue as "suspicious," which is not - useful. -- On a labelled subset of known-malicious skills, SkillSpector alone - caught about 6.8% (recall), while VirusTotal Premium caught about - 72.8%. SkillSpector is good for surfacing risky behaviour patterns; - it is not a reliable malicious-classifier on its own. 
- -The paper's own pipeline (`ClawScan`) treats SkillSpector as one of -several signals fed into an LLM-as-judge. That tells us SkillSpector's -output is best read as advisory until we add more scanners. - -### 3. Our existing in-tree results - -The current `coder/registry` in-tree catalogue contains five skills: -`coder/coder-modules`, `coder/coder-templates`, `coder/modules`, -`coder/templates`, and `coder/setup`. Under the chosen thresholds: - -| Skill | static score | LLM-mode score | static verdict | LLM-mode verdict | -|------------------------|-------------:|---------------:|----------------|------------------| -| `coder/coder-modules` | 10 | 0 | `clean` | `clean` | -| `coder/coder-templates`| 10 | 0 | `clean` | `clean` | -| `coder/modules` | 0 | 0 | `clean` | `clean` | -| `coder/templates` | 0 | 0 | `clean` | `clean` | -| `coder/setup` | 100 | 26 | `malicious` | `clean` | - -The previous thresholds (40/75) produced the same outcome for these -five inputs under static-only mode. The change does not silence any -signal that was firing today; it raises the bar that future skills -must clear before being called out. - -## Threshold choices - -```yaml -verdict: - malicious_risk_score: 81 - suspicious_risk_score: 51 -``` - -- `malicious_risk_score: 81` matches SkillSpector's `CRITICAL` band. - Anything SkillSpector itself describes as `CRITICAL` / - `DO_NOT_INSTALL` (top decile) becomes our `malicious` verdict. -- `suspicious_risk_score: 51` matches the `HIGH` band, which is also - the score at which the SkillSpector CLI starts exiting non-zero. A - skill that SkillSpector says is `HIGH` / `DO_NOT_INSTALL` becomes - our `suspicious` verdict (the registry-server badge surfaces this as - "Review before installing"). -- Skills in the `MEDIUM` / `CAUTION` band (21-50) stay `clean` at the - catalogue level. Their findings are still rendered on the per-skill - page so reviewers can drill in, but they do not trigger a badge. 
- This avoids broadcasting the ~half-of-catalogue base rate that - ClawHub measured. - -## LLM semantic pass - -SkillSpector ships a two-stage analyser: fast static rules (the 64 -patterns SkillSpector documents) followed by an optional LLM semantic -pass. The LLM pass reads each finding's surrounding context, classifies -intent, filters context-aware false positives, and writes a -human-readable explanation that ships in the per-finding output. - -### Measured impact on the five in-tree skills - -Measured against `gpt-4.1-mini` through Coder's AI Gateway during -development, before the provider swap below. Methodology: ran -`skillspector scan` twice on each upstream skill (once with -`--no-llm`, once with LLM mode on) and aggregated the per-skill -results. Total catalogue-wide findings dropped from 25 to 2: - -| Skill | findings (static) | findings (LLM) | Δ | -|------------------------|------------------:|---------------:|----------| -| `coder/coder-modules` | 1 | 0 | -1 | -| `coder/coder-templates`| 1 | 0 | -1 | -| `coder/modules` | 0 | 0 | 0 | -| `coder/setup` | 23 | 2 | -21 | -| `coder/templates` | 0 | 0 | 0 | -| **TOTAL** | **25** | **2** | **-23** | - -`coder/setup`'s verdict moves from `malicious` (100) to `clean` (26). -The LLM filtered all 23 static-only findings as context-aware false -positives (the EA2 hits on safeguard prose, the MP2 hits on PNG -assets, the SC2 hits on `curl coder.com/install.sh`, the PE3 hits on -the skill's own scratch files, etc.) and surfaced 2 new MEDIUM -findings (`SQP-2`) the static pass missed: the GitHub device-flow -scripts write the OAuth token and session config to disk without a -user-visible notification. Those 2 findings are real and minor; the -cleanest fix is a one-line `echo` before each write in the upstream -skill repo rather than any change here. - -**Model swap caveat**: production runs against `claude-sonnet-4-6` -via the Anthropic API (see "Provider choice" below), not against -`gpt-4.1-mini`. 
The 25 → 2 delta above measures SkillSpector's LLM -semantic pass *as a capability*; absolute counts may shift one or two -either way under Claude because the two models filter false positives -slightly differently. The verdict-band outcomes (`coder/setup` flips -malicious → clean, every other in-tree skill stays clean) are robust -to that drift: every static finding on the four other skills is well -below the `suspicious_risk_score: 51` cutoff to begin with, so even a -100% no-filter LLM still leaves them clean. Recalibration against -Claude is a 30-minute follow-up PR once the secret is wired in and -the first production scan lands; this doc gets the real numbers then. - -### Provider choice and the workflow gap - -The scheduled scan runs LLM mode when the workflow's chosen credential -secret is configured. The fallback to `--no-llm` is automatic when the -secret is missing, so an unset secret on a fresh fork degrades the -scan rather than breaking it. - -Provider is `anthropic` against `api.anthropic.com` directly, model -`claude-sonnet-4-6`. The Anthropic API key is on a separate billing -line from Coder usage because SkillSpector cannot be routed through -Coder's AI Gateway today: - -- aibridge does proxy Claude under its `/anthropic` path, but only in - Anthropic's native `/v1/messages` shape. -- SkillSpector pipes every provider through - `langchain_openai.ChatOpenAI`, which speaks OpenAI's - `/v1/chat/completions` shape. -- aibridge does not mount `/v1/chat/completions` on its `/anthropic` - path (verified: `route not supported`). -- SkillSpector's `anthropic` provider also hardcodes - `https://api.anthropic.com/v1/` in `providers/anthropic/provider.py` - and ignores `ANTHROPIC_BASE_URL`, so even if aibridge did expose the - OpenAI-compat route on its Anthropic path, an env-only swap would - not steer SkillSpector at it. 
- -Using `openai` against aibridge with `gpt-4.1-mini` is a viable -alternative (and is what the calibration table above was measured -against). The trade-off is real: aibridge routing keeps inference -spend on Coder's existing billing line and avoids a second vendor, -but commits the scanner to whichever OpenAI-class model aibridge -exposes rather than Claude. If aibridge later adds either a Claude -OpenAI-compat route on `/anthropic` or a native-Anthropic -integration into SkillSpector, the provider line in `config.yaml` -flips back without any workflow change. - -### How the LLM pass interacts with the verdict math - -The LLM pass does not affect the threshold math. SkillSpector's -`risk_score` is still a 0-100 weighted sum of rule hits, and the -51/81 cutoffs above still map directly to `HIGH` and `CRITICAL` bands. -What changes is which findings reach the verdict: false positives the -LLM filters out no longer contribute to the score. Verdicts move down -(or stay the same) when LLM mode flips on, not up. - -## What we did not change (and why) - -- We did not raise `suspicious_risk_score` above `51`. SkillSpector - itself escalates at that boundary; staying in sync keeps the - recommendation field on the per-skill page consistent with the - badge on the catalogue page. -- We did not add a separate "low confidence" verdict. A fourth tier - buys us little until we have a second scanner to combine signals - with. The schema's `unknown` verdict already covers the - "could not assess" case, which is the only failure mode v1 cares - about. -- We did not move thresholds into the published `latest.json`. The - SPA uses defaults that match `config.yaml`. If a future change makes - the artifact policy-aware, plumb the values through and drop the - defaults from `VerdictExplanation.tsx`. - -## When to revisit - -Re-run this analysis when any of: - -- A new scanner (gitleaks, Semgrep, VirusTotal Premium, ClawScan, etc.) - joins the pipeline. 
The combined verdict logic in - `scanner/verdict.py` will need a new branch and most likely - different thresholds per signal. -- SkillSpector bumps its scoring weights or rule catalogue in a way - that shifts where its bands sit. The pinned commit in `config.yaml` - protects us from drifting silently; a deliberate bump should walk - through this doc. -- The LLM model or provider changes (e.g., moving from - `claude-sonnet-4-6` to Opus, Fable, or to a non-Anthropic - provider). Different models filter differently; spot-check the - five in-tree skills before merging the provider swap and refresh - the table above. -- We observe a real-world skill that lands in an obviously wrong - bucket (false positive or false negative). Open a tracking issue, - link it from this doc, and adjust with evidence in the next PR. diff --git a/scanner/verdict.py b/scanner/verdict.py index 7e6d46a..8e77360 100644 --- a/scanner/verdict.py +++ b/scanner/verdict.py @@ -48,7 +48,7 @@ def evaluate( thresholds = config.get("verdict") or {} # Defaults match config.yaml. Keep these in sync with - # docs/CALIBRATION.md and VerdictExplanation.tsx's defaults. + # VerdictExplanation.tsx's defaults. malicious_at = int(thresholds.get("malicious_risk_score", 81)) suspicious_at = int(thresholds.get("suspicious_risk_score", 51)) diff --git a/site/src/components/RiskBar/RiskBar.tsx b/site/src/components/RiskBar/RiskBar.tsx index f515c98..94bdb49 100644 --- a/site/src/components/RiskBar/RiskBar.tsx +++ b/site/src/components/RiskBar/RiskBar.tsx @@ -19,7 +19,7 @@ interface RiskBarProps { * Optional cutoffs (0..100) for the suspicious and malicious bands. * When supplied, the bar renders thin tick marks at those positions so * the user can see how close a score is to escalating. Defaults match - * the policy in config.yaml and docs/CALIBRATION.md. + * the policy in config.yaml. 
*/ suspicious_at?: number; malicious_at?: number; diff --git a/site/src/components/VerdictExplanation/VerdictExplanation.tsx b/site/src/components/VerdictExplanation/VerdictExplanation.tsx index 4db9f35..9d35217 100644 --- a/site/src/components/VerdictExplanation/VerdictExplanation.tsx +++ b/site/src/components/VerdictExplanation/VerdictExplanation.tsx @@ -374,8 +374,7 @@ const CategoryCard: FC = ({ group }) => { export const VerdictExplanation: FC = ({ skill, // Defaults match config.yaml and scanner/verdict.py. They are also - // SkillSpector's own HIGH and CRITICAL band edges; see - // docs/CALIBRATION.md for the calibration write-up. + // SkillSpector's own HIGH and CRITICAL band edges. malicious_at = 81, suspicious_at = 51, className, From 85911ecebbc847c6334648b604baf057df307e50 Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 17:37:31 +0000 Subject: [PATCH 16/18] docs(README.md): drop the 'One-time setup on the repo' section The section was instructions for setting up the canonical coder/coder-skill-scanner GitHub repo (Pages source, workflow permissions, ANTHROPIC_API_KEY secret). Those are one-shot operator steps, not user-facing docs about the product, and are tracked outside the repo. The Forking section keeps its own minimal, self-contained equivalent for downstream forks. --- README.md | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index cd60361..99d198e 100644 --- a/README.md +++ b/README.md @@ -62,27 +62,6 @@ Vite's dev proxy (see `site/vite.config.ts`) forwards `latest.json`, app sees real scanner output without CORS shenanigans. SPA routes such as `/skills/coder/setup` stay client-side. -## One-time setup on the repo - -Three things have to be configured once on the GitHub repo before the -scheduled scan publishes a useful result: - -1. **Settings > Pages**: set source to "GitHub Actions". The - `publish-pages` job in `scan.yaml` will fail until this is set. -2. 
**Settings > Actions**: workflow permissions "Read and write" so - `publish-release` can create the rolling `latest` release. -3. **Settings > Secrets and variables > Actions > Secrets**: add the - LLM credential matching the provider in `config.yaml`'s - `scanners.skillspector.llm.provider`. For the default `anthropic` - provider this is `ANTHROPIC_API_KEY` (from - [console.anthropic.com](https://console.anthropic.com); this is a - separate billing line from Coder usage because SkillSpector cannot - be routed through aibridge today). Without the secret, the scan - still runs but SkillSpector falls back to `--no-llm` static-only - mode and precision drops. The optional `SLACK_WEBHOOK_URL` secret - enables the `notify-slack-on-failure` job; without it that job is - a no-op. - ## Repo layout ```text @@ -120,9 +99,10 @@ This scanner is data-driven. To run it against a different registry: "GitHub Actions"). 4. Set Actions workflow permissions to "Read and write" so the publish-release job can create releases. -5. To enable the LLM semantic pass, add the credential secret per - "One-time setup on the repo" above, AND confirm - `.github/workflows/scan.yaml` exports the secret into the +5. To enable the LLM semantic pass, set the credential secret matching + `config.yaml`'s `scanners.skillspector.llm.provider` on your fork + (for the default `anthropic` provider, `ANTHROPIC_API_KEY`), AND + confirm `.github/workflows/scan.yaml` exports that secret into the SkillSpector step. Static-only mode (without the secret) is the default and works out of the box. 6. Enable Actions. From 0f10e7127e3f4ce8bd8f2ee87a307da4f4ae331d Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 17:52:07 +0000 Subject: [PATCH 17/18] feat(scan.yaml): wire ANTHROPIC_API_KEY + SKILLSPECTOR env into SkillSpector steps Three edits inside the scan job: 1. New 'Determine LLM mode' step (id=llm) that emits extra_flags=--no-llm when ANTHROPIC_API_KEY is unset, and extra_flags= when it is set. 
Also emits a workflow-level ::warning:: in the unset case so the degraded mode is visible on the run page. 2. SkillSpector (JSON) step: passes SKILLSPECTOR_PROVIDER=anthropic, SKILLSPECTOR_MODEL=claude-sonnet-4-6, ANTHROPIC_API_KEY into env, and uses steps.llm.outputs.extra_flags instead of hardcoded --no-llm. 3. SkillSpector (SARIF) step: same env block, same flag swap. With the secret set on the repo this turns on SkillSpector's LLM semantic pass; without it the workflow falls back to --no-llm. --- .github/workflows/scan.yaml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scan.yaml b/.github/workflows/scan.yaml index c5ea763..f40714c 100644 --- a/.github/workflows/scan.yaml +++ b/.github/workflows/scan.yaml @@ -88,22 +88,43 @@ jobs: echo "drift=true" >> "$GITHUB_OUTPUT" echo "Skill path source/${{ matrix.skill_path }} not present upstream; will report catalogue drift." >&2 fi + - name: Determine LLM mode + id: llm + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + set -euo pipefail + if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then + echo "extra_flags=" >> "$GITHUB_OUTPUT" + echo "SkillSpector LLM mode: enabled (anthropic provider, api.anthropic.com)." >&2 + else + echo "extra_flags=--no-llm" >> "$GITHUB_OUTPUT" + echo "::warning::ANTHROPIC_API_KEY secret not set; SkillSpector will run with --no-llm. Set the secret on this repo to enable the LLM semantic pass." 
+ fi - name: SkillSpector (JSON) if: steps.path_check.outputs.drift == 'false' continue-on-error: true + env: + SKILLSPECTOR_PROVIDER: anthropic + SKILLSPECTOR_MODEL: claude-sonnet-4-6 + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | mkdir -p out skillspector scan "source/${{ matrix.skill_path }}" \ - --no-llm \ + ${{ steps.llm.outputs.extra_flags }} \ --format json \ --output "out/skillspector.json" || true - name: SkillSpector (SARIF) if: steps.path_check.outputs.drift == 'false' continue-on-error: true + env: + SKILLSPECTOR_PROVIDER: anthropic + SKILLSPECTOR_MODEL: claude-sonnet-4-6 + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | mkdir -p out skillspector scan "source/${{ matrix.skill_path }}" \ - --no-llm \ + ${{ steps.llm.outputs.extra_flags }} \ --format sarif \ --output "out/skillspector.sarif" || true - name: Combine From 2848e3cb2adbf7e236289e4f521e058bcbf876e9 Mon Sep 17 00:00:00 2001 From: DevCats Date: Wed, 24 Jun 2026 18:18:31 +0000 Subject: [PATCH 18/18] fix(config.yaml): disable in_tree enumeration to stop duplicate skills coder/registry declares the same upstream skills in two formats: in-tree as .agents/skills/coder-modules and .agents/skills/coder-templates, and via registry/coder/skills/README.md frontmatter as sources[].skills keyed 'setup', 'modules', 'templates' pointing at the coder/skills repo. scanner/enumerate.py dedupes on (namespace, slug); the different slug names ('coder-modules' vs 'modules') let both rows survive, so the catalogue scan sees 5 skills when it should see 3. The external-sources frontmatter is the canonical declaration in coder/registry going forward. Drop the in_tree block from this config so the enumerator only walks the README frontmatter. in_tree remains supported in scanner/enumerate.py for forks that maintain an .agents/skills/ layout; they re-enable by adding the block back to their config.yaml fork. 
--- config.yaml | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/config.yaml b/config.yaml index c447563..18a9019 100644 --- a/config.yaml +++ b/config.yaml @@ -6,11 +6,13 @@ config_version: 1 catalogue: - # Where to enumerate skills from. Both the current production format - # (in-tree under .agents/skills/) and the future external-sources - # format (registry//skills/README.md with sources[].repo) are - # supported. When both name the same slug, the external-sources entry - # wins. + # Skills are declared by per-namespace README.md files under + # registry//skills/ in the catalogue repo. Each README's + # frontmatter lists sources[].repo plus per-skill overrides. This is + # the canonical declaration; the in-tree .agents/skills/ format is + # supported in scanner/enumerate.py for forks that need it but is + # not enabled here because coder/registry duplicates the same + # upstream skills across both layouts under different slugs. registry_repo: owner: coder repo: registry @@ -22,13 +24,6 @@ catalogue: # has its frontmatter parsed for sources[].repo plus per-skill # overrides keyed by slug. readme_glob: registry/*/skills/README.md - in_tree: - enabled: true - # The namespace is fixed for in-tree skills today (coder). - namespace: coder - # Path glob inside the catalogue repo. Each /SKILL.md is one - # skill row in the matrix. - base_path: .agents/skills scanners: skillspector: