From 7f34493d9bd2ce6890d462b02bc3cad0e4a10e2e Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Sat, 27 Jun 2026 15:06:02 -0500 Subject: [PATCH 1/2] docs(design): cross-package error-code discipline + foundation extraction plans (bd-egcyeym9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design artifacts for pulling the diagnostics-foundation crates out of the q2 monorepo into standalone posit-dev/ repos: - claude-notes/designs/cross-package-error-codes.md — the general two-identity error-code discipline: package-owned origin codes vs product-owned presentation codes, the remap that bridges them, the tier1/tier2/forbidden fallback hierarchy, per-node definer/remapper roles, terminal-vs-remapped inert provenance, and append-only ("cool URLs") code lifecycle. - claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md — phased, leaf-first plan to extract quarto-source-map then quarto-error-reporting. - claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md — the YAML-stack extraction design (gated behind the foundation). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../designs/cross-package-error-codes.md | 426 +++++++++++++++ ...6-26-extract-error-reporting-foundation.md | 319 +++++++++++ ...6-extract-quarto-yaml-validation-design.md | 496 ++++++++++++++++++ 3 files changed, 1241 insertions(+) create mode 100644 claude-notes/designs/cross-package-error-codes.md create mode 100644 claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md create mode 100644 claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md diff --git a/claude-notes/designs/cross-package-error-codes.md b/claude-notes/designs/cross-package-error-codes.md new file mode 100644 index 000000000..2c87e9bee --- /dev/null +++ b/claude-notes/designs/cross-package-error-codes.md @@ -0,0 +1,426 @@ +# Error-code identity across package boundaries + +**Status:** Design philosophy (proposed) +**Driver:** bd-egcyeym9 (extracting `quarto-yaml-validation`), but deliberately +written to be general — it governs *any* error that is **defined in one package +and surfaced by another product**. +**Related:** `claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md` + +## The problem, stated generally + +Quarto 2 assigns every user-facing error a code in a single flat, navigable space +— `Q-<subsystem>-<number>` — with one curated docs page per code at +`quarto.org/docs/errors/...`. Two goals are now in tension: + +1. **Quarto-2 users** should see *one* code space that is unique and easy to + navigate. They must **not** have to know how Quarto is internally decomposed + into packages — which crate emitted an error is an implementation detail of + *who builds Quarto*, not of *how Quarto reports errors*. +2. **Independently-developed packages** (e.g. a standalone `quarto-yaml-schema` + that non-Quarto developers also depend on) should own **their own** error + codes, meaningful to *their* users, with no knowledge of Quarto's `Q-*` scheme.
+ +These reconcile only if a code has **two identities** and the *product*, not the +*package*, owns the bridge between them. + +## What the inspirations actually teach + +The Quarto error-code scheme was inspired by the **TypeScript compiler** (numeric +`TSxxxx` diagnostics in a single `diagnosticMessages.json`). That model is an +excellent template for the *presentation* layer — flat, dense, centrally curated, +one docs page per code — **but it offers nothing for the cross-package case**: the +compiler is a monolith; all diagnostics are administered in one file by one team. +A flat numeric space cannot be minted by independently-developed packages without +a central allocator they all coordinate with — which is exactly the coupling we +are trying to remove. + +The precedents that *do* solve the cross-package case use **namespaced** codes — +and, crucially, they show what to borrow *and* what to add. + +### How Clippy does it (alongside rustc) + +- rustc owns a flat **numeric central** registry (`E0277`) with a central error + index (`doc.rust-lang.org/error_codes/E0277.html`) — the TS-compiler model. +- Clippy, developed independently, does **not** mint `E`-codes. It uses **named, + namespaced** lints: `clippy::needless_return`. The `clippy::` prefix is a + **registered tool namespace** — rustc has a first-class notion of "tool lints" + (`register_tool`), so the host's attribute/level machinery + (`#[allow(clippy::…)]`, allow/warn/deny) can refer to a subsystem's diagnostics + *generically*, without rustc knowing any specific Clippy lint. +- Clippy ships its **own** catalog (categories: correctness/style/perf/pedantic/…) + and its **own** docs site (`rust-lang.github.io/rust-clippy`). +- **Lesson:** the host provides shared *infrastructure* (the lint store, level + machinery, the renderer) but **not the identity namespace**. Uniqueness comes + from the `tool::name` structure, not a central allocator. 
This is exactly the + role split we want: `quarto-error-reporting` = the lint-store/renderer analog; + each library = a "tool" that owns its names. + +### How ESLint does it (core + plugins) + +- Core rules are bare (`no-unused-vars`); plugin rules are **namespaced by + package**: `@typescript-eslint/no-unused-vars`, `import/no-cycle`. The namespace + *is* the package identity (the npm package, minus the `eslint-plugin-` + convention). +- ESLint has a **self-description protocol**: each rule object carries + `meta.docs.url`. The plugin declares *where its own docs live*, and + editors/formatters surface that URL. The host does not own the docs; the rule + points at them. +- **Lesson:** package-prefix namespacing for composability **+ a per-code docs URL + the library owns**. This is precisely the pluggable-docs-URL mechanism our + `CatalogProvider` needs. + +### The thing neither does — and why Q2 must + +ESLint surfaces `@typescript-eslint/…` *to the user*; Clippy surfaces `clippy::…` +*to the user*. They **expose** the package decomposition because their users +intentionally assemble toolchains — they are *platforms*. Quarto 2 is a *product*, +and its principle is the opposite: **a Q2 user must never have to follow a chain of +library dependencies to explain an error.** So Q2 must add the one thing the +platforms omit — a **product-owned remap** to a single `Q-*` namespace. + +**Takeaway:** borrow the *library-side discipline* from Clippy/ESLint (namespaced, +package-owned codes that self-describe their docs) and the *presentation layer* +from the TS compiler (flat, navigable, central). Add the piece neither has: a +**product-owned remap** that bridges them. The two layers meet only through that +remap. + +## The two-identity model + +Every error carries: + +### 1. An **origin code** — owned by the package that *defines* the error +- **Namespaced by package:** `<package>/<name>`, e.g. + `yaml-schema/type-mismatch`.
Namespacing is mandatory and structural; it is what + makes codes from independently-developed packages composable with **no central + registry**. +- **Stable for the package's own users:** adding origin codes is non-breaking; + a code may be *retired* (no longer emitted) but is never deleted or repurposed (see "Codes are append-only"). +- The package may ship its **own** catalog (titles, docs, since-version) for its + own direct users, or ship none and let every embedder supply its own. + +### 2. A **presentation code** — owned by the *product* that surfaces the error +- Quarto 2's `Q-<subsystem>-<number>`: flat, navigable, centrally curated, one docs + page per code. **Unchanged from today** — this is the contract Q2 users rely on. +- The product owns a **remap**: `origin code → presentation code`. This is the + *only* place the two namespaces meet. +- Q2 users see **only** presentation codes and navigate quarto.org. They never + learn that `Q-1-11` was *defined* in an external `yaml-schema` crate. + +## The fallback hierarchy (and the contract it implies) + +When a product surfaces an error that a library defined, three outcomes are +possible, in descending order of quality: + +1. **Best — remapped.** The product has a presentation code for this origin code; + the user sees `Q-1-11` and navigates quarto.org. The package decomposition is + invisible. +2. **Acceptable — passthrough.** No presentation code, but the library's **own + stable origin code** shows through (`yaml-schema/type-mismatch`, library docs). + The user has *a* stable handle to search/report — strictly better than nothing. +3. **Forbidden — codeless.** A library error with no stable code at all. The user + has nothing durable to navigate. + +The discipline that **guarantees tier 3 never happens**: every diagnostic a +participating library emits MUST carry a stable, namespaced origin code.
Then the +worst case is always tier 2, and the embedder's remap is a pure *upgrade* +(tier 2 → tier 1) — optional, per-code, and never load-bearing for correctness. + +This is why the audit only *encourages* full remap coverage rather than *failing* +on an unmapped code (it refines invariant I3): an unmapped origin code is +acceptable, not broken. + +## Roles are per-node, not per-package-type + +"Library" and "product/embedder" are **not** two kinds of crate — they are two +*roles a single crate can play at once*. Every node that uses the reporting crate +is simultaneously: + +- a **definer** — it mints its own *terminal* codes (errors it originates), and +- optionally a **remapper** — it relabels codes surfaced from its dependencies + under its own scheme. + +Quarto 2 is not a special "product" role; it is simply the **terminal remapper** in +a chain — the node whose presentation codes happen to be user-facing. A mid-chain +library that wraps `quarto-yaml-validation` and re-exposes some of its errors under +its own codes performs the *exact same* operation Q2 does. The chain always bottoms +out at **terminal** codes, and the library contract below ("every emittable error +carries a stable code") is what **guarantees every chain terminates**. + +## Terminal vs remapped: the developer-provenance lane + +A diagnostic's code is one of two kinds, and the crate exposes the distinction: + +- **Terminal** — a code this node *originates*. The definition lives here. +- **Remapped** — a code this node presents in place of a code surfaced from a + *dependency*. This node is relabelling someone else's terminal error. + +The remapped/terminal distinction is **not for end users** (a Q2 user wants only +`Q-1-11`); it is a **developer-facing provenance breadcrumb** — it lets a +developer or a bug report trace where an error *ultimately* originates, and where +to go to fix it. It rides in structured/JSON output, never in human error text. 
+ +Three rules keep this cheap and decoupled: + +1. **Provenance is inert data, never a typed edge.** A remap discloses its upstream + as a *string code + optional source URL* — e.g. + `{ code: "yaml-schema/type-mismatch", source: "https://github.com/posit-dev/…" }` + — and **must not** depend on, or `use`, the upstream crate's error types. A + typed provenance edge would silently rebuild the compile-time coupling the whole + extraction exists to remove. Stringly-typed *on purpose*. +2. **Disclose the *immediate* upstream, not a resolved ultimate.** "Terminal" is + only well-defined transitively: a node can truthfully describe the dependency it + *directly* depends on (it can read that dependency's catalog), but cannot + reliably assert the chain's ultimate base — if a mid-chain node later re-bases + its own code, a hard-coded "ultimate" disclosure goes silently stale and + **nothing re-resolves it** (we deliberately do not traverse — see rule 3). So + each remap discloses its immediate upstream code + optional URL, and each node + carries a self-declared `terminal: bool`. A developer reconstructs a deep chain + by walking immediate disclosures hop-by-hop until a `terminal` node. **In the + common 1-hop case (Q2 ← `yaml-schema`), immediate *is* terminal**, so this costs + nothing today; it only keeps rare deep chains honest. +3. **No automated cross-repo traversal, and no resolved-ultimate pointer.** + Disclosure is a **best-effort breadcrumb, not a guarantee**: there is no + build-time check across repos (you do not have the upstream repo when you + compile), so a disclosed code/URL can rot. That is accepted. Optionally pin a + version or commit-ish in the URL when reproducibility matters; never mandate it. + Intra-workspace a lint *could* check disclosures against local crates; + cross-repo it cannot, and the discipline must not pretend otherwise. 
+ +> **Design-for-1-hop.** Permit chaining so the model is never *wrong*, but optimize +> ergonomics for a single hop and build **zero** chain-resolution machinery. + +## The three contracts + +The design is a division of responsibility across three roles. A single crate may +play the first two at once (see "Roles are per-node"). Spelling them out *is* the +spec a library author follows. + +### Library-author contract (e.g. `quarto-yaml-validation`) +- **Reserve a namespace** (`<package>/…`, e.g. `yaml-schema`) and mint all codes under + it. Never emit another party's codes (no `Q-*` in the library — the Clippy rule: + Clippy never emits `E`-codes). +- **Every emittable diagnostic carries a code.** No anonymous errors (guarantees + tier 2 is always available). +- **Codes are stable and append-only:** additive evolution is non-breaking; a code + is *retired* (stops being emitted) rather than deleted, stays documented forever, + and is **never** repurposed to a new meaning (see "Codes are append-only"). +- **Self-describe:** ship a `CatalogProvider` mapping each code → at least a title, + optionally a docs URL on the library's own site (the ESLint `meta.docs.url` + lesson). A library may also ship *no* catalog and let embedders supply + everything. + +### Embedder/remapper contract (any node that relabels a dependency's codes — Q2, `n2`, or a mid-chain library) +- **Own a remap** `immediate-origin → presentation` for the codes it chooses to + elevate to tier 1, plus a catalog for its own presentation codes. +- **Disclose provenance** on each remapped code: the *immediate* upstream code + + optional source URL, as inert data (see "Terminal vs remapped"). Mark the code + `remapped`, not `terminal`. +- **Choose the unmapped policy:** Q2 lets the origin code pass through (tier 2) and + audit-*warns* to encourage coverage. A different node could hide unmapped errors, + or fail its build — its call.
+- **Never leak its own scheme upstream:** the remap lives at this node's reporting + boundary, not in the dependency. + +### Shared-infrastructure contract (`quarto-error-reporting` — the reusable host crate; keeps its name when it moves to `posit-dev/`, decided 2026-06-27) +- **Namespace-agnostic:** carries whatever code string it is handed; knows neither + `yaml-schema/*` nor `Q-*`. +- **Provides the seams:** the `CatalogProvider` trait (title/docs lookup) and the + remap hook applied before render; the diagnostic type, builder, and renderer. +- **Carries the provenance metadata:** the diagnostic model exposes `terminal` vs + `remapped` and an optional inert `{ code, source_url? }` provenance, so any + embedder's wire format can serialize it. (The *concept* lives here even though + Q2's specific JSON wire shape stays q2-side.) +- This is the rustc-`LintStore` analog: it supplies the machinery and the + rendering, and owns **no** identity policy. + +## Multiple embedders (the `n2` case) + +Consider `n2`, a different product that also depends on `quarto-yaml-validation` +and also does not want its users to see `yaml-schema/*` codes. Nothing special is +needed: `n2` supplies **its own** remap + catalog over the same +`quarto-error-reporting`. The remap is **not a Q2 feature** — it is a per-embedder +facility the shared crate provides. Same infrastructure, different policy tables. +This is the proof that the design is embedder-agnostic, and the reason the remap +hook must live in `quarto-error-reporting`, not in any Quarto-specific crate. + +## Invariants (what makes this sound) + +- **I1 — Subsystem ≠ package.** The `<subsystem>` in `Q-<subsystem>-<number>` is a + *product taxonomy of meaning*, not a map of the crate graph. The product may + route two packages into one subsystem, split one package across subsystems, or + renumber subsystems — all without touching any package. A package must never + assume its errors land in a particular subsystem.
**This is the firewall** that + lets internal package decomposition change freely without disturbing the + user-facing code space. (Directly answers the user's requirement: Q2 users + don't see, and aren't affected by, the package decomposition.) + +- **I2 — Origin collisions are structurally impossible.** Because every origin + code carries its package namespace, two independently-developed packages can + never mint the same origin code. No central allocator. (This is precisely the + gap in the flat-numeric model.) + +- **I3 — The remap is product-owned and *partial-by-design*.** The product — not + the package — decides *which* origin errors it elevates to a presentation code + and *how* it groups them. A mapped code yields tier 1; an **unmapped** code falls + back to tier 2 (the library's own stable origin code passes through). So the + remap need not be total: Quarto 2 lets unmapped codes pass through and the audit + *warns* (not fails) to encourage coverage. (This refines the earlier + "unmapped = build failure" stance: the fallback hierarchy makes unmapped + *acceptable*, not broken.) + +- **I4 — Presentation uniqueness is the product's existing guarantee, untouched.** + Presentation codes live entirely in Q2's flat space, so the *existing* `Q-*` + audit's uniqueness/coverage guarantees carry over unchanged. The remap adds + **one** new check (remap coverage over surfaced origin codes — a warning, not a failure, per I3), not a rework. + +- **I5 — Provenance survives but does not navigate.** The origin code travels in + structured/JSON output and verbose diagnostics as *provenance* (a bug report can + say "`yaml-schema/type-mismatch`, surfaced as `Q-1-11`"), but the *navigational + handle* a user follows is always the presentation code. This satisfies "users + shouldn't have to know the package decomposition" while keeping debuggability. + +## Mechanism (how it maps onto the crates) + +- The package emits diagnostics tagged with **origin codes** + (`ValidationErrorKind::code() -> "yaml-schema/..."`).
+- The reusable reporting core (`quarto-error-reporting`) is namespace-agnostic: it + carries whatever code string it is handed and renders it, consulting an installed + `CatalogProvider` for title/docs. It knows about neither `yaml-schema/*` nor + `Q-*`. +- The **product** installs two things at startup: + - a **`RemapTable`** (`origin → presentation`), applied at the reporting + boundary *before* render, so the diagnostic's primary code becomes `Q-1-11` + and the origin code is retained as provenance (I5); + - a **`CatalogProvider`** over `error_catalog.json`, keyed by presentation codes. +- A non-Quarto embedder installs neither (or its own): origin codes render as-is, + with the embedder's catalog or none. + +``` +package (yaml-schema) product (Quarto 2) user +───────────────────── ────────────────────────── ────────────── +ValidationErrorKind RemapTable: sees: Q-1-11 + .code() yaml-schema/type-mismatch docs: quarto.org/ + = "yaml-schema/ → Q-1-11 docs/errors/ + type-mismatch" CatalogProvider: yaml/Q-1-11 + Q-1-11 → {title, docs_url} (origin code only +(also usable standalone, quarto-error-reporting renders in JSON/verbose + with the package's own with Q-1-11 primary, as provenance) + catalog or none) yaml-schema/… as provenance +``` + +## Rejected alternatives + +- **Flat shared numeric space across packages (literal TS-compiler model).** + Needs a central allocator every package coordinates with; defeats independent + development. Rejected. +- **Package emits product codes directly (status quo: `error_code() -> "Q-1-11"`).** + Bakes product policy into the package; the package can't ship to non-Quarto + users; Q2's subsystem taxonomy leaks across the boundary. This is exactly what + we are undoing. Rejected. +- **No product codes; show origin codes to users.** Q2 users would see + `yaml-schema/type-mismatch` next to `Q-2-5` — an inconsistent, un-navigable + space, and precisely the "must know the package decomposition" we are avoiding. + Rejected. 
+ +## Codes are append-only ("cool URLs for error codes") + +Error codes are **unique and, in principle, never deleted or repurposed** — the +"cool URLs don't change" covenant (Berners-Lee, 1998) applied to error +identifiers. A docs page accumulates codes the current version no longer emits, +**and that is fine**: someone is running an old version, or online content +(issues, Stack Overflow, bookmarks) references the code. A frozen, resolvable code +is the whole point. + +This also *protects the provenance breadcrumb* (rules under "Terminal vs +remapped"): if codes are never deleted or redefined, a disclosed upstream +code/URL degrades gracefully — at worst it resolves to a *retired-but-documented* +code, never a 404 and never a silently-different meaning. + +**Three lifecycle states, and which transitions are legal:** + +- **Active** — emitted by the current version. +- **Retired** — no longer emitted, but still documented (old versions / external + references still resolve). +- **Never** — never existed. + +Legal: `Active → Retired`. **Forbidden: `Active → Never` (deletion) and +`code → different meaning` (repurposing).** Meaning is frozen at allocation, +forever. So a library *retires* (stops emitting) a code rather than deleting it, +and **never** reuses a code string for a new meaning — this is stronger than "major +version bump"; repurposing is simply off the table. + +**The freeze binds at first *public* exposure, not first commit.** A code added on +a dev branch and never shipped — never emitted in a release, never publicly +documented — may still be renumbered or dropped (semver pre-1.0 logic). The +covenant starts at first public *emission or documentation*, whichever comes first. +This keeps ordinary development unconstrained. + +**Encourage cross-repo; enforce intra-repo.** Append-only is *checkable within a +repo* (diff the catalog against git history or a committed snapshot — no entry +removed, no meaning changed) and should be a CI lint there. 
It is *not* checkable +across repos (you do not have the upstream repo at build time), so for a +dependency's catalog this is documented expectation, self-enforced by that library +in its own CI — the same intra/cross asymmetry as provenance staleness. + +> **Audit consequence (concrete).** Q2's existing bidirectional +> `scripts/audit-error-codes.py` must change: **keep** "every *emitted* code is +> documented" (forward), **drop** "every *documented* code is emitted" (reverse — +> it contradicts retirement), and **add** an append-only check. A retired or +> dormant code is a legitimate catalog-only entry. `ErrorCodeInfo.since_version` +> already records the "introduced" end; an optional `retired_in`/`last_emitted` +> records the other, so a docs page can show the emitting window. + +## Governance + +- Each package documents its origin-code namespace and stability promise in its + own repo. +- Quarto 2 owns `error_catalog.json` + the remap table + the audit. Surfacing a new + external error = add origin code (upstream) **+** add remap entry **+** add + catalog entry (q2); the audit enforces the three-way join. +- **Versioning:** upstream adding an origin code is non-breaking; Q2 surfaces it on + its own schedule by adding remap+catalog entries. Upstream *retiring* a code + (ceasing to emit it) keeps the code documented (append-only — see "Codes are + append-only"); upstream *repurposing* a code is forbidden, not merely a major + bump. Q2's remap pins the upstream version it maps so its provenance disclosure + stays accurate. + +## Open questions + +- **Origin-code shape:** namespaced slugs (`yaml-schema/type-mismatch`, + recommended — human-readable, obviously package-owned) vs. namespaced numerics + (`yaml-schema:0001`). Slugs are recommended; the only argument for numerics is + parity with the TS-compiler aesthetic, which does not compose and is not needed + at the package layer. +- **Provenance surfacing:** confirm I5's "JSON/verbose only" placement vs. 
always + showing origin code in parentheses. (Recommendation: structured/verbose only.) + Mechanism decided — see "Terminal vs remapped": inert `{ code, source_url? }`, + immediate upstream only, self-declared `terminal` flag, no chain resolution. +- **`terminal`/`remapped` exposure:** decided to expose it as developer-facing + provenance. Remaining nit: whether `terminal` is a boolean flag or implied by the + *absence* of provenance (a code with no upstream disclosure is terminal). The + latter is more economical; the former is more explicit. (Lean: implied-by-absence, + with an explicit flag only if a node needs to assert "terminal here" while also + carrying unrelated metadata.) +- **Multi-product future:** resolved — see "Multiple embedders (the `n2` case)". + Captured here only as a reminder that the remap hook must live in + `quarto-error-reporting`, never in a Quarto-specific crate. + +## Sequencing (decided 2026-06-26) + +This discipline is the *host* contract, so it is designed and proven **before** +its first client: + +1. Extract the **diagnostics foundation** first — `quarto-source-map` (leaf) + + `quarto-error-reporting` (carrying the three contracts above: `CatalogProvider`, + remap hook, namespace-agnostic rendering) — into a standalone repo under the + **`posit-dev/`** GitHub org. Publish to crates.io; validate standalone. +2. Only then extract **`quarto-yaml` + `quarto-yaml-validation`** as the **first + client** of the discipline (it adopts a `yaml-schema/*` origin namespace). +3. Only then **migrate q2** to consume the published crates (q2 keeps its in-tree + copies until the external ones are proven). The motivating external consumers + are invisible internal Posit users of `quarto-yaml-validation`. + +The yaml-extraction mechanics live in +`claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md`; the +error-reporting extraction (step 1) warrants its own plan doc once this +discipline is accepted. 
diff --git a/claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md b/claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md new file mode 100644 index 000000000..84a725217 --- /dev/null +++ b/claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md @@ -0,0 +1,319 @@ +# Step 1: extract the diagnostics foundation into two standalone `posit-dev/` repos — `quarto-source-map` first, then `quarto-error-reporting` + +> **Naming (decided 2026-06-27):** both externalized crates **keep their current +> names** — `quarto-source-map` and `quarto-error-reporting` (and `quarto-yaml` +> later). No `error-reporting-core` rename. We rename only "if it comes to it." +> Consequence: there is **no q2-side façade** (the name belongs to the external +> crate); q2 depends on the external `quarto-error-reporting` directly. The only +> crate carved out q2-side is `quarto-error-catalog` (the `Q-*` policy). `json.rs` + +> `coalesce.rs` stay in the external crate behind a default-off `json` feature +> (see "The split"). + +**Strand:** bd-egcyeym9 +**Date:** 2026-06-26 +**Status:** Plan (not started) +**Design contract:** `claude-notes/designs/cross-package-error-codes.md` +**Sibling plan (gated behind this):** + `claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md` + +This is **step 1 of 3** in the agreed sequence (foundation → YAML stack → q2 +migration). It extracts and publishes the diagnostics *host* — the crate that +defines the cross-package error-code discipline — and proves it standalone +*before* any client (yaml-validation) adopts it. 
+ +## Scope + +**In scope:** extract `quarto-source-map` into its own `posit-dev/` repo and +publish it (leaf-first); make `quarto-error-reporting` catalog-agnostic by carving +its `Q-*` catalog data out into a new q2-side `quarto-error-catalog` policy crate; +extract the (now catalog-agnostic) `quarto-error-reporting` into a **separate** +`posit-dev/` repo (depending on the now-published `quarto-source-map`) and publish +it; cut q2 over to the published crates, incrementally (source-map first). + +**Two repos, not one** (decided 2026-06-27 — see Decisions §1). The crates split +naturally: `quarto-source-map` is a general source-location leaf with 14 in-repo +dependents and no relationship to diagnostics; `quarto-error-reporting` is the +diagnostics host. Independent repos = independent release cadences. + +**Explicitly NOT in scope:** `quarto-yaml` / `quarto-yaml-validation` (step 2), +deleting `validate-yaml` (step 2), wiring yaml validation into q2 config, the +`Q-1-x` remap (step 2/3). This plan touches **no YAML code**. + +## Grounding facts (measured 2026-06-26) + +- **The catalog coupling inside `error-reporting` is one line.** + `crates/quarto-error-reporting/src/diagnostic.rs:290` — + `DiagnosticMessage::docs_url()` calls `crate::catalog::get_docs_url(code)`. That + is the *entire* hard seam between the renderer and the Quarto catalog. +- **Only two external q2 crates call the catalog free-functions directly:** + `quarto-core/src/project_resources.rs` and `quarto-core/src/theme_diagnostic.rs`. + Everything else goes through `DiagnosticMessage`. +- **Direct Cargo dependents:** `quarto-source-map` 14, `quarto-error-reporting` 9. + (The research doc's 26/19 counted transitive reach.) +- **Module sizes** (`crates/quarto-error-reporting/src/`): `diagnostic.rs` 1187, + `builder.rs` 595, `coalesce.rs` 461, `json.rs` 480, `catalog.rs` 317, + `macros.rs` 98, `lib.rs` 87. 
+- **`quarto-source-map`** is a true leaf: deps `serde`, `serde_json`, `smallvec`; + no `description` field (must add one to publish). +- **The dependency between the two is strictly one-directional:** + `quarto-error-reporting → quarto-source-map`, pervasively (`SourceInfo` is a field + on `DiagnosticMessage` at `diagnostic.rs:126`; `SourceContext` threads through + rendering and the builder). `quarto-source-map` depends on *nothing* in + error-reporting. **This forces leaf-first extraction order:** source-map must be + published before `quarto-error-reporting` can be (crates.io rejects unpublished + path deps). +- **Workspace metadata** all crates inherit (`[workspace.package]`): `version = + "0.7.0"`, `repository = posit-dev/quarto-markdown-syntax`, `license = MIT`, + `edition = 2024`. The externalized crates must stop inheriting q2's `version` + and `repository` (they get their own in the new repo). +- Neither crate has ever been published to crates.io (no public version line yet). + +## The split: what goes where + +Only **one** thing actually leaves `quarto-error-reporting`: the `Q-*` catalog +*data*. Everything else stays in the (now catalog-agnostic) crate, so consumers' +imports are unchanged. 
+ +``` +quarto-error-reporting (EXTERNAL — keeps its name) quarto-error-catalog (q2 — NEW) + diagnostic.rs DiagnosticMessage, render error_catalog.json (Q-* data) + builder.rs DiagnosticMessageBuilder ERROR_CATALOG static (moved here) + macros.rs convenience macros QuartoCatalog: CatalogProvider + ErrorCodeInfo (struct, moved from catalog.rs) install() at startup + CatalogProvider trait + OnceLock registry (the Q-* policy + audit live here) + get_docs_url/get_error_info/get_subsystem + (now DELEGATE to the installed provider) + json.rs behind default-off `json` feature + coalesce.rs (render-summary helper) + deps: url, ariadne, thiserror, serde(_json); + schemars ONLY under `json` feature + NO error_catalog.json / ERROR_CATALOG static + +quarto-source-map (EXTERNAL — unchanged surface) + leaf; serde/serde_json/smallvec +``` + +**Rationale for each boundary** (full version in the design note): + +- `ErrorCodeInfo` (the *struct*) stays, because the `CatalogProvider` trait returns + it. Only `ERROR_CATALOG` (the *static* + `error_catalog.json` loader) leaves → + `quarto-error-catalog`, because the *data* is Quarto policy. +- `get_docs_url`/`get_error_info`/`get_subsystem` stay, reimplemented to delegate to + the installed provider (was: read the static map). The two `quarto-core` callers + and the `lib.rs` re-export are therefore **source-unchanged** (they still resolve + against `quarto-error-reporting`). The one symbol that *does* move is the + `ERROR_CATALOG` static re-export — audit for direct importers (the free functions + cover almost all uses). +- `json.rs` + `coalesce.rs` **stay** in `quarto-error-reporting`, with `json` behind + a **default-off `json` feature** so a non-Quarto consumer pulls neither the + `schemars` dep nor the wire format. q2 enables `features = ["json"]`. This keeps + every `use quarto_error_reporting::json::…` / `::coalesce::…` consumer + source-unchanged. 
(Reverses the earlier Q4/Q5 "move json q2-side" — the + feature-gate recovers the clean-build benefit with far less churn. Residual: + the `quarto.org` `SCHEMA_URL` const ships behind the feature; revisit only if a + non-Quarto user needs the wire format under a different scheme.) +- `quarto-source-map` moves with **no surface change** — only its manifest changes. + +## The host seam (canonical definition — supersedes the sketch in the sibling plan) + +```rust +// quarto-error-reporting (external, catalog-agnostic) +pub struct ErrorCodeInfo { // moved verbatim from catalog.rs + pub subsystem: String, + pub title: String, + pub message_template: String, + pub docs_url: Option<String>, + pub since_version: String, +} + +pub trait CatalogProvider: Send + Sync { + fn lookup(&self, code: &str) -> Option<&ErrorCodeInfo>; +} + +struct EmptyCatalog; // default: catalog-agnostic, standalone-usable +impl CatalogProvider for EmptyCatalog { + fn lookup(&self, _: &str) -> Option<&ErrorCodeInfo> { None } +} + +static CATALOG: std::sync::OnceLock<Box<dyn CatalogProvider>> = std::sync::OnceLock::new(); + +/// Embedders call once at startup; first write wins, later writes are no-ops. +pub fn install_catalog(p: Box<dyn CatalogProvider>) { let _ = CATALOG.set(p); } + +fn catalog() -> &'static dyn CatalogProvider { + CATALOG.get().map(|b| b.as_ref()).unwrap_or(&EmptyCatalog) +} + +pub fn get_docs_url(code: &str) -> Option<&'static str> { /* via catalog() */ } +// get_error_info / get_subsystem likewise delegate. +``` + +```rust +// quarto-error-catalog (q2) +struct QuartoCatalog(std::collections::HashMap<String, ErrorCodeInfo>); +impl CatalogProvider for QuartoCatalog { /* HashMap::get */ } +pub fn install() { + quarto_error_reporting::install_catalog(Box::new(QuartoCatalog::load_embedded())); +} +``` + +`std::sync::OnceLock` (not `once_cell`) so core needs no extra dep for the +registry. Init point: q2 binary `main` + the WASM bootstrap + a test helper call +`quarto_error_catalog::install()` before the first diagnostic renders. 
Double +install is harmless (first-write-wins). + +The work is split into the three phases below. **Phase 1** (extract source-map) and **Phase 2** (make +error-reporting catalog-agnostic in place) are independent and may overlap; +**Phase 3** (extract `quarto-error-reporting`) requires both — the carve-out done +*and* source-map published. + +## Phase 1 — Extract `quarto-source-map` into its own repo (the leaf-first warmup) + +The simplest possible extraction (a clean leaf, no split, no policy). Doing it +first proves the whole pipeline — `posit-dev/` repo setup, crates.io publish, and +the **WASM cutover** — on the easiest crate, before the harder error-reporting work. + +- [x] **1a.** Created `posit-dev/quarto-source-map` (public, + https://github.com/posit-dev/quarto-source-map): copied the 8 sources + + `LICENSE`, wrote a standalone single-crate manifest (`version = "0.1.0"`, + `edition = "2024"`, `description`, `repository`, `keywords`, `categories`, + pinned dep versions serde 1.0.228 +rc / serde_json 1.0.149 / smallvec 1.13 + +serde), added `README.md` + `.gitignore`. Builds on **stable** rustc 1.95 + (no nightly needed). +- [x] **1b.** Local validation green: `cargo build`, `cargo test` (104 unit + 4 + doctests pass), `cargo publish --dry-run` clean. *(Gap: no GitHub Actions CI + workflow committed to the new repo yet — tests were run locally. Add one.)* +- [x] **1c.** Published `quarto-source-map 0.1.0` to crates.io (Carlos's personal + account; `cargo owner --add github:posit-dev:<team>` deferred — weekend). +- [x] **1d.** q2 cutover (branch `braid/bd-egcyeym9-source-map-extraction`): + flipped `[workspace.dependencies.quarto-source-map]` `path` → `version = + "0.1.0"`; consolidated the 13 *main-workspace* members onto + `{ workspace = true }`; deleted the in-tree `crates/quarto-source-map`. 
+ **Gotcha:** `wasm-quarto-hub-client` is *excluded* from the main workspace + and is its own standalone workspace (refs every q2 crate by `path`), so it + can't inherit a workspace dep — it gets a **direct** `quarto-source-map = + "0.1.0"`. (A blanket `crates/*/Cargo.toml` rewrite broke it first; the WASM + build caught it — and a `| tail` had masked the first verify's real exit + code. Lesson: don't pipe `cargo xtask verify` through `tail`.) Final state: + `cargo build --workspace` ✅, `cargo nextest run --workspace` 10238 ✅, + **full `cargo xtask verify` (all 14 steps, incl. WASM build + hub tests) + ✅**. Both root and wasm-crate `Cargo.lock` resolve from the crates.io + registry with matching checksum. *(Not yet committed — awaiting user + go-ahead per GIT PUSH POLICY.)* + +**Phase 1 is functionally complete** (crate published + q2 cut over + full verify +green). Only the new repo's CI workflow (1b gap) and the deferred crates.io +owner-add remain as tidy-ups. + +## Phase 2 — Make `quarto-error-reporting` catalog-agnostic, in place (TDD; independent of Phase 1) + +Goal: carve the `Q-*` catalog *data* out into `quarto-error-catalog` and route the +renderer through the installed provider, proving it compiles and is +behaviour-preserving *before* any code leaves the repo. This is the bulk of the +engineering and is valuable even if the repo move slipped. Does **not** depend on +Phase 1. There is **no façade** — `quarto-error-reporting` keeps its name and its +public surface (minus the moved catalog *data*). + +- [ ] **2a (test first).** Pin the behaviour the carve-out must preserve, against + the *current* code: + - `get_docs_url("Q-0-1")` returns the quarto.org URL (with the q2 catalog + active); + - a new test asserting that with **no** catalog installed, `get_docs_url` + returns `None` (passes only after the registry exists — write it now, + expect red). 
+- [ ] **2b.** Introduce `CatalogProvider` + the `OnceLock` registry + delegating + free-functions in `quarto-error-reporting`; keep `ErrorCodeInfo` here. + Repoint `diagnostic.rs:290` at `catalog()`. Green. +- [ ] **2c.** Create `quarto-error-catalog` crate: move `error_catalog.json` + the + `ERROR_CATALOG` static + the `QuartoCatalog` provider + `install()` there. + Move the catalog-data tests with it. Wire `install()` into q2 binary/WASM/test + entry points. +- [ ] **2d.** Put `json.rs` behind a default-off `json` feature in + `quarto-error-reporting`; have q2's dependents that use it enable + `features = ["json"]`. Confirm the 9 dependents + the 2 `quarto-core` catalog + callers + all `json`/`coalesce` consumers compile **unchanged** (only feature + flags and the `ERROR_CATALOG` import path may move). +- [ ] **2e.** Audit manifests: `schemars` becomes `json`-feature-only; drop + `once_cell` if the registry's `OnceLock` makes it unused; `url` stays. +- [ ] **2f.** `cargo xtask verify` green (touches `quarto-core` → hub-client/WASM; + do NOT `--skip-hub-build`). + +> At the end of Phase 2, q2 still builds `quarto-error-reporting` from a path dep; +> it is now catalog-agnostic and cleanly carve-able. + +## Phase 3 — Extract `quarto-error-reporting` into a *separate* repo and cut q2 over + +Requires Phase 1 (source-map published) **and** Phase 2 (carve-out done). + +- [ ] **3a.** Create `posit-dev/<error-reporting-repo>` (slug TBD — see Decision 5); **copy** the + `quarto-error-reporting` sources (fresh `git init`, no history). Own + `[workspace.package]`. Crucially, its `quarto-source-map` dependency is now + the **published version** dep (Phase 1c), *not* a path dep. (`json.rs` + + `coalesce.rs` travel with it, `json` behind its feature; the catalog data + already left in Phase 2c.) +- [ ] **3b.** Standalone CI: `cargo build` + `cargo nextest run` with the + **`EmptyCatalog`** default — proving catalog-agnostic operation with zero + Quarto policy present. 
+- [ ] **3c.** External-consumer smoke test: a throwaway crate builds a + `DiagnosticMessage`, installs a trivial `CatalogProvider`, renders — proving + the published API is usable with no Quarto context. +- [ ] **3d.** Publish `quarto-error-reporting` to crates.io. +- [ ] **3e.** q2 cutover: replace the in-tree `quarto-error-reporting` path dep with + the published version dep (enable `features = ["json"]`); delete the in-tree + copy. `quarto-error-catalog` stays in q2 and now depends on the external + `quarto-error-reporting`. **Full `cargo xtask verify` incl. hub-build** (WASM + risk surface again, now de-risked by Phase 1d). +- [ ] **3f.** Update `CLAUDE.md`'s crate-layout section + the workspace member list. + +## Decisions + +1. **Repo granularity — DECIDED (2026-06-27): two repos**, one per crate. The + crates split naturally (leaf source-location utility vs. diagnostics host) and + already have independent dependent sets (14 vs. 9); co-locating buys only weak + CI cohesion and needlessly couples their release cadences. (The YAML stack is a + third, separate repo in step 2.) + +2. **Crate names — DECIDED (2026-06-27): keep current names.** Both externalized + crates stay `quarto-source-map` and `quarto-error-reporting` (and `quarto-yaml` + later). No rename now ("if it comes to it"). Consequence: no q2-side façade + crate; the q2-only carve-out is `quarto-error-catalog`; `json`/`coalesce` stay in + the external crate (`json` feature-gated). See the header note + "The split". + +Open forks (settle before 1a / 3a): + +3. **Version start** (each repo independently). Fresh `0.1.0` public line (honest — + never published), vs. continue `0.7.0`. Recommend `0.1.0`. +4. **Distribution channel.** crates.io (recommended — that is the whole point for + non-Quarto users) vs. git deps as an interim before the first publish. +5. 
**Repo names** for the two `posit-dev/…` repos (the source-map repo and the + error-reporting repo — the *crate* names are fixed; the *repo* slugs are not). + +## Risks + +- **WASM cutover (1d, 3e).** Each external crate must build for + `wasm32-unknown-unknown` (both should — pure Rust, no `std::fs`), and the + async-trait/`?Send` rules in `.claude/rules/wasm.md` apply if any trait is + touched. The `CatalogProvider` registry uses `OnceLock<Box<dyn CatalogProvider>>`; + the `Send + Sync` bound is fine natively and irrelevant single-threaded in WASM, + but confirm it compiles for the target. **Verify with the full + `cargo xtask verify`, not just `cargo build`.** Phase 1d does this on the trivial + leaf first, so 3e is already de-risked. +- **Install-ordering.** A diagnostic rendered before `install()` would silently use + `EmptyCatalog` (no titles/URLs). Mitigation: install at the earliest entry + points; a debug-only assertion or a test that renders a known `Q-*` code and + checks its URL guards against a missing install. +- **Two copies during extract→cutover.** Each crate exists in both repos between + publish and the q2 cutover that deletes the in-tree copy (1c→1d, 3d→3e). Keep the + window short; pin the q2 dep to the exact published version. + +## Test plan (TDD gates) + +- *Behaviour-preservation (Phase 2):* the 2a tests — installed catalog reproduces + today's `docs_url`; empty catalog returns `None`. +- *Catalog-agnostic (Phase 3):* `quarto-error-reporting`'s own test suite passes + with `EmptyCatalog` and **no** dependency on any `Q-*` data. +- *External-consumer smoke (3c):* a throwaway crate builds a `DiagnosticMessage`, + installs a trivial `CatalogProvider`, and renders — proving the published API is + usable with zero Quarto context. +- *Full regression (1d, 3e):* `cargo xtask verify` (incl. hub-build) green with q2 + consuming each published crate in turn. 
diff --git a/claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md b/claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md new file mode 100644 index 000000000..0fcc6fd4d --- /dev/null +++ b/claude-notes/plans/2026-06-26-extract-quarto-yaml-validation-design.md @@ -0,0 +1,496 @@ +# Extracting `quarto-yaml-validation`: design decisions + +**Strand:** bd-egcyeym9 +**Date:** 2026-06-26 +**Status:** Design decisions (chosen direction; not yet implemented) +**Predecessor:** `claude-notes/research/2026-06-17-extract-quarto-yaml-validation.md` + (current-state architecture) + +> **⚠️ Partially superseded (2026-06-27) by +> `claude-notes/plans/2026-06-26-extract-error-reporting-foundation.md`.** This +> doc's YAML substance — origin codes (`yaml-schema/*`), the q2 remap, deleting +> `validate-yaml`, the discipline application — stands. But its assumptions about +> the *error-reporting crate structure* are **out of date**: there is **no +> `error-reporting-core` rename** (the externalized crate keeps the name +> `quarto-error-reporting`), **no q2-side façade**, and **`json.rs` does not move +> q2-side** (it stays in the external crate behind a default-off `json` feature; +> only the `Q-*` catalog *data* carves out, into `quarto-error-catalog`). Where Q1, +> Q4, and Q5 below describe `error-reporting-core`/façade/json-relocation, defer to +> the foundation plan. Step ordering is also foundation-first (source-map → +> error-reporting → *then* this YAML work). + +This document moves the extraction from *current-state architecture* to +*chosen design*. It resolves the seven open questions the research doc left +hanging, given two decisions the user has already made: + +- **Session goal:** decide the open questions. +- **Extraction strategy lean:** *new repo owns the foundation crates; q2 + consumes them as published crates* (research-doc Option 1). 
+ +The hardest question — error-code identity across the package boundary — is +elevated to its own general design note, because it governs *any* error defined +in one package and surfaced by another product, not just YAML: +**`claude-notes/designs/cross-package-error-codes.md`**. Q3/Q6 below are the +YAML-specific application of that philosophy. + +### Two clarifications from the user (2026-06-26) + +- **`validate-yaml` is to be deleted, not carried along.** It is a demo binary, + not a supported one. It is the **only** in-repo Cargo dependent of + `quarto-yaml-validation` (the other two grep hits — in + `quarto-error-reporting/Cargo.toml` and `quarto-core/.../attribution/mode.rs` + — are comments, not deps). **Consequence: once `validate-yaml` is removed, + `quarto-yaml-validation` has ZERO in-repo consumers.** The crate lifts out with + nothing trailing it, and the error-code remap (Q3) is needed only by *external* + embedders today — the YAML validator becomes the **proving ground** for the + general cross-package philosophy rather than a forced q2 integration. +- **"TypeScript" = the language/compiler, not TS Quarto.** The `Q-*` scheme was + inspired by the TypeScript *compiler*'s flat numeric `TSxxxx` catalog. That is a + good template for the *presentation* layer but offers nothing for the + cross-package case (it is a monolith with a central allocator). The composable + precedents are Clippy (`clippy::needless_return`) and ESLint plugin namespacing — + see the design note. + +Every section below is **Decision → Rationale**, and flags where a decision is a +judgment call the user may want to override. + +--- + +## 0. The reframing that drives everything + +The research doc treated `quarto-error-reporting` as one of three +interchangeable "foundation crates" to externalize alongside `quarto-source-map` +and `quarto-yaml`. 
Reading the code, it is not interchangeable: + +- It is **q2's entire diagnostics substrate**, not just a YAML helper: + `DiagnosticMessage`, `DiagnosticMessageBuilder`, ariadne/text rendering, + `coalesce`, `macros`, and the **JSON wire shape** (`json.rs`). +- The JSON wire shape is a **shared cross-crate contract**: consumers include + `wasm-quarto-hub-client`, `quarto-hub`, `quarto-publish`, `quarto-trace`, + `quarto-parse-errors`, `quarto-mcp-launcher`, plus the preview SPA and CLI. +- It bakes in **Quarto policy** in two places, not one: the catalog's + `docs_url` (`https://quarto.org/docs/errors/...`) *and* + `JsonDiagnostic::SCHEMA_URL` (`https://quarto.org/schemas/v1/...`). +- The catalog is reached as a **global static + free functions** + (`ERROR_CATALOG`, `get_docs_url`, `get_error_info`, `get_subsystem`) called + from ~19 crates — not as an injected dependency. + +So "externalize `quarto-error-reporting`" is really two separable things welded +together: a **catalog-agnostic reporting core** (reusable, belongs outside) and +a **Quarto error-code policy** (the `Q-*` catalog + quarto.org URLs + the audit, +belongs in q2). The whole design hinges on splitting them. + +--- + +## Q1. Which extraction strategy? — DECIDED: Option 1 (own), but split first + +**Decision.** New repo (`quarto-yaml-schema`, name TBD — see Q7) owns four +crates and q2 consumes them as published crates. 
But `quarto-error-reporting` +is **split before the move**, and only the *core* half leaves: + +External repo owns: + +| Crate (external) | Was | Role | +|---|---|---| +| `quarto-source-map` | unchanged | clean leaf, location tracking | +| `quarto-yaml` | unchanged | annotated YAML parse | +| `error-reporting-core` | **split** out of `quarto-error-reporting` | catalog-agnostic `DiagnosticMessage` + builder + render + pluggable catalog | +| `quarto-yaml-validation` | unchanged crate, error-code change | schema validation over the above | + +q2 keeps / gains: + +| Crate (in q2) | Was | Role | +|---|---|---| +| `quarto-error-catalog` | **split** out of `quarto-error-reporting` | the `Q-*` `error_catalog.json`, quarto.org URLs, the audit, the `CatalogProvider` impl | +| `quarto-error-reporting` (façade) | shrinks to a re-export shim | re-exports `error-reporting-core` + installs the q2 catalog, so the ~19 existing `use quarto_error_reporting::…` call sites keep compiling | + +**Rationale.** Option 1 is the only strategy that delivers the actual goal — a +crate non-Quarto developers can `cargo add` without a Quarto identity. Options 2 +(mirror) and 3 (publish-from-q2 with pins) were rejected because: + +- They leave the `Q-*`/quarto.org policy baked into the shipped artifact, so the + "non-Quarto" identity is cosmetic. The research doc itself notes Option 2's + external identity "is thin if it is just a mirror." +- The error-code pluggability work (Q2/Q3) is *unavoidable* the moment a real + external user appears; deferring it (Option 2's only advantage) just moves the + cost without shrinking it. + +**The cost we are accepting (judgment call):** every q2 diagnostic — not just +YAML — now renders through an externally-owned `error-reporting-core`. Cross-repo +coordination on the diagnostic builder/render is the standing tax. 
We mitigate +it by keeping a thin `quarto-error-reporting` façade in q2 so day-to-day q2 code +does not change its imports, and by making `error-reporting-core`'s surface +deliberately small and slow-moving. + +> **Override point.** If that tax is judged too high right now, the fallback is +> "split, but don't move yet" — do all of §Q2–Q3 (the core/catalog split lands +> *inside q2*), publish nothing, and defer the repo move to a later strand once +> the seam has proven stable. This keeps every decision below valid and de-risks +> the cross-repo step. + +--- + +## Q2. Does `quarto-error-reporting` split into core + catalog, or gain a provider trait? — DECIDED: both (split *and* a provider trait) + +**Decision.** Split into `error-reporting-core` + `quarto-error-catalog`, and +the seam between them is a `CatalogProvider` trait installed into a process-global +registry. + +```rust +// error-reporting-core (external) +pub struct ErrorCodeInfo { // moved verbatim from catalog.rs + pub subsystem: String, + pub title: String, + pub message_template: String, + pub docs_url: Option<String>, + pub since_version: String, +} + +pub trait CatalogProvider: Send + Sync { + fn lookup(&self, code: &str) -> Option<&ErrorCodeInfo>; +} + +// A no-op default so the core is usable standalone with zero config. +struct EmptyCatalog; +impl CatalogProvider for EmptyCatalog { fn lookup(&self, _: &str) -> Option<&ErrorCodeInfo> { None } } + +static CATALOG: OnceLock<Box<dyn CatalogProvider>> = OnceLock::new(); + +/// Embedders call this once at startup. Idempotent-by-first-write. 
+pub fn install_catalog(p: Box<dyn CatalogProvider>) { let _ = CATALOG.set(p); } + +fn catalog() -> &'static dyn CatalogProvider { + CATALOG.get().map(|b| b.as_ref()).unwrap_or(&EmptyCatalog) +} + +// The existing free functions keep their signatures — now delegating: +pub fn get_docs_url(code: &str) -> Option<&'static str> { /* via catalog() */ } +pub fn get_error_info(code: &str) -> Option<&'static ErrorCodeInfo> { /* via catalog() */ } +``` + +```rust +// quarto-error-catalog (stays in q2) +struct QuartoCatalog(HashMap<String, ErrorCodeInfo>); // from error_catalog.json +impl CatalogProvider for QuartoCatalog { /* HashMap::get */ } + +pub fn install() { error_reporting_core::install_catalog(Box::new(QuartoCatalog::load())); } +``` + +**Rationale — why a global registry rather than threading the provider through +every call site.** Today the catalog is a `Lazy` global reached by free +functions from ~19 crates. Converting all of those to take a `&dyn +CatalogProvider` parameter is a large, invasive churn with no behavioural payoff. +A `OnceLock`-installed global keeps every existing call site (`get_docs_url(code)`) +source-compatible; only the *initialization* changes (q2 calls +`quarto_error_catalog::install()` once at binary/WASM entry). The standalone +library, installing nothing, transparently gets the `EmptyCatalog` (codes render +without titles/URLs — exactly right for a non-Quarto user). + +**Trade-off (acknowledged).** This is process-global state with an init-ordering +requirement: q2 must `install()` before the first diagnostic renders. We accept +it because it mirrors the *current* global-static behaviour (no regression) and +because the install point is trivially early (binary `main` / WASM bootstrap). A +test-only `install()` helper covers the test binaries. The `OnceLock::set` +swallow-on-second-write keeps double-install (e.g. test + lib) harmless. + +> This is the *"split into core + catalog"* and *"provider trait"* candidates +> from the research doc, combined. 
They were never mutually exclusive: the split +> is the crate boundary; the trait is the seam across it. + +--- + +## Q3. Where does the `Q-1-x` mapping live, and what does `quarto-yaml-validation` emit? — DECIDED: library-local stable ids, remapped at the q2 boundary + +> This is the YAML-specific application of the general philosophy in +> `claude-notes/designs/cross-package-error-codes.md`. "Library-local id" = +> *origin code* (namespaced, package-owned); "Q-1-x" = *presentation code* +> (flat, product-owned); the remap is the product-owned bridge. Read that note +> for the invariants (esp. I1 subsystem≠package, I5 provenance). + +**Decision.** `quarto-yaml-validation` stops returning `Q-1-x`. Its +`ValidationErrorKind` is already a clean, string-free enum; we give it a +**library-local stable code namespace** (an *origin-code* namespace) owned by the +library, e.g.: + +```rust +impl ValidationErrorKind { + /// Stable, library-owned identifiers. NOT Quarto codes. + pub fn code(&self) -> &'static str { + match self { + ValidationErrorKind::MissingRequiredProperty { .. } => "yaml-schema/missing-required", + ValidationErrorKind::TypeMismatch { .. } => "yaml-schema/type-mismatch", + ValidationErrorKind::InvalidEnumValue { .. } => "yaml-schema/invalid-enum", + // … one per variant … + } + } +} +``` + +q2 owns the **remap** from those origin codes to its `Q-1-x` presentation codes, +at the reporting boundary, in `quarto-error-catalog` (since `validate-yaml` is +being deleted, there is no binary-local place for it to live — it belongs with the +product's catalog policy): + +```rust +// q2 side +fn quarto_code(lib_code: &str) -> Option<&'static str> { + match lib_code { + "yaml-schema/missing-required" => Some("Q-1-10"), + "yaml-schema/type-mismatch" => Some("Q-1-11"), + // … + _ => None, + } +} +``` + +So the *same* `TypeMismatch` renders as `Q-1-11` + a quarto.org URL inside q2, +and as `yaml-schema/type-mismatch` (or code-less, per the user's catalog) +outside q2. 
+ +**Rationale.** This is the research doc's "remap error codes when errors are +defined in different packages" idea, and it is the only option that satisfies +*both* contracts simultaneously: + +- The library gets a stability contract it **owns** (its ids never depend on + Quarto's numbering). +- q2 keeps `Q-1-x` as its public stability contract (the catalog/audit/docs URLs + are unchanged downstream of the remap). + +It also **generalizes** beyond YAML: any future externalized crate that defines +its own errors uses the same pattern (own ids → q2 remap table). That is a +reusable architectural seam, not a one-off. + +**Migration safety net.** The existing `error_code()` (returning `Q-1-x`) and its +~15 unit tests in `error.rs` are the regression oracle, but the check **splits** +across the boundary once the crate leaves: + +- *Upstream (library):* a unit test pins `kind.code()` → origin-code string for + every `ValidationErrorKind` variant (exhaustive match guarantees coverage). +- *q2 side:* a test pins the remap `origin-code → Q-1-x` against a **captured + snapshot** of today's `error_code()` output (a frozen `[(origin, Q-code)]` + table), proving the remap reproduces today's presentation codes exactly. The + snapshot is taken *before* any code moves (TDD: capture first). + +The split is necessary because, post-extraction, q2 no longer depends on the enum +(see honesty note), so the q2 oracle keys on origin-code **strings**, not on +`ValidationErrorKind`. + +> **Honesty note — the yaml remap is dormant at first.** With `validate-yaml` +> deleted and the validator not wired into q2's config pipeline, **q2 surfaces no +> yaml-validation errors today**, so the `Q-1-x` catalog entries and the remap are +> *forward-looking*: they exist for when q2 actually consumes the external +> validator (presumably front-matter validation — the validator's reason to +> exist). 
Two honest options: (a) keep the `Q-1-*` catalog entries + remap as a +> dormant, audited contract ready for that integration; or (b) **remove** the +> `Q-1-*` yaml entries from q2's catalog now and re-add them with the integration. +> Recommendation: (a) — the entries are cheap, the docs URLs are already public, +> and keeping them avoids a churn later; the audit's remap-completeness check +> (Q6) is simply scoped to "surfaced" codes, which is currently empty for yaml. +> **The append-only principle largely settles this toward (a):** if any `Q-1-*` +> yaml docs page has been *published*, it is under the cool-URL covenant and must +> not be deleted (it is a "dormant" catalog entry, a first-class state — see the +> design note's lifecycle). Removal is only on the table for entries never +> publicly documented. The fork survives only for those un-published entries. + +--- + +## Q4. Where do the JSON wire types (`json.rs`) live after the split? — DECIDED: stay in q2 + +**Decision.** `JsonDiagnostic`, `JsonDiagnosticDetail`, `JsonPass1Failure`, +`diagnostic_to_json`, `with_source_file` stay **q2-side** (in the +`quarto-error-reporting` façade or a small `quarto-diagnostic-json` crate). They +are **not** part of the externalized library. + +**Rationale.** + +- Every consumer of the wire shape is a q2 concern (the preview SPA, the WASM + bridge, the hub, publish, trace, the CLI). A non-Quarto user of + `quarto-yaml-validation` has no use for it — they have `ValidationDiagnostic` + and the text/ariadne renderer. +- The shape **hard-codes quarto.org schema URLs** (`SCHEMA_URL` consts) and is + versioned under Quarto's `/v1/` scheme. That is Quarto policy, same category as + the catalog — it belongs with the policy, not in the neutral core. +- Keeping it q2-side means the externalized core need not take a `schemars` + dependency for *this* shape (see Q5), and the cross-repo surface stays smaller. 
+ +`json.rs` depends only on `DiagnosticMessage` + `SourceContext`, both of which +remain available (the former re-exported through the façade from +`error-reporting-core`, the latter an external dep q2 already consumes). So the +move is mechanical. + +--- + +## Q5. Does the external core keep `schemars`? — DECIDED: no + +**Decision.** `error-reporting-core` does **not** depend on `schemars`. The only +`schemars`-deriving types are the JSON wire shapes, which stay q2-side (Q4), so +`schemars` stays a q2 dependency. + +**Rationale.** `schemars` exists in `quarto-error-reporting` solely to emit JSON +Schema for the machine-to-machine wire types. With those types staying in q2, the +external core's surface (`DiagnosticMessage`, builder, render, `ErrorCodeInfo`, +`CatalogProvider`) needs only `serde`/`serde_json` for `ErrorCodeInfo` +(de)serialization. Smaller external dependency footprint = easier for a +non-Quarto user to adopt, and one fewer version to coordinate across repos. + +--- + +## Q6. How does the error-code audit (`scripts/audit-error-codes.py`) adapt? — DECIDED: it stays q2-only and polices the q2 catalog + remap table + +**Decision.** The audit remains a q2 script. Its scope changes from bidirectional +"codes referenced in source ↔ `error_catalog.json`" to three checks — and the +**bidirectionality is deliberately broken** to honour the append-only principle +(see `claude-notes/designs/cross-package-error-codes.md`, "Codes are append-only"): + +1. **q2 catalog consistency — forward only.** Every `Q-*` *emitted* in q2 source + has an `error_catalog.json` entry. The **reverse is dropped**: a catalog entry + with no emitter is a legitimate *retired* or *dormant* code, not an error. (This + is the one concrete code-change the append-only principle forces.) +2. **append-only (new).** No `error_catalog.json` entry is ever *removed* or + *redefined* (diff against git history or a committed snapshot). 
Enforceable + because it is q2's own catalog in q2's own repo; the cross-repo analogue is only + documented expectation. +3. **remap completeness (new).** Every library-local id an externalized crate's + `code()` returns *that q2 chooses to map* has a `Q-*` target that exists in the + catalog. Unmapped is allowed (tier-2 passthrough), so this checks the *mapped* + subset only. (The `quarto_code()`/`old error_code()` equivalence test from Q3 is + the machine-checkable half; the audit covers the catalog-entry half.) + +The externalized library carries its **own**, much simpler check (its `code()` +arms are exhaustive over the enum — guaranteed by the compiler's match +exhaustiveness; a unit test pins the id strings so they are not changed +accidentally). + +**Rationale.** The `Q-*` namespace, the `quarto.org` URLs, and the cross-subsystem +numbering are Quarto policy; the audit enforces that policy and has no meaning for +an external user. Keeping it q2-only is the natural consequence of the core/catalog +split. The new remap-completeness check is small and replaces the coupling the old +audit had to the (now externalized) yaml-validation source. + +--- + +## Q7. Naming / identity of the external project — DECIDED (proposal): `quarto-yaml-schema`, kept under the Quarto brand + +**Decision (proposal, lowest-confidence — explicitly flagged for the user).** +Ship the external repo as **`quarto-yaml-schema`** (or a `quarto-` family of the +four crate names, unchanged), and keep the Quarto brand rather than rebranding to +a generic name. + +**Rationale.** The validator implements *Quarto's* YAML schema dialect (the code +comments call it exactly that), and the crate names are already `quarto-*` with +`[workspace.package] repository = posit-dev/quarto-markdown-syntax`. 
A rename to a +neutral identity (a) loses the discoverability of the Quarto association, (b) +forces a crate-rename churn, and (c) is not required for a non-Quarto user to +adopt it — the catalog-pluggability (Q2/Q3) is what makes it non-Quarto-*specific*, +not the name. "Quarto-flavoured but catalog-agnostic" is an honest description. + +> **This is the decision most likely to be wrong without the user's product +> intent.** If the goal is to court non-Quarto adopters aggressively, a neutral +> name may matter more than I am weighting it. Cheap to revisit before the first +> `cargo publish`; expensive after. Left as a proposal. + +--- + +## Target topology (summary diagram) + +``` +EXTERNAL REPO (Option 1 — owns these, published to crates.io) + quarto-source-map leaf + ▲ + quarto-yaml → source-map + ▲ + error-reporting-core → source-map (CatalogProvider trait; EmptyCatalog default; + ▲ NO schemars; NO quarto.org URLs) + quarto-yaml-validation → the above three + ValidationErrorKind::code() -> "yaml-schema/*" + +Q2 (consumes the four as published deps; adds policy) + quarto-error-catalog → error-reporting-core (error_catalog.json, Q-*, + quarto.org URLs, install()) + quarto-diagnostic-json → error-reporting-core + source-map (JsonDiagnostic, schemars, + (or kept in façade) quarto.org SCHEMA_URLs) + quarto-error-reporting → façade: re-exports core + installs catalog + (shim) (keeps ~19 `use quarto_error_reporting::…` sites compiling) + (remap origin -> Q-1-x) lives in quarto-error-catalog; DORMANT until q2 wires + the external validator into its config pipeline + validate-yaml → DELETED (demo binary; was the only consumer) +``` + +--- + +## Sequencing decision (2026-06-26 — supersedes the in-q2-first ordering) + +The user chose **extract-first, migrate-q2-last**, and **error-reporting before +yaml-validation**, because (a) there are invisible internal Posit consumers of +`quarto-yaml-validation` that need a real standalone repo, and (b) the error-code +*discipline* 
(`claude-notes/designs/cross-package-error-codes.md`) is a **host** +contract that must be designed and proven before its first client. Concretely: + +1. **Foundation repo first** (`posit-dev/…`): extract `quarto-source-map` (leaf) + + `error-reporting-core` (the split-out catalog-agnostic half, carrying the + `CatalogProvider` + remap hook). Publish to crates.io; validate standalone. + q2 keeps its in-tree copies until the external crates are proven, then switches + its deps; a thin `quarto-error-reporting` façade + `quarto-error-catalog` + remain q2-side (Q1, Q4). +2. **YAML repo second**: extract `quarto-yaml` + `quarto-yaml-validation` as the + **first client** of the discipline (origin namespace `yaml-schema/*`); delete + `validate-yaml`. Publish; the Posit consumers repoint here. +3. **q2 migration last**: q2 consumes the published crates; adds its (dormant) + yaml remap. + +This **inverts** the "P0–P3 in q2, P4–P5 cross-repo" ordering below — the repo +moves now come *first*, not last. The phase *contents* below remain valid as a +work breakdown; only their order relative to the repo split changes. The +error-reporting extraction (step 1) warrants **its own plan doc** once the +discipline is accepted; this doc remains the yaml-validation-specific plan +(gated behind step 1). + +> **Open repo-granularity fork:** one foundation repo + one YAML repo (recommended +> — clean separation of "general diagnostics infra" from "the YAML stack"), vs. a +> single workspace repo holding all four crates (simpler publish/CI, but couples +> the YAML stack's release cadence to the infra's). Needs the user. + +## Phased implementation outline (TDD — to become braid sub-strands) + +Each phase is independently shippable and leaves the workspace green. + +- [ ] **P0 — Capture the equivalence oracle (no code moves).** Snapshot today's + `ValidationErrorKind → Q-1-x` mapping as a frozen `[(origin, Q-code)]` table, + and add the (initially failing) remap test that will guard P2. 
This is the + regression contract for everything after. +- [ ] **P0b — Delete `validate-yaml`.** Remove the demo binary and its workspace + member entry. Confirms `quarto-yaml-validation` then has zero in-repo + consumers (`cargo build --workspace` green). Independent of the rest. +- [ ] **P1 — Split `quarto-error-reporting` *in place* (still one repo).** Carve + `error-reporting-core` (catalog-agnostic) + `quarto-error-catalog` (Q-* + policy) + the `CatalogProvider` registry; turn `quarto-error-reporting` into + the re-export façade that calls `install()`. Move `json.rs` to its q2 home + (Q4). Workspace stays green; ~19 dependents unchanged. **This is the bulk of + the work and is valuable even if the repo move never happens.** +- [ ] **P2 — Re-point `quarto-yaml-validation` to library-local ids.** Replace + `error_code()`’s `Q-1-x` with `code() -> "yaml-schema/*"`; move the + `Q-1-x` knowledge into the q2 remap table; make P0's oracle pass. +- [ ] **P3 — Adapt the audit.** Implement the two-check audit (Q6). +- [ ] **P4 — Decouple workspace metadata.** Per-crate `repository`/`version` for + the four externalizing crates so they can publish independently (research + doc's note on shared `[workspace.package]`). +- [ ] **P5 — Stand up the external repo + publish.** Move the four crates; q2 + switches its `path` deps to version (or git) deps; CI on both sides. + *(Gated on the Q1 override decision — may be deferred.)* + +P0–P3 land entirely inside q2 and deliver the reusable seam; P4–P5 are the +cross-repo commitment. This ordering means we can stop after P3 with a clean, +catalog-pluggable diagnostics stack and decide the repo move on its own merits. (This paragraph describes the original in-q2-first ordering; per the 2026-06-26 sequencing decision above, the repo moves now come first.) + +--- + +## Open items genuinely needing the user (not decided here) + +1. **Q1 override:** full repo move now (P5) vs. stop after the in-q2 split (P3) + and defer the move? (Recommendation: do P0–P3 regardless; treat P4–P5 as a + separate go/no-go.) Note: the 2026-06-26 sequencing decision above (extract-first, migrate-q2-last) supersedes this recommendation. +2.
**Q7 naming:** keep `quarto-*` / `quarto-yaml-schema`, or rebrand neutral? +3. **Publish channel:** crates.io vs. git deps for q2→external consumption (P5). +4. Whether `quarto-error-reporting` keeps its name as the façade, or the façade is + removed and the ~19 dependents migrate to `error-reporting-core` directly + (more churn, cleaner end state). From 4c94eacf20fbda24f5a11a00c9f0b2929f20da06 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Sat, 27 Jun 2026 15:06:13 -0500 Subject: [PATCH 2/2] build: consume published quarto-source-map 0.1.0 from crates.io (bd-egcyeym9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit quarto-source-map has been extracted to https://github.com/posit-dev/quarto-source-map and published to crates.io as 0.1.0 — the first of the diagnostics-foundation crates to leave the monorepo. Cut q2 over to the published crate: - [workspace.dependencies.quarto-source-map]: path -> version = "0.1.0" - 13 main-workspace members: path dep -> { workspace = true } - wasm-quarto-hub-client (excluded, standalone workspace, refs every q2 crate by path): direct quarto-source-map = "0.1.0" — it cannot inherit a workspace dep, so { workspace = true } would fail to resolve at the wasm32 build. - delete in-tree crates/quarto-source-map/ - both Cargo.lock files now resolve quarto-source-map from the crates.io registry (matching checksum). wasm-quarto-hub-client/Cargo.lock additionally picks up incidental 0.5.0 -> 0.7.0 member-version catch-up that was already pending (pre-existing lockfile drift, regenerated by the build). Verified: cargo build --workspace; cargo nextest run --workspace (10238 passed); full cargo xtask verify — all 14 steps incl. the WASM build and hub-client tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 4 +- Cargo.toml | 2 +- crates/pampa/Cargo.toml | 2 +- crates/quarto-ast-reconcile/Cargo.toml | 2 +- crates/quarto-citeproc/Cargo.toml | 2 +- crates/quarto-config/Cargo.toml | 2 +- crates/quarto-csl/Cargo.toml | 2 +- crates/quarto-doctemplate/Cargo.toml | 2 +- crates/quarto-error-reporting/Cargo.toml | 2 +- crates/quarto-pandoc-types/Cargo.toml | 2 +- crates/quarto-parse-errors/Cargo.toml | 2 +- crates/quarto-source-map/Cargo.toml | 18 - crates/quarto-source-map/src/context.rs | 282 --- crates/quarto-source-map/src/file_info.rs | 346 ---- crates/quarto-source-map/src/lib.rs | 46 - crates/quarto-source-map/src/mapping.rs | 264 --- crates/quarto-source-map/src/source_info.rs | 2031 ------------------- crates/quarto-source-map/src/types.rs | 169 -- crates/quarto-source-map/src/utils.rs | 211 -- crates/quarto-xml/Cargo.toml | 2 +- crates/quarto-yaml/Cargo.toml | 2 +- crates/wasm-quarto-hub-client/Cargo.lock | 43 +- crates/wasm-quarto-hub-client/Cargo.toml | 2 +- 23 files changed, 39 insertions(+), 3401 deletions(-) delete mode 100644 crates/quarto-source-map/Cargo.toml delete mode 100644 crates/quarto-source-map/src/context.rs delete mode 100644 crates/quarto-source-map/src/file_info.rs delete mode 100644 crates/quarto-source-map/src/lib.rs delete mode 100644 crates/quarto-source-map/src/mapping.rs delete mode 100644 crates/quarto-source-map/src/source_info.rs delete mode 100644 crates/quarto-source-map/src/types.rs delete mode 100644 crates/quarto-source-map/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 7b4353320..d8a352c0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3879,7 +3879,9 @@ dependencies = [ [[package]] name = "quarto-source-map" -version = "0.7.0" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e3d29b1ef6088d016dfe84d831e5aa3daaba4624e23621364c2aef6d593114f" dependencies = [ "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml 
index 3d96c4229..625c1d47f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -116,7 +116,7 @@ path = "./crates/quarto-yaml-validation" path = "./crates/quarto-error-reporting" [workspace.dependencies.quarto-source-map] -path = "./crates/quarto-source-map" +version = "0.1.0" [workspace.dependencies.quarto-xml] path = "./crates/quarto-xml" diff --git a/crates/pampa/Cargo.toml b/crates/pampa/Cargo.toml index db06de041..f81c3a106 100644 --- a/crates/pampa/Cargo.toml +++ b/crates/pampa/Cargo.toml @@ -41,7 +41,7 @@ tree-sitter-qmd = { workspace = true } comrak = { version = "0.52.0", default-features = false } comrak-to-pandoc = { path = "../comrak-to-pandoc" } quarto-error-reporting = { path = "../quarto-error-reporting" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } quarto-yaml = { path = "../quarto-yaml" } quarto-config = { path = "../quarto-config" } quarto-parse-errors = { path = "../quarto-parse-errors" } diff --git a/crates/quarto-ast-reconcile/Cargo.toml b/crates/quarto-ast-reconcile/Cargo.toml index 57caa34cc..53568b2fc 100644 --- a/crates/quarto-ast-reconcile/Cargo.toml +++ b/crates/quarto-ast-reconcile/Cargo.toml @@ -13,7 +13,7 @@ description = "Three-phase AST reconciliation for preserving source locations" [dependencies] quarto-pandoc-types = { path = "../quarto-pandoc-types" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } hashlink = { version = "0.11.0", features = ["serde_impl"] } diff --git a/crates/quarto-citeproc/Cargo.toml b/crates/quarto-citeproc/Cargo.toml index 3450b0a84..889f05e1a 100644 --- a/crates/quarto-citeproc/Cargo.toml +++ b/crates/quarto-citeproc/Cargo.toml @@ -10,7 +10,7 @@ description = "Citation processing engine using CSL styles" quarto-csl = { path = "../quarto-csl" } quarto-error-reporting = { path = "../quarto-error-reporting" } quarto-pandoc-types = { 
path = "../quarto-pandoc-types" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } quarto-xml = { path = "../quarto-xml" } hashlink = "0.11.0" rust-embed = { version = "8", features = ["include-exclude"] } diff --git a/crates/quarto-config/Cargo.toml b/crates/quarto-config/Cargo.toml index 93a65fe53..89b1f5afc 100644 --- a/crates/quarto-config/Cargo.toml +++ b/crates/quarto-config/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true description = "Configuration merging with source tracking for Quarto" [dependencies] -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } quarto-yaml = { path = "../quarto-yaml" } quarto-pandoc-types = { path = "../quarto-pandoc-types" } quarto-error-reporting = { path = "../quarto-error-reporting" } diff --git a/crates/quarto-csl/Cargo.toml b/crates/quarto-csl/Cargo.toml index 1b7adfd68..a655997f8 100644 --- a/crates/quarto-csl/Cargo.toml +++ b/crates/quarto-csl/Cargo.toml @@ -9,7 +9,7 @@ description = "CSL (Citation Style Language) parsing with source tracking for Qu [dependencies] quarto-error-reporting = { path = "../quarto-error-reporting" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } quarto-xml = { path = "../quarto-xml" } thiserror = { workspace = true } diff --git a/crates/quarto-doctemplate/Cargo.toml b/crates/quarto-doctemplate/Cargo.toml index 6f6da3599..36ba8cc1d 100644 --- a/crates/quarto-doctemplate/Cargo.toml +++ b/crates/quarto-doctemplate/Cargo.toml @@ -22,7 +22,7 @@ quarto-treesitter-ast = { workspace = true } # Error reporting infrastructure quarto-parse-errors = { path = "../quarto-parse-errors" } quarto-error-reporting = { path = "../quarto-error-reporting" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } # Serialization (for TemplateValue conversion from JSON) serde = { workspace = true, features = ["derive"] } diff 
--git a/crates/quarto-error-reporting/Cargo.toml b/crates/quarto-error-reporting/Cargo.toml index 5fb5503cd..780a8b151 100644 --- a/crates/quarto-error-reporting/Cargo.toml +++ b/crates/quarto-error-reporting/Cargo.toml @@ -9,7 +9,7 @@ description = "Error reporting and diagnostic messages for Quarto" [dependencies] # Source location tracking -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } # Error reporting ariadne = { workspace = true } diff --git a/crates/quarto-pandoc-types/Cargo.toml b/crates/quarto-pandoc-types/Cargo.toml index 84f626efe..841e86376 100644 --- a/crates/quarto-pandoc-types/Cargo.toml +++ b/crates/quarto-pandoc-types/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true description = "Pandoc AST type definitions for Quarto" [dependencies] -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } hashlink = { version = "0.11.0", features = ["serde_impl"] } diff --git a/crates/quarto-parse-errors/Cargo.toml b/crates/quarto-parse-errors/Cargo.toml index 196881b3a..8f76f4bb7 100644 --- a/crates/quarto-parse-errors/Cargo.toml +++ b/crates/quarto-parse-errors/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true [dependencies] quarto-error-reporting = { path = "../quarto-error-reporting" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } tree-sitter = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = "1.0" diff --git a/crates/quarto-source-map/Cargo.toml b/crates/quarto-source-map/Cargo.toml deleted file mode 100644 index e231a7f95..000000000 --- a/crates/quarto-source-map/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "quarto-source-map" -version.workspace = true -edition.workspace = true -authors.workspace = true -license.workspace = true -repository.workspace = true - 
-[dependencies] -serde = { workspace = true, features = ["derive", "rc"] } -serde_json.workspace = true -smallvec.workspace = true - -[dev-dependencies] -serde_json.workspace = true - -[lints] -workspace = true diff --git a/crates/quarto-source-map/src/context.rs b/crates/quarto-source-map/src/context.rs deleted file mode 100644 index 657c9455c..000000000 --- a/crates/quarto-source-map/src/context.rs +++ /dev/null @@ -1,282 +0,0 @@ -//! Source context for managing files - -use crate::file_info::FileInformation; -use crate::types::FileId; -use serde::{Deserialize, Serialize}; - -use std::collections::HashMap; - -/// Context for managing source files -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SourceContext { - files: Vec, - /// Sparse mapping for non-sequential file IDs (e.g., from hash-based IDs) - /// Only populated when add_file_with_id is used - #[serde(skip_serializing_if = "HashMap::is_empty", default)] - file_id_map: HashMap, // Maps FileId.0 -> index in files vec -} - -/// A source file with content and metadata -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SourceFile { - /// File path or identifier - pub path: String, - /// File content (for ephemeral/in-memory files) - /// When Some, content is stored in memory (e.g., for or test files) - /// When None, content should be read from disk using the path - #[serde(skip_serializing_if = "Option::is_none")] - pub content: Option, - /// File information for efficient location lookups (optional for serialization) - #[serde(skip_serializing_if = "Option::is_none")] - pub file_info: Option, - /// File metadata - pub metadata: FileMetadata, -} - -/// Metadata about a source file -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileMetadata { - /// File type (qmd, yaml, md, etc.) 
- pub file_type: Option, -} - -impl SourceContext { - /// Create a new empty source context - pub fn new() -> Self { - SourceContext { - files: Vec::new(), - file_id_map: HashMap::new(), - } - } - - /// Add a file to the context and return its ID - /// - /// - If content is Some: Creates an ephemeral (in-memory) file. Content is stored and used for ariadne rendering. - /// - If content is None: Creates a disk-backed file. Content will be read from disk when needed (path must exist). - /// - /// For ephemeral files, FileInformation is created immediately from the provided content. - /// For disk-backed files, FileInformation is created by reading from disk if the path exists. - pub fn add_file(&mut self, path: String, content: Option) -> FileId { - let id = FileId(self.files.len()); - - // For ephemeral files (content provided), store it and create FileInformation - // For disk-backed files (no content), try to read from disk for FileInformation only - let (stored_content, content_for_info) = match content { - Some(c) => { - // Ephemeral file: store content and use it for FileInformation - (Some(c.clone()), Some(c)) - } - None => { - // Disk-backed file: don't store content, but try to read for FileInformation - (None, std::fs::read_to_string(&path).ok()) - } - }; - - let file_info = content_for_info.as_ref().map(|c| FileInformation::new(c)); - self.files.push(SourceFile { - path, - content: stored_content, - file_info, - metadata: FileMetadata { file_type: None }, - }); - id - } - - /// Add a file with pre-computed FileInformation - /// - /// This is useful when deserializing from formats (like JSON) that include - /// serialized FileInformation, avoiding the need to recompute line breaks - /// or read from disk. - /// - /// The file is created without content (content=None), so ariadne rendering - /// won't work, but map_offset() will work using the provided FileInformation. 
- pub fn add_file_with_info(&mut self, path: String, file_info: FileInformation) -> FileId { - let id = FileId(self.files.len()); - self.files.push(SourceFile { - path, - content: None, - file_info: Some(file_info), - metadata: FileMetadata { file_type: None }, - }); - id - } - - /// Add a file with a specific FileId - /// - /// This is useful when interfacing with systems that use hash-based or non-sequential - /// FileIds (like quarto-yaml). The FileId must not already exist in the context. - /// - /// # Panics - /// - /// Panics if the FileId already exists in the context. - pub fn add_file_with_id( - &mut self, - id: FileId, - path: String, - content: Option, - ) -> FileId { - // Check if ID already exists - if self.get_file(id).is_some() { - panic!("FileId {:?} already exists in SourceContext", id); - } - - // Process content same as add_file - let (stored_content, content_for_info) = match content { - Some(c) => (Some(c.clone()), Some(c)), - None => (None, std::fs::read_to_string(&path).ok()), - }; - - let file_info = content_for_info.as_ref().map(|c| FileInformation::new(c)); - - // Add to files vec and create mapping - let index = self.files.len(); - self.files.push(SourceFile { - path, - content: stored_content, - file_info, - metadata: FileMetadata { file_type: None }, - }); - - // Store mapping from FileId to index - self.file_id_map.insert(id.0, index); - - id - } - - /// Get a file by ID - pub fn get_file(&self, id: FileId) -> Option<&SourceFile> { - // First check if this is a mapped ID - if let Some(&index) = self.file_id_map.get(&id.0) { - return self.files.get(index); - } - - // Otherwise use direct indexing (for sequential IDs from add_file) - self.files.get(id.0) - } - - /// Create a copy without FileInformation (for serialization) - /// - /// Note: This preserves the content field for ephemeral files, as they need - /// content to be serialized for proper deserialization. 
Only FileInformation - /// is removed since it can be reconstructed from content. - pub fn without_content(&self) -> Self { - SourceContext { - files: self - .files - .iter() - .map(|f| SourceFile { - path: f.path.clone(), - content: f.content.clone(), // Preserve content for ephemeral files - file_info: None, - metadata: f.metadata.clone(), - }) - .collect(), - file_id_map: self.file_id_map.clone(), // Preserve mapping - } - } -} - -impl Default for SourceContext { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_empty_context() { - let ctx = SourceContext::new(); - assert!(ctx.get_file(FileId(0)).is_none()); - } - - #[test] - fn test_add_and_get_file() { - let mut ctx = SourceContext::new(); - let id = ctx.add_file("test.qmd".to_string(), Some("# Hello".to_string())); - - assert_eq!(id, FileId(0)); - let file = ctx.get_file(id).unwrap(); - assert_eq!(file.path, "test.qmd"); - assert!(file.file_info.is_some()); - - // Verify the file info was built correctly - let info = file.file_info.as_ref().unwrap(); - assert_eq!(info.total_length(), 7); - } - - #[test] - fn test_multiple_files() { - let mut ctx = SourceContext::new(); - let id1 = ctx.add_file("first.qmd".to_string(), Some("First".to_string())); - let id2 = ctx.add_file("second.qmd".to_string(), Some("Second".to_string())); - - assert_eq!(id1, FileId(0)); - assert_eq!(id2, FileId(1)); - - let file1 = ctx.get_file(id1).unwrap(); - let file2 = ctx.get_file(id2).unwrap(); - - assert_eq!(file1.path, "first.qmd"); - assert_eq!(file2.path, "second.qmd"); - assert!(file1.file_info.is_some()); - assert!(file2.file_info.is_some()); - assert_eq!(file1.file_info.as_ref().unwrap().total_length(), 5); - assert_eq!(file2.file_info.as_ref().unwrap().total_length(), 6); - } - - #[test] - fn test_file_without_content() { - let mut ctx = SourceContext::new(); - let id = ctx.add_file("no-content.qmd".to_string(), None); - - let file = ctx.get_file(id).unwrap(); - 
assert_eq!(file.path, "no-content.qmd"); - assert!(file.file_info.is_none()); - } - - #[test] - fn test_without_content() { - let mut ctx = SourceContext::new(); - ctx.add_file("test1.qmd".to_string(), Some("Content 1".to_string())); - ctx.add_file("test2.qmd".to_string(), Some("Content 2".to_string())); - - let ctx_no_content = ctx.without_content(); - - let file1 = ctx_no_content.get_file(FileId(0)).unwrap(); - let file2 = ctx_no_content.get_file(FileId(1)).unwrap(); - - assert_eq!(file1.path, "test1.qmd"); - assert_eq!(file2.path, "test2.qmd"); - assert!(file1.file_info.is_none()); - assert!(file2.file_info.is_none()); - } - - #[test] - fn test_serialization() { - let mut ctx = SourceContext::new(); - ctx.add_file("test.qmd".to_string(), Some("# Test".to_string())); - - let json = serde_json::to_string(&ctx).unwrap(); - let deserialized: SourceContext = serde_json::from_str(&json).unwrap(); - - let file = deserialized.get_file(FileId(0)).unwrap(); - assert_eq!(file.path, "test.qmd"); - assert!(file.file_info.is_some()); - assert_eq!(file.file_info.as_ref().unwrap().total_length(), 6); - } - - #[test] - fn test_serialization_without_content() { - let mut ctx = SourceContext::new(); - ctx.add_file("test.qmd".to_string(), Some("# Test".to_string())); - - let ctx_no_content = ctx.without_content(); - let json = serde_json::to_string(&ctx_no_content).unwrap(); - - // Verify that None file_info is skipped in serialization - assert!(!json.contains("\"file_info\"")); - } -} diff --git a/crates/quarto-source-map/src/file_info.rs b/crates/quarto-source-map/src/file_info.rs deleted file mode 100644 index 80933587d..000000000 --- a/crates/quarto-source-map/src/file_info.rs +++ /dev/null @@ -1,346 +0,0 @@ -//! 
Efficient file information for location lookups - -use crate::types::Location; -use serde::{Deserialize, Serialize}; - -/// Efficient file content analysis for location lookups -/// -/// This struct stores metadata about a file that enables fast conversion -/// from byte offsets to (row, column) positions without storing the full -/// file content. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct FileInformation { - /// Byte offsets of each newline character in the file - line_breaks: Vec, - - /// Total length of the file in bytes - total_length: usize, -} - -impl FileInformation { - /// Create file information by analyzing content - /// - /// Scans the content once to build an index of line break positions. - /// This enables O(log n) offset-to-location lookups via binary search. - /// - /// # Example - /// - /// ``` - /// use quarto_source_map::FileInformation; - /// - /// let info = FileInformation::new("line 1\nline 2\nline 3"); - /// ``` - pub fn new(content: &str) -> Self { - let line_breaks: Vec = content - .char_indices() - .filter_map(|(idx, ch)| if ch == '\n' { Some(idx) } else { None }) - .collect(); - - FileInformation { - line_breaks, - total_length: content.len(), - } - } - - /// Create file information from pre-computed parts - /// - /// This is useful when deserializing from formats that store - /// line break information directly (like JSON). - /// - /// # Example - /// - /// ``` - /// use quarto_source_map::FileInformation; - /// - /// let info = FileInformation::from_parts(vec![6, 13], 20); - /// ``` - pub fn from_parts(line_breaks: Vec, total_length: usize) -> Self { - FileInformation { - line_breaks, - total_length, - } - } - - /// Convert a byte offset to a Location with row and column - /// - /// Uses binary search to find which line contains the offset. - /// Runs in O(log n) time where n is the number of lines. 
- /// - /// The column is computed as character count (not byte count) from the start - /// of the line to the offset, which requires the content parameter. - /// - /// Returns None if the offset is out of bounds. - /// - /// # Example - /// - /// ``` - /// use quarto_source_map::FileInformation; - /// - /// let content = "hello\nworld"; - /// let info = FileInformation::new(content); - /// let loc = info.offset_to_location(6, content).unwrap(); - /// assert_eq!(loc.row, 1); - /// assert_eq!(loc.column, 0); - /// ``` - pub fn offset_to_location(&self, offset: usize, content: &str) -> Option { - if offset > self.total_length { - return None; - } - - // Binary search to find which line the offset is on - // line_breaks[i] is the position of the i-th newline (0-indexed) - // So line 0 contains [0, line_breaks[0]) - // Line 1 contains [line_breaks[0]+1, line_breaks[1]) - // etc. - - let row = match self.line_breaks.binary_search(&offset) { - // Offset is exactly at a newline character - // That newline belongs to the line it terminates, not the next line - Ok(idx) => idx, - // Offset is between line breaks (or before the first, or after the last) - Err(idx) => idx, - }; - - // Column is distance from the start of this line - let line_start = if row == 0 { - 0 - } else { - self.line_breaks[row - 1] + 1 // +1 to skip past the '\n' - }; - - // Count characters (not bytes) from line_start to offset. - // Tree-sitter and Pandoc-source byte ranges occasionally produce - // offsets that land inside a multi-byte UTF-8 sequence; floor such - // offsets to the start of the enclosing char so the slice stays on - // a valid char boundary instead of panicking. 
- let mut safe_offset = offset; - while safe_offset > line_start && !content.is_char_boundary(safe_offset) { - safe_offset -= 1; - } - let column = content[line_start..safe_offset].chars().count(); - - Some(Location { - offset, - row, - column, - }) - } - - /// Get the total length of the file in bytes - pub fn total_length(&self) -> usize { - self.total_length - } - - /// Get the line breaks array (byte offsets of newline characters) - pub fn line_breaks(&self) -> &[usize] { - &self.line_breaks - } - - /// Get the number of lines in the file - pub fn line_count(&self) -> usize { - // If there are no newlines, there's 1 line - // If there are n newlines, there are n+1 lines - self.line_breaks.len() + 1 - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_empty_file() { - let content = ""; - let info = FileInformation::new(content); - assert_eq!(info.total_length(), 0); - assert_eq!(info.line_count(), 1); - - let loc = info.offset_to_location(0, content).unwrap(); - assert_eq!(loc.offset, 0); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 0); - } - - #[test] - fn test_single_line() { - let content = "hello world"; - let info = FileInformation::new(content); - assert_eq!(info.total_length(), 11); - assert_eq!(info.line_count(), 1); - - // Start of line - let loc = info.offset_to_location(0, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 0); - - // Middle of line - let loc = info.offset_to_location(6, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 6); - - // End of line - let loc = info.offset_to_location(11, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 11); - } - - #[test] - fn test_multiple_lines() { - let content = "line 1\nline 2\nline 3"; - let info = FileInformation::new(content); - assert_eq!(info.line_count(), 3); - - // First line - let loc = info.offset_to_location(0, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 0); - - // At first newline 
(offset 6 is '\n') - let loc = info.offset_to_location(6, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 6); - - // Start of second line (offset 7 is 'l' in "line 2") - let loc = info.offset_to_location(7, content).unwrap(); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 0); - - // At second newline (offset 13 is '\n') - let loc = info.offset_to_location(13, content).unwrap(); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 6); - - // Start of third line (offset 14 is 'l' in "line 3") - let loc = info.offset_to_location(14, content).unwrap(); - assert_eq!(loc.row, 2); - assert_eq!(loc.column, 0); - - // End of file - let loc = info.offset_to_location(20, content).unwrap(); - assert_eq!(loc.row, 2); - assert_eq!(loc.column, 6); - } - - #[test] - fn test_out_of_bounds() { - let content = "hello"; - let info = FileInformation::new(content); - assert!(info.offset_to_location(100, content).is_none()); - } - - #[test] - fn test_unicode_content() { - // "café" - 'é' is 2 bytes in UTF-8 - let content = "café\nwörld"; // 4 chars + 1 newline + 5 chars = but more bytes - let info = FileInformation::new(content); - - // Verify we're working with byte offsets for positioning, but character counts for columns - // "café" is 5 bytes: c(1) a(1) f(1) é(2) - // newline is 1 byte - // So second line starts at byte offset 6 - let loc = info.offset_to_location(6, content).unwrap(); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 0); - } - - #[test] - fn test_file_ending_with_newline() { - let content = "line 1\nline 2\n"; - let info = FileInformation::new(content); - assert_eq!(info.line_count(), 3); // Empty third line - - // The final newline - let loc = info.offset_to_location(13, content).unwrap(); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 6); - - // After the final newline (empty line 3) - let loc = info.offset_to_location(14, content).unwrap(); - assert_eq!(loc.row, 2); - assert_eq!(loc.column, 0); - } - - #[test] - fn 
test_consecutive_newlines() { - let content = "a\n\n\nb"; - let info = FileInformation::new(content); - assert_eq!(info.line_count(), 4); - - // First line - let loc = info.offset_to_location(0, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 0); - - // First newline (offset 1) - let loc = info.offset_to_location(1, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 1); - - // Empty second line (offset 2) - let loc = info.offset_to_location(2, content).unwrap(); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 0); - - // Empty third line (offset 3) - let loc = info.offset_to_location(3, content).unwrap(); - assert_eq!(loc.row, 2); - assert_eq!(loc.column, 0); - - // Fourth line 'b' (offset 4) - let loc = info.offset_to_location(4, content).unwrap(); - assert_eq!(loc.row, 3); - assert_eq!(loc.column, 0); - } - - #[test] - fn test_multibyte_utf8_column_should_be_character_count() { - // This test verifies that column is character count, not byte offset - // Swedish text with multi-byte UTF-8 characters (å = 2 bytes, ä = 2 bytes, ö = 2 bytes) - let content = "Gällande frågorna om något"; - // Character positions: G=0, ä=1, l=2, l=3, a=4, n=5, d=6, e=7, space=8, f=9, r=10, å=11, g=12, ... - // Byte positions: G=0, ä=1-2, l=3, l=4, a=5, n=6, d=7, e=8, space=9, f=10, r=11, å=12-13, g=14, ... 
- - let info = FileInformation::new(content); - - // Test position at "å" in "frågorna" (character 11, byte offset starts at 12) - // The byte offset 12 is where "å" starts (it's 2 bytes: 12-13) - let loc = info.offset_to_location(12, content).unwrap(); - assert_eq!(loc.row, 0); - // With the fix, this should return 11 (character count), not 12 (byte offset) - assert_eq!( - loc.column, 11, - "Column should be character count (11), not byte offset (12)" - ); - - // Test position at "g" after "å" in "frågorna" (character 12, byte offset 14) - let loc = info.offset_to_location(14, content).unwrap(); - assert_eq!(loc.row, 0); - // Should return 12 (character count), not 14 (byte offset) - assert_eq!( - loc.column, 12, - "Column should be character count (12), not byte offset (14)" - ); - } - - #[test] - fn test_offset_inside_multibyte_char_does_not_panic() { - // Regression: a byte offset that lands inside a multi-byte UTF-8 - // sequence used to panic with "byte index N is not a char boundary". - // Tree-sitter and Pandoc-source byte ranges can both produce such - // offsets in the wild, so the function must return a valid Location - // (rounded down to the previous char boundary) rather than panic. - let content = "x ❤️ y"; // ❤ = 3 bytes (E2 9D A4), ️ = 3 bytes (EF B8 8F) - let info = FileInformation::new(content); - - // Byte 3 is the second byte of "❤" (which spans bytes 2..5). - // Floor to the start of "❤" at byte 2 → 2 chars before it ("x "). - let loc = info.offset_to_location(3, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 2); - - // Byte 4 is the third byte of "❤" — also inside the same char. - let loc = info.offset_to_location(4, content).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 2); - } -} diff --git a/crates/quarto-source-map/src/lib.rs b/crates/quarto-source-map/src/lib.rs deleted file mode 100644 index e09f26d91..000000000 --- a/crates/quarto-source-map/src/lib.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! 
Source mapping for Quarto -//! -//! This crate provides unified source location tracking with support for -//! transformations (extraction, concatenation, normalization). It enables -//! precise error reporting and mapping positions back through transformation -//! chains to original source files. -//! -//! # Overview -//! -//! The core types are: -//! - [`SourceInfo`]: Enum tracking a location and its transformation history -//! - [`SourceContext`]: Manages files and provides content for mapping -//! - [`MappedLocation`]: Result of mapping through transformation chains -//! -//! # Example -//! -//! ```rust -//! use quarto_source_map::*; -//! -//! // Create a context and register a file -//! let mut ctx = SourceContext::new(); -//! let file_id = ctx.add_file("main.qmd".into(), Some("# Hello\nWorld".into())); -//! -//! // Create a source location (stores only offsets) -//! let info = SourceInfo::original(file_id, 0, 7); -//! -//! // Map to get row/column information -//! let mapped = info.map_offset(0, &ctx).unwrap(); -//! assert_eq!(mapped.location.row, 0); -//! assert_eq!(mapped.location.column, 0); -//! ``` - -pub mod context; -pub mod file_info; -pub mod mapping; -pub mod source_info; -pub mod types; -pub mod utils; - -// Re-export main types -pub use context::{FileMetadata, SourceContext, SourceFile}; -pub use file_info::FileInformation; -pub use mapping::MappedLocation; -pub use source_info::{Anchor, AnchorRole, By, SourceInfo, SourcePiece}; -pub use types::{FileId, Location, Range}; -pub use utils::{line_col_to_offset, offset_to_location, range_from_offsets}; diff --git a/crates/quarto-source-map/src/mapping.rs b/crates/quarto-source-map/src/mapping.rs deleted file mode 100644 index 98daa6ad0..000000000 --- a/crates/quarto-source-map/src/mapping.rs +++ /dev/null @@ -1,264 +0,0 @@ -//! 
Position mapping through transformation chains - -use crate::types::{FileId, Location}; -use crate::{SourceContext, SourceInfo}; - -/// Result of mapping a position back to an original file -#[derive(Debug, Clone, PartialEq)] -pub struct MappedLocation { - /// The original file - pub file_id: FileId, - /// Location in the original file - pub location: Location, -} - -impl SourceInfo { - /// Map an offset in the current text back to original source - pub fn map_offset(&self, offset: usize, ctx: &SourceContext) -> Option { - match self { - SourceInfo::Original { - file_id, - start_offset, - .. - } => { - // Direct mapping to original file - let file = ctx.get_file(*file_id)?; - let file_info = file.file_info.as_ref()?; - - // Compute the absolute offset in the file - let absolute_offset = start_offset + offset; - - // Get file content: use stored content for ephemeral files, or read from disk - let content = match &file.content { - Some(c) => c.clone(), - None => std::fs::read_to_string(&file.path).ok()?, - }; - - // Convert offset to Location with row/column using efficient binary search - let location = file_info.offset_to_location(absolute_offset, &content)?; - - Some(MappedLocation { - file_id: *file_id, - location, - }) - } - SourceInfo::Substring { - parent, - start_offset, - .. - } => { - // Map to parent coordinates and recurse - let parent_offset = start_offset + offset; - parent.map_offset(parent_offset, ctx) - } - SourceInfo::Concat { pieces } => { - // Find which piece contains this offset - for piece in pieces { - let piece_start = piece.offset_in_concat; - let piece_end = piece_start + piece.length; - - if offset >= piece_start && offset < piece_end { - // Offset is within this piece - let offset_in_piece = offset - piece_start; - return piece.source_info.map_offset(offset_in_piece, ctx); - } - } - // Exclusive end: `offset == total` matches no piece above; map it to - // the end of the last piece (like Original/Substring's map_offset(length)). 
- if let Some(last) = pieces.last() - && offset == last.offset_in_concat + last.length - { - return last.source_info.map_offset(last.length, ctx); - } - None // Offset not found in any piece - } - SourceInfo::Generated { .. } => { - // Generated nodes have no offset-within-current-text; - // callers wanting source coordinates use resolve_byte_range. - None - } - } - } - - /// Map a range in the current text back to original source - pub fn map_range( - &self, - start: usize, - end: usize, - ctx: &SourceContext, - ) -> Option<(MappedLocation, MappedLocation)> { - let start_mapped = self.map_offset(start, ctx)?; - let end_mapped = self.map_offset(end, ctx)?; - Some((start_mapped, end_mapped)) - } -} - -#[cfg(test)] -mod tests { - use crate::types::{Location, Range}; - use crate::{SourceContext, SourceInfo}; - - #[test] - fn test_map_offset_original() { - let mut ctx = SourceContext::new(); - let file_id = ctx.add_file("test.qmd".to_string(), Some("hello\nworld".to_string())); - - let info = SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 11, - row: 1, - column: 5, - }, - }, - ); - - // Test mapping offset 0 (start of first line) - let mapped = info.map_offset(0, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id); - assert_eq!(mapped.location.offset, 0); - assert_eq!(mapped.location.row, 0); - assert_eq!(mapped.location.column, 0); - - // Test mapping offset 6 (start of second line) - let mapped = info.map_offset(6, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id); - assert_eq!(mapped.location.offset, 6); - assert_eq!(mapped.location.row, 1); - assert_eq!(mapped.location.column, 0); - } - - #[test] - fn test_map_offset_substring() { - let mut ctx = SourceContext::new(); - let file_id = ctx.add_file("test.qmd".to_string(), Some("0123456789".to_string())); - - let original = SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - 
}, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }, - ); - - // Extract substring from offset 3 to 7 ("3456") - let substring = SourceInfo::substring(original, 3, 7); - - // Map offset 0 in substring (should be '3' at offset 3 in original) - let mapped = substring.map_offset(0, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id); - assert_eq!(mapped.location.offset, 3); - - // Map offset 2 in substring (should be '5' at offset 5 in original) - let mapped = substring.map_offset(2, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id); - assert_eq!(mapped.location.offset, 5); - } - - #[test] - fn test_map_offset_concat() { - let mut ctx = SourceContext::new(); - let file_id1 = ctx.add_file("first.qmd".to_string(), Some("AAA".to_string())); - let file_id2 = ctx.add_file("second.qmd".to_string(), Some("BBB".to_string())); - - let info1 = SourceInfo::from_range( - file_id1, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 3, - row: 0, - column: 3, - }, - }, - ); - - let info2 = SourceInfo::from_range( - file_id2, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 3, - row: 0, - column: 3, - }, - }, - ); - - // Concatenate: "AAABBB" - let concat = SourceInfo::concat(vec![(info1, 3), (info2, 3)]); - - // Map offset 1 (should be in first piece, second 'A') - let mapped = concat.map_offset(1, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id1); - assert_eq!(mapped.location.offset, 1); - - // Map offset 4 (should be in second piece, second 'B') - let mapped = concat.map_offset(4, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id2); - assert_eq!(mapped.location.offset, 1); - - // Exclusive end (offset 6 == total): maps to end of last piece - let mapped = concat.map_offset(6, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id2); - assert_eq!(mapped.location.offset, 3); - - // map_range over the whole concat: exclusive end must resolve - let (start, end) 
= concat.map_range(0, 6, &ctx).unwrap(); - assert_eq!(start.file_id, file_id1); - assert_eq!(start.location.offset, 0); - assert_eq!(end.file_id, file_id2); - assert_eq!(end.location.offset, 3); - } - - #[test] - fn test_map_range() { - let mut ctx = SourceContext::new(); - let file_id = ctx.add_file("test.qmd".to_string(), Some("hello\nworld".to_string())); - - let info = SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 11, - row: 1, - column: 5, - }, - }, - ); - - // Map range [0, 5) which is "hello" - let (start, end) = info.map_range(0, 5, &ctx).unwrap(); - assert_eq!(start.file_id, file_id); - assert_eq!(start.location.offset, 0); - assert_eq!(end.file_id, file_id); - assert_eq!(end.location.offset, 5); - } -} diff --git a/crates/quarto-source-map/src/source_info.rs b/crates/quarto-source-map/src/source_info.rs deleted file mode 100644 index 196d4949c..000000000 --- a/crates/quarto-source-map/src/source_info.rs +++ /dev/null @@ -1,2031 +0,0 @@ -//! Source information with transformation tracking - -use crate::types::{FileId, Range}; -use serde::{Deserialize, Serialize}; -use smallvec::SmallVec; -use std::sync::Arc; - -/// Source information tracking a location and its transformation history -/// -/// This enum stores only byte offsets. Row and column information is computed -/// on-demand via `map_offset()` using the FileInformation line break index. -/// -/// Design notes: -/// - Original: Points directly to a file with byte offsets -/// - Substring: Points to a range within a parent SourceInfo (offsets are relative to parent) -/// - Concat: Combines multiple SourceInfo pieces (preserves provenance when coalescing text) -/// - Generated: Produced by a pipeline transform. `by` records the producer; `from` -/// records source-side anchors (empty for pure synthesis, `Invocation` for -/// shortcode-style resolutions). 
-/// -/// The Transformed variant was removed because it's not used in production code. -/// Text transformations (smart quotes, em-dashes) use Original SourceInfo pointing -/// to the pre-transformation text, accepting that the byte offsets are approximate. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum SourceInfo { - /// Direct position in an original file - /// - /// Stores only byte offsets. Use `map_offset()` to get row/column information. - Original { - file_id: FileId, - start_offset: usize, - end_offset: usize, - }, - /// Substring extraction from a parent source - /// - /// Offsets are relative to the parent's text. - /// The chain of Substrings always resolves to an Original. - Substring { - parent: Arc, - start_offset: usize, - end_offset: usize, - }, - /// Concatenation of multiple sources - /// - /// Used when coalescing adjacent text nodes while preserving - /// the fact that they came from different source locations. - Concat { pieces: Vec }, - /// Node produced by a pipeline transform - /// - /// `by` records the producer ("which transform made me"); `from` is a - /// list of typed, role-labeled source-info pointers ("which source - /// bytes contributed to me"). Empty `from` means pure synthesis - /// (sectionize wrappers, filter constructions, title-block h1). - /// An `Invocation` anchor present means there is a source-side - /// preimage (every shortcode resolution). - Generated { - by: By, - #[serde(default, skip_serializing_if = "SmallVec::is_empty")] - from: SmallVec<[Anchor; 2]>, - }, -} - -/// Producer identity for a [`SourceInfo::Generated`] node. -/// -/// `kind` is a short, kebab-case identifier describing which transform -/// produced the node ("filter", "shortcode", "sectionize", ...). Third -/// parties should namespace as `ext//`. -/// -/// `data` is per-kind configuration that is **not** a source-info pointer. -/// Source-side anchors live in the parent `Generated.from` list, not here. 
-/// `Null` for kinds that don't carry per-instance data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct By { - /// Short kind tag, kebab-case. Examples: "filter", "shortcode", - /// "sectionize", "user-edit", "title-block". - /// Third-party kinds should namespace: "ext/my-extension/foo". - pub kind: String, - - /// Per-kind configuration that is NOT a source-info pointer. - /// Anchors live in `Generated.from`, not here. - /// `Null` for kinds that don't carry per-instance data. - #[serde(default, skip_serializing_if = "serde_json::Value::is_null")] - pub data: serde_json::Value, -} - -/// Role describing what kind of source-side contribution an anchor records. -/// -/// The known roles are load-bearing — `Invocation` is what the writer's -/// preimage walk and attribution consult; `ValueSource` is diagnostic-only. -/// `Other(String)` is an open escape hatch for extension-defined roles. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum AnchorRole { - /// The user-written construct that triggered this node's creation - /// (e.g. the `{{< meta foo >}}` token in the active document). - /// Load-bearing: the writer's `preimage_in` and attribution's - /// `resolve_byte_range` consult the first anchor with this role. - /// At most one per node by convention. - Invocation, - - /// Where the VALUE this node carries was defined, when distinct - /// from the invocation site (e.g. `footer:` in `_metadata.yml` for - /// a `{{< meta footer >}}` resolution). Diagnostic-only — does not - /// affect the writer or attribution decisions in v1. - ValueSource, - - /// Extension-defined or future role we haven't enumerated. - /// String is kebab-case, namespaced (`ext//`). - /// - /// **`preimage_in` does not walk this role.** Future anchor roles - /// default to non-walked unless explicitly added to - /// [`SourceInfo::preimage_in`]'s `Generated` arm. 
Extensions adding - /// `Other("…")` should treat this as a feature: attribution data - /// attached via `Other` is not accidentally consulted by the writer's - /// byte-copying path. If a role *does* contribute to body-text - /// preimage in `target`, it must be explicitly enumerated in - /// `preimage_in`. - Other(String), -} - -/// A single typed, role-labeled source-info pointer attached to a -/// [`SourceInfo::Generated`] node. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Anchor { - pub role: AnchorRole, - pub source_info: Arc, -} - -/// A piece of a concatenated source -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SourcePiece { - /// Source information for this piece - pub source_info: SourceInfo, - /// Where this piece starts in the concatenated string - pub offset_in_concat: usize, - /// Length of this piece - pub length: usize, -} - -impl Default for SourceInfo { - fn default() -> Self { - SourceInfo::Original { - file_id: FileId(0), - start_offset: 0, - end_offset: 0, - } - } -} - -impl SourceInfo { - /// Deprecated: use `SourceInfo::for_test()` in tests or an explicit - /// `Generated{by: }` in production. See provenance-contract.md. - /// - /// This inherent method shadows `Default::default()` so that callers - /// writing `SourceInfo::default()` see a deprecation error under - /// `deny(deprecated)`. The trait impl is retained (and called by this - /// method) so that `unwrap_or_default()` and `#[derive(Default)]` still - /// compile; those are caught by separate grep tooling. - #[deprecated( - since = "0.1.0", - note = "Use SourceInfo::for_test() in tests, or the appropriate Generated{by: } in production. See provenance-contract.md." 
- )] - #[doc(hidden)] - // Intentionally shadows `Default::default` (see the doc comment above): this - // deprecated inherent method is the provenance-contract tripwire, kept so - // `unwrap_or_default()`/`#[derive(Default)]` still compile while flagging - // direct calls. The name must match the trait method, so the lint is moot. - #[allow(clippy::should_implement_trait)] - pub fn default() -> Self { - ::default() - } - - /// Create source info for a position in an original file (from offsets) - pub fn original(file_id: FileId, start_offset: usize, end_offset: usize) -> Self { - SourceInfo::Original { - file_id, - start_offset, - end_offset, - } - } - - /// Create source info for a position in an original file (from Range) - /// - /// This is a compatibility helper for code that still uses Range. - /// The row and column information in the Range is ignored; only offsets are stored. - pub fn from_range(file_id: FileId, range: Range) -> Self { - SourceInfo::Original { - file_id, - start_offset: range.start.offset, - end_offset: range.end.offset, - } - } - - /// Create source info for a substring extraction - pub fn substring(parent: SourceInfo, start: usize, end: usize) -> Self { - SourceInfo::Substring { - parent: Arc::new(parent), - start_offset: start, - end_offset: end, - } - } - - /// Create source info for concatenated sources - pub fn concat(pieces: Vec<(SourceInfo, usize)>) -> Self { - let source_pieces: Vec = pieces - .into_iter() - .map(|(source_info, length)| SourcePiece { - source_info, - offset_in_concat: 0, // Will be calculated based on cumulative lengths - length, - }) - .collect(); - - // Calculate cumulative offsets - let mut cumulative_offset = 0; - let pieces_with_offsets: Vec = source_pieces - .into_iter() - .map(|mut piece| { - piece.offset_in_concat = cumulative_offset; - cumulative_offset += piece.length; - piece - }) - .collect(); - - SourceInfo::Concat { - pieces: pieces_with_offsets, - } - } - - /// Create a [`SourceInfo::Generated`] with 
an empty anchor list. - /// - /// Use [`SourceInfo::append_anchor`] to add anchors after construction. - /// For Generated nodes that need to carry anchors at construction - /// time, build the variant directly: `SourceInfo::Generated { by, from }`. - pub fn generated(by: By) -> Self { - SourceInfo::Generated { - by, - from: SmallVec::new(), - } - } - - /// Convenience for tests: produce a non-atomic `Generated` source_info - /// with `By::test_scaffold()` and no anchors. Use this in test code - /// where a constructor requires a `SourceInfo` but there's no real - /// provenance to record. Replaces the historical - /// `SourceInfo::default()` pattern in tests. - pub fn for_test() -> Self { - SourceInfo::Generated { - by: By::test_scaffold(), - from: SmallVec::new(), - } - } - - /// If this is a [`SourceInfo::Generated`], return the first anchor whose - /// role is [`AnchorRole::Invocation`]. - /// - /// Returns `None` otherwise (including for non-`Generated` variants). - /// By convention there is at most one `Invocation` anchor per node. - pub fn invocation_anchor(&self) -> Option<&Arc> { - match self { - SourceInfo::Generated { from, .. } => from - .iter() - .find(|a| matches!(a.role, AnchorRole::Invocation)) - .map(|a| &a.source_info), - _ => None, - } - } - - /// If this is a [`SourceInfo::Generated`], return the first anchor whose - /// role is [`AnchorRole::ValueSource`]. - /// - /// Returns `None` otherwise. By convention there is at most one - /// `ValueSource` anchor per node. - pub fn value_source_anchor(&self) -> Option<&Arc> { - match self { - SourceInfo::Generated { from, .. } => from - .iter() - .find(|a| matches!(a.role, AnchorRole::ValueSource)) - .map(|a| &a.source_info), - _ => None, - } - } - - /// Iterate over every anchor in this [`SourceInfo::Generated`] whose role - /// equals `role`. - /// - /// Returns an empty iterator for non-`Generated` variants. Iteration order - /// is the append order. 
- pub fn anchors_with_role<'a>( - &'a self, - role: &'a AnchorRole, - ) -> Box> + 'a> { - match self { - SourceInfo::Generated { from, .. } => Box::new( - from.iter() - .filter(move |a| &a.role == role) - .map(|a| &a.source_info), - ), - _ => Box::new(std::iter::empty()), - } - } - - /// Append `(role, source_info)` to this [`SourceInfo::Generated`]'s - /// anchor list. - /// - /// Panics if `self` is not [`SourceInfo::Generated`]. By convention there - /// is at most one anchor per known role; appending a second anchor with - /// the same role does not replace the first — accessors that find by - /// role return the earliest match. - pub fn append_anchor(&mut self, role: AnchorRole, source_info: Arc) { - match self { - SourceInfo::Generated { from, .. } => { - from.push(Anchor { role, source_info }); - } - _ => panic!("append_anchor called on non-Generated SourceInfo"), - } - } - - /// Combine two SourceInfo objects representing adjacent text - /// - /// This creates a Concat mapping that preserves both sources. - /// The resulting SourceInfo spans from the start of self to the end of other. - pub fn combine(&self, other: &SourceInfo) -> Self { - let self_length = self.length(); - let other_length = other.length(); - - SourceInfo::concat(vec![ - (self.clone(), self_length), - (other.clone(), other_length), - ]) - } - - /// Get the length (in bytes) represented by this SourceInfo - pub fn length(&self) -> usize { - match self { - SourceInfo::Original { - start_offset, - end_offset, - .. - } => end_offset - start_offset, - SourceInfo::Substring { - start_offset, - end_offset, - .. - } => end_offset - start_offset, - SourceInfo::Concat { pieces } => pieces.iter().map(|p| p.length).sum(), - SourceInfo::Generated { .. } => 0, - } - } - - /// Get the start offset for this SourceInfo - /// - /// For Original and Substring, returns the start_offset field. - /// For Concat, returns 0 (the concat represents a new text starting at 0). - /// For Generated, returns 0. 
- pub fn start_offset(&self) -> usize { - match self { - SourceInfo::Original { start_offset, .. } => *start_offset, - SourceInfo::Substring { start_offset, .. } => *start_offset, - SourceInfo::Concat { .. } => 0, - SourceInfo::Generated { .. } => 0, - } - } - - /// Get the end offset for this SourceInfo - /// - /// For Original and Substring, returns the end_offset field. - /// For Concat, returns the total length. - /// For Generated, returns 0. - pub fn end_offset(&self) -> usize { - match self { - SourceInfo::Original { end_offset, .. } => *end_offset, - SourceInfo::Substring { end_offset, .. } => *end_offset, - SourceInfo::Concat { .. } => self.length(), - SourceInfo::Generated { .. } => 0, - } - } - - /// Chain-resolve to `(file_id, start_offset, end_offset)` in the - /// root source file. - /// - /// Returns `None` for `Concat` — Concat doesn't map cleanly to a - /// single contiguous byte range. For `Generated`, delegates to the - /// first `Invocation` anchor and recurses (`None` when no - /// `Invocation` anchor is present). The attribution v1 sidecar - /// relies on this contract; project-scoped (v2) features that need - /// the full chain resolver should use `map_offset` against a - /// `SourceContext` instead. - pub fn resolve_byte_range(&self) -> Option<(usize, usize, usize)> { - match self { - SourceInfo::Original { - file_id, - start_offset, - end_offset, - } => Some((file_id.0, *start_offset, *end_offset)), - SourceInfo::Substring { - parent, - start_offset, - end_offset, - } => { - let (fid, parent_start, _) = parent.resolve_byte_range()?; - Some((fid, parent_start + start_offset, parent_start + end_offset)) - } - SourceInfo::Concat { .. } => None, - SourceInfo::Generated { .. } => self - .invocation_anchor() - .and_then(|si| si.resolve_byte_range()), - } - } - - /// Byte range in `target` that this `SourceInfo`'s preimage covers, if any. 
- /// - /// This is the writer's "can I Verbatim-copy bytes from `target` for the - /// node carrying this source_info?" check. - /// - /// Semantics by variant: - /// - `Original` → `Some(start..end)` iff the file matches `target`, else `None`. - /// - `Substring` → recurse the parent; offsets compose additively. - /// - `Concat` → every piece must resolve into `target` AND the resolved - /// ranges must be byte-contiguous (no gaps, no overlaps). A gappy Concat - /// returns `None` — the writer can't Verbatim-copy a non-contiguous span. - /// - `Generated` → walk the `Invocation` anchor only via - /// [`invocation_anchor`](Self::invocation_anchor). **No other anchor - /// role is consulted** — not `ValueSource` (Plan 9), not future - /// `Dispatch` (Plan 10), not `AnchorRole::Other`. See the - /// role-asymmetry section below. - /// - /// # Role asymmetry - /// - /// `preimage_in` only walks `AnchorRole::Invocation`. This is load-bearing: - /// copying bytes from a `ValueSource` source range would emit raw YAML - /// metadata (or whatever the value lived in) into the body — a hard - /// correctness bug. The same applies to `Dispatch` (which points at Lua - /// source) and to any extension-defined `Other` role. - /// - /// **Future anchor roles default to non-walked.** Extensions introducing - /// `AnchorRole::Other("…")` should treat this as a feature: their - /// attribution metadata is not accidentally consulted by the writer's - /// byte-copying path. If a role *does* contribute to body-text preimage, - /// it must be explicitly added to this function's `Generated` arm. - pub fn preimage_in(&self, target: FileId) -> Option> { - match self { - SourceInfo::Original { - file_id, - start_offset, - end_offset, - } if *file_id == target => Some(*start_offset..*end_offset), - SourceInfo::Original { .. 
} => None, - SourceInfo::Substring { - parent, - start_offset, - end_offset, - } => { - let parent_range = parent.preimage_in(target)?; - Some(parent_range.start + start_offset..parent_range.start + end_offset) - } - SourceInfo::Concat { pieces } => { - let ranges: Vec> = pieces - .iter() - .map(|p| p.source_info.preimage_in(target)) - .collect::>>()?; - if ranges.is_empty() { - return None; - } - if ranges.windows(2).all(|w| w[0].end == w[1].start) { - let first = ranges.first().unwrap().start; - let last = ranges.last().unwrap().end; - Some(first..last) - } else { - None - } - } - SourceInfo::Generated { .. } => self - .invocation_anchor() - .and_then(|si| si.preimage_in(target)), - } - } - - /// Remap every `FileId` referenced by this `SourceInfo` (including those - /// inside `Substring` parents and `Concat` pieces) using the provided - /// mapping function. - /// - /// Used when merging ASTs that were parsed against different files into a - /// single `ASTContext` with a shared filename table — callers shift each - /// AST's `FileId`s to their slot in the merged table before combining. - pub fn remap_file_ids(&mut self, map: &F) - where - F: Fn(FileId) -> FileId, - { - match self { - SourceInfo::Original { file_id, .. } => { - *file_id = map(*file_id); - } - SourceInfo::Substring { parent, .. } => { - // Arc::make_mut clones if there are other references. - let parent = Arc::make_mut(parent); - parent.remap_file_ids(map); - } - SourceInfo::Concat { pieces } => { - for piece in pieces { - piece.source_info.remap_file_ids(map); - } - } - SourceInfo::Generated { from, .. } => { - for anchor in from { - // Arc::make_mut clones if there are other references. - let inner = Arc::make_mut(&mut anchor.source_info); - inner.remap_file_ids(map); - } - } - } - } - - /// First `FileId` reachable from this `SourceInfo`'s root. - /// - /// - `Original` → `Some(file_id)`. - /// - `Substring` → recurse parent. 
- /// - `Concat` → `pieces.iter().find_map(|p| p.source_info.root_file_id())` - /// (`find_map` semantics — skips Generated holes and empty pieces). - /// - `Generated` → `invocation_anchor().and_then(|si| si.root_file_id())`; - /// `None` when no `Invocation` anchor is present. - pub fn root_file_id(&self) -> Option { - match self { - SourceInfo::Original { file_id, .. } => Some(*file_id), - SourceInfo::Substring { parent, .. } => parent.root_file_id(), - SourceInfo::Concat { pieces } => { - pieces.iter().find_map(|p| p.source_info.root_file_id()) - } - SourceInfo::Generated { .. } => { - self.invocation_anchor().and_then(|si| si.root_file_id()) - } - } - } - - /// Insert every `FileId` reachable from this `SourceInfo` into `out`. - /// - /// Walks every `Original`, every `Substring` parent, every `Concat` - /// piece, and every `Generated` anchor (all roles — `Invocation`, - /// `ValueSource`, `Other`). - pub fn collect_file_ids(&self, out: &mut std::collections::HashSet) { - match self { - SourceInfo::Original { file_id, .. } => { - out.insert(*file_id); - } - SourceInfo::Substring { parent, .. } => parent.collect_file_ids(out), - SourceInfo::Concat { pieces } => { - for piece in pieces { - piece.source_info.collect_file_ids(out); - } - } - SourceInfo::Generated { from, .. } => { - for anchor in from { - anchor.source_info.collect_file_ids(out); - } - } - } - } -} - -impl By { - /// Producer kind for a node constructed by a Lua filter - /// (e.g. `pandoc.Str("decoration")` inside a filter callback). - /// - /// `filter_path` is the path the Lua engine reported via - /// `debug.getinfo(...).source` (with the leading "@" stripped); - /// `line` is the line number inside that file where the constructor - /// ran. Until Lua-file-registration lands (bd-36fr9), `(filter_path, - /// line)` lives in `by.data`; afterwards it migrates to a `Dispatch` - /// anchor and `by.data` shrinks to `{}`. 
- pub fn filter(filter_path: impl Into, line: usize) -> Self { - Self { - kind: "filter".to_string(), - data: serde_json::json!({ - "filter_path": filter_path.into(), - "line": line, - }), - } - } - - /// Producer kind for the `SectionizeTransform`'s synthesized section - /// Divs. Children remain editable; the wrapper itself is structural. - pub fn sectionize() -> Self { - Self { - kind: "sectionize".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for React-constructed (user-typed) content reaching - /// the AST through the q2-preview client. - pub fn user_edit() -> Self { - Self { - kind: "user-edit".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for shortcode resolutions. - /// - /// **Invariant.** Every `Generated { by: shortcode(...), .. }` must - /// carry at least one `Invocation` anchor in `from` pointing at the - /// source token's byte range. Use only inside a `Generated` whose - /// anchor list is populated; constructing the bare shape with empty - /// `from` is rejected by Plan 6's audit-completion test and trips - /// Plan 7's writer `debug_assert!`. - pub fn shortcode(name: impl Into) -> Self { - Self { - kind: "shortcode".to_string(), - data: serde_json::json!({ "name": name.into() }), - } - } - - /// Producer kind for `IncludeStage`'s expansion wrapper. Note that - /// most include-related synthesized content keeps its `Original` - /// `source_info` (inherited from the include-line Paragraph) — this - /// kind is only used where a `Generated` is explicitly required. - pub fn include() -> Self { - Self { - kind: "include".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for the title-block stage's synthesized title `h1`. - pub fn title_block() -> Self { - Self { - kind: "title-block".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for the footnotes stage's container Div. 
- pub fn footnotes() -> Self { - Self { - kind: "footnotes".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for `RevealSlidesTransform`'s synthesized slide - /// structure — title-slide Div, section wrappers, speaker-notes Div, - /// and any other chrome built from the slide-level heading tree. - /// Non-atomic: the slide container is structural chrome; the content - /// inside (headings, paragraphs) retains its own source_info. - pub fn revealjs() -> Self { - Self { - kind: "revealjs".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for the appendix-structure stage's wrapper Div. - pub fn appendix() -> Self { - Self { - kind: "appendix".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for parser-side synthetic Spaces inserted by the - /// tree-sitter post-processing pass. - pub fn tree_sitter_postprocess() -> Self { - Self { - kind: "tree-sitter-postprocess".to_string(), - data: serde_json::Value::Null, - } - } - - /// "We don't know" placeholder used by `json::read_completing_source_info` - /// when a node arrives without an `s:` field from outside the q2 - /// source-tracking world (qmd-syntax-helper Pandoc subprocess, CLI - /// `--from json`, external filter binaries, Lua AST handoff). - /// - /// Non-atomic by design — nodes carrying `By::unknown()` remain - /// editable in the preview; user edits re-stamp them as `user_edit` - /// on save. See Plan 7f Phase 4's per-caller table for placement - /// guidance. - pub fn unknown() -> Self { - Self { - kind: "unknown".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for test scaffolding. Non-atomic; appears only in - /// test code where `source_info` is required by a constructor but - /// has no real provenance to record. Paired with - /// [`SourceInfo::for_test`]. 
- pub fn test_scaffold() -> Self { - Self { - kind: "test-scaffold".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for citeproc-rendered content (citation Str - /// replacements, bibliography `Div`s, `#refs` wrappers). The bytes - /// come from CSL processing of bibliographic metadata, not from - /// user-written source. - /// - /// Atomic — citeproc output is generated content the user can't - /// edit through the preview; changes go through the CSL pipeline, - /// not through inline editing. - pub fn citeproc() -> Self { - Self { - kind: "citeproc".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for content synthesized from execution-engine - /// output (Jupyter cell stdout / stderr, rich-display MIME bundles, - /// kernel error tracebacks). The bytes come from kernel execution, - /// not from user-written source. - /// - /// Atomic — execution outputs are regenerated on every re-run; - /// editing them through the preview would be a UX bug. - pub fn jupyter_output() -> Self { - Self { - kind: "jupyter-output".to_string(), - data: serde_json::Value::Null, - } - } - - /// Producer kind for callout-decoration synthesis: - /// default-title injection (`Note`, `Warning`, etc. when the user - /// omits a title and `appearance="default"`) and the - /// screen-reader-only type announcement span. - /// - /// Non-atomic — the wrapper Div is structural, and its children - /// (the user's actual callout body) remain editable through the - /// preview. The synthesized title text itself has no preimage but - /// regenerates from the callout type when the user changes it, - /// so atomicity at the wrapper level would be incorrect. - pub fn callout() -> Self { - Self { - kind: "callout".to_string(), - data: serde_json::Value::Null, - } - } - - /// Empty-Map sentinel `ConfigValue` used during metadata merging - /// when no value is present. Non-atomic. The bytes don't exist — - /// the node is structural. 
See [`By::is_programmatic_sentinel`]. - pub fn config_default() -> Self { - Self { - kind: "config-default".to_string(), - data: serde_json::Value::Null, - } - } - - /// Programmatic construction of `ConfigValue` (e.g. - /// `ConfigValue::from_path`, intermediate maps created during - /// `insert_path`). No source bytes exist for these nodes. - /// See [`By::is_programmatic_sentinel`]. - pub fn programmatic_config() -> Self { - Self { - kind: "programmatic-config".to_string(), - data: serde_json::Value::Null, - } - } - - /// True for kinds whose source bytes don't exist — `config-default`, - /// `programmatic-config`, `unknown`. Used by code that needs to - /// distinguish "no real source" sentinels from a genuine - /// `Original{FileId(0), …}` pointing at a real document. - pub fn is_programmatic_sentinel(&self) -> bool { - matches!( - self.kind.as_str(), - "config-default" | "programmatic-config" | "unknown" - ) - } - - /// Escape-hatch constructor for any `kind` string — including built-in - /// names and extension-defined kinds (`ext//`). - /// - /// Forgery (an extension calling `By::raw("shortcode", …)` without the - /// required `Invocation` anchor) is caught downstream by Plan 6's - /// audit-completion test and Plan 7's `debug_assert!`. The convention - /// for third-party kinds is `ext//`. - pub fn raw(kind: impl Into, data: serde_json::Value) -> Self { - Self { - kind: kind.into(), - data, - } - } - - /// True if a `Generated { by: , .. }` node should be treated - /// as atomic by the incremental writer. - /// - /// Atomic nodes are produced by the pipeline and represent content - /// the user shouldn't edit through React (filter constructions, - /// shortcode resolutions, synthesized title h1, tree-sitter-inserted - /// spaces). Atomicity is determined by `kind` alone — orthogonal to - /// anchor-presence. - /// - /// Extensions that contribute new `by.kind` values are not atomic by - /// default in v1. 
- pub fn is_atomic_kind(&self) -> bool { - matches!( - self.kind.as_str(), - "filter" - | "shortcode" - | "title-block" - | "tree-sitter-postprocess" - | "citeproc" - | "jupyter-output" - ) - } - - /// True if this `By`'s `kind` equals `kind`. - pub fn is_kind(&self, kind: &str) -> bool { - self.kind == kind - } - - /// If `self.kind == "filter"`, return `(filter_path, line)`. - /// - /// Returns `None` for any other kind, or when the data payload is - /// malformed (missing or non-string `filter_path`, missing or - /// non-integer `line`). - pub fn as_filter(&self) -> Option<(&str, usize)> { - if self.kind != "filter" { - return None; - } - let path = self.data.get("filter_path")?.as_str()?; - let line = self.data.get("line")?.as_u64()? as usize; - Some((path, line)) - } -} - -impl Anchor { - /// Construct an [`AnchorRole::Invocation`] anchor. - pub fn invocation(source_info: Arc) -> Self { - Self { - role: AnchorRole::Invocation, - source_info, - } - } - - /// Construct an [`AnchorRole::ValueSource`] anchor. - pub fn value_source(source_info: Arc) -> Self { - Self { - role: AnchorRole::ValueSource, - source_info, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::types::{FileId, Location, Range}; - - #[test] - fn test_original_source_info() { - let file_id = FileId(0); - let range = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }; - - let info = SourceInfo::from_range(file_id, range.clone()); - - assert_eq!(info.start_offset(), 0); - assert_eq!(info.end_offset(), 10); - assert_eq!(info.length(), 10); - match info { - SourceInfo::Original { - file_id: mapped_id, .. 
- } => { - assert_eq!(mapped_id, file_id); - } - _ => panic!("Expected Original mapping"), - } - } - - #[test] - fn test_remap_file_ids_original() { - let mut info = SourceInfo::original(FileId(0), 0, 10); - info.remap_file_ids(&|id| FileId(id.0 + 1)); - match info { - SourceInfo::Original { file_id, .. } => assert_eq!(file_id, FileId(1)), - _ => panic!("Expected Original"), - } - } - - #[test] - fn test_remap_file_ids_substring() { - let parent = SourceInfo::original(FileId(0), 0, 100); - let mut info = SourceInfo::substring(parent, 5, 20); - info.remap_file_ids(&|id| FileId(id.0 + 7)); - match info { - SourceInfo::Substring { parent, .. } => match &*parent { - SourceInfo::Original { file_id, .. } => assert_eq!(*file_id, FileId(7)), - _ => panic!("Expected Original parent"), - }, - _ => panic!("Expected Substring"), - } - } - - #[test] - fn test_remap_file_ids_concat() { - let a = SourceInfo::original(FileId(0), 0, 5); - let b = SourceInfo::original(FileId(3), 5, 10); - let mut info = SourceInfo::concat(vec![(a, 5), (b, 5)]); - info.remap_file_ids(&|id| FileId(id.0 + 10)); - match info { - SourceInfo::Concat { pieces } => { - match &pieces[0].source_info { - SourceInfo::Original { file_id, .. } => assert_eq!(*file_id, FileId(10)), - _ => panic!("Expected Original"), - } - match &pieces[1].source_info { - SourceInfo::Original { file_id, .. 
} => assert_eq!(*file_id, FileId(13)), - _ => panic!("Expected Original"), - } - } - _ => panic!("Expected Concat"), - } - } - - #[test] - fn test_remap_file_ids_generated_empty_from_is_noop() { - let mut info = SourceInfo::generated(By::filter("foo.lua", 42)); - info.remap_file_ids(&|_| FileId(99)); - match info { - SourceInfo::Generated { by, from } => { - assert!(from.is_empty()); - let (path, line) = by.as_filter().unwrap(); - assert_eq!(path, "foo.lua"); - assert_eq!(line, 42); - } - _ => panic!("Expected Generated"), - } - } - - // ------------------------------------------------------------------------- - // Plan 4 — By / Anchor / Generated coverage - // ------------------------------------------------------------------------- - - #[test] - fn test_by_filter_builder() { - let by = By::filter("a.lua", 7); - assert_eq!(by.kind, "filter"); - assert_eq!(by.as_filter(), Some(("a.lua", 7))); - } - - #[test] - fn test_by_sectionize_builder() { - let by = By::sectionize(); - assert_eq!(by.kind, "sectionize"); - assert!(by.data.is_null()); - } - - #[test] - fn test_by_user_edit_builder() { - assert_eq!(By::user_edit().kind, "user-edit"); - } - - #[test] - fn test_by_shortcode_builder_records_name() { - let by = By::shortcode("meta"); - assert_eq!(by.kind, "shortcode"); - assert_eq!(by.data.get("name").and_then(|v| v.as_str()), Some("meta")); - } - - #[test] - fn test_by_include_title_footnotes_appendix_tree_sitter_builders() { - assert_eq!(By::include().kind, "include"); - assert_eq!(By::title_block().kind, "title-block"); - assert_eq!(By::footnotes().kind, "footnotes"); - assert_eq!(By::appendix().kind, "appendix"); - assert_eq!( - By::tree_sitter_postprocess().kind, - "tree-sitter-postprocess" - ); - } - - #[test] - fn test_by_raw_builder_accepts_any_kind() { - let by = By::raw("ext/my-plugin/foo", serde_json::json!({"k": 1})); - assert_eq!(by.kind, "ext/my-plugin/foo"); - assert_eq!(by.data.get("k").and_then(|v| v.as_u64()), Some(1)); - } - - #[test] - fn 
test_by_is_atomic_kind() { - assert!(By::filter("x.lua", 1).is_atomic_kind()); - assert!(By::shortcode("meta").is_atomic_kind()); - assert!(By::title_block().is_atomic_kind()); - assert!(By::tree_sitter_postprocess().is_atomic_kind()); - assert!(By::citeproc().is_atomic_kind()); - assert!(By::jupyter_output().is_atomic_kind()); - - assert!(!By::callout().is_atomic_kind()); - - assert!(!By::sectionize().is_atomic_kind()); - assert!(!By::user_edit().is_atomic_kind()); - assert!(!By::include().is_atomic_kind()); - assert!(!By::footnotes().is_atomic_kind()); - assert!(!By::appendix().is_atomic_kind()); - assert!(!By::unknown().is_atomic_kind()); - assert!(!By::test_scaffold().is_atomic_kind()); - assert!(!By::config_default().is_atomic_kind()); - assert!(!By::programmatic_config().is_atomic_kind()); - assert!(!By::raw("ext/anywhere/foo", serde_json::Value::Null).is_atomic_kind()); - } - - #[test] - fn test_by_unknown_constructor() { - let by = By::unknown(); - assert_eq!(by.kind, "unknown"); - assert!(by.data.is_null()); - // Non-atomic — nodes carrying By::unknown() remain editable; the - // strict reader rejects missing `s:`, the completing reader stamps - // them with this kind only at the explicit call site. - assert!(!by.is_atomic_kind()); - } - - #[test] - fn test_by_test_scaffold_constructor() { - let by = By::test_scaffold(); - assert_eq!(by.kind, "test-scaffold"); - assert!(by.data.is_null()); - assert!(!by.is_atomic_kind()); - // Not a "no real source" sentinel — it's test scaffolding. 
- assert!(!by.is_programmatic_sentinel()); - } - - #[test] - fn test_by_config_default_constructor() { - let by = By::config_default(); - assert_eq!(by.kind, "config-default"); - assert!(by.data.is_null()); - assert!(!by.is_atomic_kind()); - } - - #[test] - fn test_by_programmatic_config_constructor() { - let by = By::programmatic_config(); - assert_eq!(by.kind, "programmatic-config"); - assert!(by.data.is_null()); - assert!(!by.is_atomic_kind()); - } - - #[test] - fn test_by_citeproc_constructor() { - let by = By::citeproc(); - assert_eq!(by.kind, "citeproc"); - assert!(by.data.is_null()); - // Atomic — citeproc output is non-editable in the preview. - assert!(by.is_atomic_kind()); - // Not a "no real source" sentinel; the bytes come from CSL output. - assert!(!by.is_programmatic_sentinel()); - } - - #[test] - fn test_by_jupyter_output_constructor() { - let by = By::jupyter_output(); - assert_eq!(by.kind, "jupyter-output"); - assert!(by.data.is_null()); - // Atomic — execution outputs regenerate on every re-run. - assert!(by.is_atomic_kind()); - assert!(!by.is_programmatic_sentinel()); - } - - #[test] - fn test_by_callout_constructor() { - let by = By::callout(); - assert_eq!(by.kind, "callout"); - assert!(by.data.is_null()); - // Non-atomic — callout wrapper is structural; children stay editable. 
- assert!(!by.is_atomic_kind()); - assert!(!by.is_programmatic_sentinel()); - } - - #[test] - fn test_by_is_programmatic_sentinel() { - assert!(By::config_default().is_programmatic_sentinel()); - assert!(By::programmatic_config().is_programmatic_sentinel()); - assert!(By::unknown().is_programmatic_sentinel()); - - assert!(!By::user_edit().is_programmatic_sentinel()); - assert!(!By::filter("x.lua", 1).is_programmatic_sentinel()); - assert!(!By::shortcode("meta").is_programmatic_sentinel()); - assert!(!By::test_scaffold().is_programmatic_sentinel()); - assert!(!By::sectionize().is_programmatic_sentinel()); - } - - #[test] - fn test_source_info_for_test() { - let si = SourceInfo::for_test(); - match si { - SourceInfo::Generated { by, from } => { - assert_eq!(by.kind, "test-scaffold"); - assert!(from.is_empty()); - } - _ => panic!("for_test() must return Generated"), - } - } - - #[test] - fn test_by_is_kind() { - let by = By::shortcode("meta"); - assert!(by.is_kind("shortcode")); - assert!(!by.is_kind("filter")); - } - - #[test] - fn test_by_as_filter_rejects_non_filter() { - assert!(By::sectionize().as_filter().is_none()); - // Malformed filter (missing line) → None. 
- let by = By { - kind: "filter".to_string(), - data: serde_json::json!({ "filter_path": "x.lua" }), - }; - assert!(by.as_filter().is_none()); - } - - #[test] - fn test_anchor_invocation_value_source_constructors() { - let original = Arc::new(SourceInfo::original(FileId(1), 0, 5)); - let inv = Anchor::invocation(Arc::clone(&original)); - let vs = Anchor::value_source(Arc::clone(&original)); - assert!(matches!(inv.role, AnchorRole::Invocation)); - assert!(matches!(vs.role, AnchorRole::ValueSource)); - } - - #[test] - fn test_by_json_round_trip() { - let by = By::shortcode("meta"); - let json = serde_json::to_string(&by).unwrap(); - let back: By = serde_json::from_str(&json).unwrap(); - assert_eq!(by, back); - } - - #[test] - fn test_anchor_json_round_trip() { - let anchor = Anchor::invocation(Arc::new(SourceInfo::original(FileId(2), 10, 20))); - let json = serde_json::to_string(&anchor).unwrap(); - let back: Anchor = serde_json::from_str(&json).unwrap(); - assert_eq!(anchor, back); - } - - #[test] - fn test_generated_json_round_trip_empty_from() { - let info = SourceInfo::generated(By::sectionize()); - let json = serde_json::to_string(&info).unwrap(); - let back: SourceInfo = serde_json::from_str(&json).unwrap(); - assert_eq!(info, back); - } - - #[test] - fn test_generated_json_round_trip_with_invocation_anchor() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(5), 100, 110)), - ); - let json = serde_json::to_string(&info).unwrap(); - let back: SourceInfo = serde_json::from_str(&json).unwrap(); - assert_eq!(info, back); - } - - #[test] - fn test_generated_json_round_trip_multi_anchor() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(5), 100, 110)), - ); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(7), 200, 220)), - ); - 
let json = serde_json::to_string(&info).unwrap(); - let back: SourceInfo = serde_json::from_str(&json).unwrap(); - assert_eq!(info, back); - } - - #[test] - fn test_generated_length_start_end_are_zero() { - let info = SourceInfo::generated(By::sectionize()); - assert_eq!(info.length(), 0); - assert_eq!(info.start_offset(), 0); - assert_eq!(info.end_offset(), 0); - } - - #[test] - fn test_generated_resolve_byte_range_recurses_through_substring() { - let parent = SourceInfo::original(FileId(42), 100, 200); - let sub = SourceInfo::substring(parent, 10, 20); - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor(AnchorRole::Invocation, Arc::new(sub)); - assert_eq!(info.resolve_byte_range(), Some((42, 110, 120))); - } - - #[test] - fn test_generated_resolve_byte_range_empty_returns_none() { - let info = SourceInfo::generated(By::sectionize()); - assert!(info.resolve_byte_range().is_none()); - } - - #[test] - fn test_generated_resolve_byte_range_value_source_only_returns_none() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(5), 100, 110)), - ); - assert!(info.resolve_byte_range().is_none()); - } - - #[test] - fn test_generated_remap_file_ids_walks_anchors() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(0), 0, 5)), - ); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(3), 10, 20)), - ); - info.remap_file_ids(&|id| FileId(id.0 + 10)); - match &info { - SourceInfo::Generated { from, .. } => { - assert_eq!(from.len(), 2); - match from[0].source_info.as_ref() { - SourceInfo::Original { file_id, .. } => assert_eq!(*file_id, FileId(10)), - _ => panic!("Expected Original anchor 0"), - } - match from[1].source_info.as_ref() { - SourceInfo::Original { file_id, .. 
} => assert_eq!(*file_id, FileId(13)), - _ => panic!("Expected Original anchor 1"), - } - } - _ => panic!("Expected Generated"), - } - } - - #[test] - fn test_root_file_id_per_variant() { - // Original - let original = SourceInfo::original(FileId(7), 0, 5); - assert_eq!(original.root_file_id(), Some(FileId(7))); - - // Substring → recurse parent - let sub = SourceInfo::substring(original.clone(), 0, 5); - assert_eq!(sub.root_file_id(), Some(FileId(7))); - - // Concat find_map skips Generated holes - let empty_gen = SourceInfo::generated(By::sectionize()); - let real = SourceInfo::original(FileId(42), 0, 5); - let concat = SourceInfo::concat(vec![(empty_gen, 0), (real, 5)]); - assert_eq!(concat.root_file_id(), Some(FileId(42))); - - // Generated with Invocation - let mut g = SourceInfo::generated(By::shortcode("meta")); - g.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(9), 0, 1)), - ); - assert_eq!(g.root_file_id(), Some(FileId(9))); - - // Generated with no Invocation - let mut g2 = SourceInfo::generated(By::shortcode("meta")); - g2.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(9), 0, 1)), - ); - assert_eq!(g2.root_file_id(), None); - - // Generated empty - let g3 = SourceInfo::generated(By::sectionize()); - assert_eq!(g3.root_file_id(), None); - } - - #[test] - fn test_collect_file_ids_walks_every_anchor_role() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(1), 0, 1)), - ); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(2), 0, 1)), - ); - info.append_anchor( - AnchorRole::Other("dispatch".to_string()), - Arc::new(SourceInfo::original(FileId(3), 0, 1)), - ); - let mut out = std::collections::HashSet::new(); - info.collect_file_ids(&mut out); - assert!(out.contains(&FileId(1))); - assert!(out.contains(&FileId(2))); - assert!(out.contains(&FileId(3))); - 
assert_eq!(out.len(), 3); - } - - #[test] - fn test_collect_file_ids_walks_concat_and_substring() { - let inner = SourceInfo::original(FileId(5), 0, 100); - let sub = SourceInfo::substring(inner, 10, 20); - let other = SourceInfo::original(FileId(11), 0, 5); - let concat = SourceInfo::concat(vec![(sub, 10), (other, 5)]); - let mut out = std::collections::HashSet::new(); - concat.collect_file_ids(&mut out); - assert!(out.contains(&FileId(5))); - assert!(out.contains(&FileId(11))); - assert_eq!(out.len(), 2); - } - - #[test] - fn test_invocation_anchor_accessor() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - assert!(info.invocation_anchor().is_none()); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(2), 0, 1)), - ); - assert!(info.invocation_anchor().is_none()); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(1), 0, 1)), - ); - assert!(info.invocation_anchor().is_some()); - // Non-Generated returns None. 
- assert!( - SourceInfo::original(FileId(0), 0, 0) - .invocation_anchor() - .is_none() - ); - } - - #[test] - fn test_value_source_anchor_accessor() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - assert!(info.value_source_anchor().is_none()); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(1), 0, 1)), - ); - assert!(info.value_source_anchor().is_none()); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(2), 0, 1)), - ); - assert!(info.value_source_anchor().is_some()); - } - - #[test] - fn test_anchors_with_role() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(1), 0, 1)), - ); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(2), 0, 1)), - ); - info.append_anchor( - AnchorRole::Other("ext/foo".to_string()), - Arc::new(SourceInfo::original(FileId(3), 0, 1)), - ); - assert_eq!(info.anchors_with_role(&AnchorRole::Invocation).count(), 1); - assert_eq!(info.anchors_with_role(&AnchorRole::ValueSource).count(), 1); - assert_eq!( - info.anchors_with_role(&AnchorRole::Other("ext/foo".to_string())) - .count(), - 1 - ); - assert_eq!( - info.anchors_with_role(&AnchorRole::Other("missing".to_string())) - .count(), - 0 - ); - } - - #[test] - fn test_append_anchor_preserves_order() { - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor( - AnchorRole::Invocation, - Arc::new(SourceInfo::original(FileId(1), 0, 1)), - ); - info.append_anchor( - AnchorRole::ValueSource, - Arc::new(SourceInfo::original(FileId(2), 0, 1)), - ); - match info { - SourceInfo::Generated { from, .. 
} => { - assert_eq!(from.len(), 2); - assert!(matches!(from[0].role, AnchorRole::Invocation)); - assert!(matches!(from[1].role, AnchorRole::ValueSource)); - } - _ => panic!("Expected Generated"), - } - } - - #[test] - fn test_combine_with_generated_is_zero_length_piece() { - let original = SourceInfo::original(FileId(0), 10, 20); - let generated = SourceInfo::generated(By::sectionize()); - let combined = original.combine(&generated); - match &combined { - SourceInfo::Concat { pieces } => { - assert_eq!(pieces.len(), 2); - assert_eq!(pieces[1].length, 0); - } - _ => panic!("Expected Concat"), - } - // Length of the combined value equals only the Original side. - assert_eq!(combined.length(), 10); - } - - #[test] - fn test_source_info_serialization() { - let file_id = FileId(0); - let range = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }; - - let info = SourceInfo::from_range(file_id, range); - let json = serde_json::to_string(&info).unwrap(); - let deserialized: SourceInfo = serde_json::from_str(&json).unwrap(); - - assert_eq!(info, deserialized); - } - - #[test] - fn test_substring_source_info() { - let file_id = FileId(0); - let parent_range = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 100, - row: 0, - column: 100, - }, - }; - let parent = SourceInfo::from_range(file_id, parent_range); - - let substring = SourceInfo::substring(parent, 10, 20); - - assert_eq!(substring.start_offset(), 10); - assert_eq!(substring.end_offset(), 20); - assert_eq!(substring.length(), 10); - - match substring { - SourceInfo::Substring { - start_offset, - end_offset, - .. 
- } => { - assert_eq!(start_offset, 10); - assert_eq!(end_offset, 20); - } - _ => panic!("Expected Substring mapping"), - } - } - - #[test] - fn test_concat_source_info() { - let file_id1 = FileId(0); - let file_id2 = FileId(1); - - let info1 = SourceInfo::from_range( - file_id1, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }, - ); - - let info2 = SourceInfo::from_range( - file_id2, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 15, - row: 0, - column: 15, - }, - }, - ); - - let concat = SourceInfo::concat(vec![(info1, 10), (info2, 15)]); - - assert_eq!(concat.start_offset(), 0); - assert_eq!(concat.end_offset(), 25); // 10 + 15 - assert_eq!(concat.length(), 25); - - match concat { - SourceInfo::Concat { pieces } => { - assert_eq!(pieces.len(), 2); - assert_eq!(pieces[0].offset_in_concat, 0); - assert_eq!(pieces[0].length, 10); - assert_eq!(pieces[1].offset_in_concat, 10); - assert_eq!(pieces[1].length, 15); - } - _ => panic!("Expected Concat mapping"), - } - } - - #[test] - fn test_combine_two_sources() { - let file_id = FileId(0); - - // Create two separate source info objects - let info1 = SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }, - ); - - let info2 = SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 15, - row: 0, - column: 15, - }, - end: Location { - offset: 25, - row: 0, - column: 25, - }, - }, - ); - - // Combine them - let combined = info1.combine(&info2); - - // Should create a Concat with total length = 10 + 10 = 20 - assert_eq!(combined.start_offset(), 0); - assert_eq!(combined.end_offset(), 20); - assert_eq!(combined.length(), 20); - - match combined { - SourceInfo::Concat { pieces } => { - assert_eq!(pieces.len(), 2); - assert_eq!(pieces[0].length, 10); - 
assert_eq!(pieces[0].offset_in_concat, 0); - assert_eq!(pieces[1].length, 10); - assert_eq!(pieces[1].offset_in_concat, 10); - } - _ => panic!("Expected Concat mapping"), - } - } - - #[test] - fn test_combine_preserves_source_tracking() { - // Combine sources from different files - let file_id1 = FileId(5); - let file_id2 = FileId(10); - - let info1 = SourceInfo::from_range( - file_id1, - Range { - start: Location { - offset: 100, - row: 5, - column: 0, - }, - end: Location { - offset: 105, - row: 5, - column: 5, - }, - }, - ); - - let info2 = SourceInfo::from_range( - file_id2, - Range { - start: Location { - offset: 200, - row: 10, - column: 0, - }, - end: Location { - offset: 207, - row: 10, - column: 7, - }, - }, - ); - - let combined = info1.combine(&info2); - - // Verify both sources are preserved in the Concat - match combined { - SourceInfo::Concat { pieces } => { - assert_eq!(pieces.len(), 2); - - // First piece should come from file_id1 - match &pieces[0].source_info { - SourceInfo::Original { file_id, .. } => assert_eq!(*file_id, file_id1), - _ => panic!("Expected Original mapping for first piece"), - } - - // Second piece should come from file_id2 - match &pieces[1].source_info { - SourceInfo::Original { file_id, .. 
} => assert_eq!(*file_id, file_id2), - _ => panic!("Expected Original mapping for second piece"), - } - } - _ => panic!("Expected Concat mapping"), - } - } - - /// Test JSON serialization of Original mapping - #[test] - fn test_json_serialization_original() { - let file_id = FileId(0); - let range = Range { - start: Location { - offset: 10, - row: 1, - column: 5, - }, - end: Location { - offset: 50, - row: 3, - column: 10, - }, - }; - - let info = SourceInfo::from_range(file_id, range); - let json = serde_json::to_value(&info).unwrap(); - - // Verify JSON structure - assert_eq!(json["Original"]["file_id"], 0); - assert_eq!(json["Original"]["start_offset"], 10); - assert_eq!(json["Original"]["end_offset"], 50); - - // Verify round-trip - let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); - assert_eq!(info, deserialized); - } - - /// Test JSON serialization of Substring mapping - #[test] - fn test_json_serialization_substring() { - let file_id = FileId(0); - let parent_range = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 100, - row: 5, - column: 20, - }, - }; - let parent = SourceInfo::from_range(file_id, parent_range); - - let substring = SourceInfo::substring(parent, 10, 30); - let json = serde_json::to_value(&substring).unwrap(); - - // Verify JSON structure - assert_eq!(json["Substring"]["start_offset"], 10); - assert_eq!(json["Substring"]["end_offset"], 30); - - // Verify parent is serialized (with Rc, it's a full copy in JSON) - assert!(json["Substring"]["parent"].is_object()); - assert_eq!(json["Substring"]["parent"]["Original"]["file_id"], 0); - - // Verify round-trip - let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); - assert_eq!(substring, deserialized); - } - - /// Test JSON serialization of nested Substring mappings (simulates .qmd frontmatter) - #[test] - fn test_json_serialization_nested_substring() { - let file_id = FileId(0); - - // Level 1: Original file - let 
file_range = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 200, - row: 10, - column: 0, - }, - }; - let file_info = SourceInfo::from_range(file_id, file_range); - - // Level 2: YAML frontmatter (substring of file) - let yaml_info = SourceInfo::substring(file_info, 4, 150); - - // Level 3: YAML value (substring of frontmatter) - let value_info = SourceInfo::substring(yaml_info, 20, 35); - - let json = serde_json::to_value(&value_info).unwrap(); - - // Verify nested structure - assert_eq!(json["Substring"]["start_offset"], 20); - assert_eq!(json["Substring"]["end_offset"], 35); - assert_eq!(json["Substring"]["parent"]["Substring"]["start_offset"], 4); - assert_eq!( - json["Substring"]["parent"]["Substring"]["parent"]["Original"]["file_id"], - 0 - ); - - // Verify round-trip - let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); - assert_eq!(value_info, deserialized); - } - - /// Test JSON serialization of Concat mapping - #[test] - fn test_json_serialization_concat() { - let file_id1 = FileId(0); - let file_id2 = FileId(1); - - let info1 = SourceInfo::from_range( - file_id1, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }, - ); - - let info2 = SourceInfo::from_range( - file_id2, - Range { - start: Location { - offset: 20, - row: 2, - column: 0, - }, - end: Location { - offset: 30, - row: 2, - column: 10, - }, - }, - ); - - let combined = info1.combine(&info2); - let json = serde_json::to_value(&combined).unwrap(); - - // Verify JSON structure - assert!(json["Concat"]["pieces"].is_array()); - let pieces = json["Concat"]["pieces"].as_array().unwrap(); - assert_eq!(pieces.len(), 2); - - // First piece - assert_eq!(pieces[0]["offset_in_concat"], 0); - assert_eq!(pieces[0]["length"], 10); - assert_eq!(pieces[0]["source_info"]["Original"]["file_id"], 0); - - // Second piece - assert_eq!(pieces[1]["offset_in_concat"], 
10); - assert_eq!(pieces[1]["length"], 10); - assert_eq!(pieces[1]["source_info"]["Original"]["file_id"], 1); - - // Verify round-trip - let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); - assert_eq!(combined, deserialized); - } - - /// Test JSON serialization of complex nested structure (real-world example) - #[test] - fn test_json_serialization_complex_nested() { - let file_id = FileId(0); - - // Simulate a .qmd file structure - let qmd_file = SourceInfo::from_range( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 500, - row: 20, - column: 0, - }, - }, - ); - - // YAML frontmatter is a substring - let yaml_frontmatter = SourceInfo::substring(qmd_file.clone(), 4, 200); - - // A YAML key is a substring of frontmatter - let yaml_key = SourceInfo::substring(yaml_frontmatter.clone(), 10, 20); - - // A YAML value is another substring of frontmatter - let yaml_value = SourceInfo::substring(yaml_frontmatter, 25, 50); - - // Combine key and value (simulating metadata entry) - let combined = yaml_key.combine(&yaml_value); - - let json = serde_json::to_value(&combined).unwrap(); - - // Verify this complex structure serializes - assert!(json.is_object()); - assert!(json["Concat"].is_object()); - - // Verify round-trip - let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); - assert_eq!(combined, deserialized); - } - - // ------------------------------------------------------------------------- - // Plan 7 — preimage_in accessor - // ------------------------------------------------------------------------- - - #[test] - fn test_preimage_in_original_same_file() { - let info = SourceInfo::original(FileId(0), 10, 25); - assert_eq!(info.preimage_in(FileId(0)), Some(10..25)); - } - - #[test] - fn test_preimage_in_original_different_file_returns_none() { - let info = SourceInfo::original(FileId(0), 10, 25); - assert_eq!(info.preimage_in(FileId(1)), None); - } - - #[test] - fn 
test_preimage_in_substring_composes_offsets() { - // Parent points at bytes 100..200 in file 0. - // Substring takes bytes 5..15 *relative to parent*. - // Preimage in file 0 should be 105..115. - let parent = SourceInfo::original(FileId(0), 100, 200); - let info = SourceInfo::substring(parent, 5, 15); - assert_eq!(info.preimage_in(FileId(0)), Some(105..115)); - } - - #[test] - fn test_preimage_in_substring_different_file_returns_none() { - let parent = SourceInfo::original(FileId(0), 100, 200); - let info = SourceInfo::substring(parent, 5, 15); - assert_eq!(info.preimage_in(FileId(7)), None); - } - - #[test] - fn test_preimage_in_substring_chain() { - // Original 1000..2000 in file 0; Substring 100..500 relative; Substring 10..50 relative. - // Expected preimage in file 0: 1100 + 10 .. 1100 + 50 = 1110..1150. - let root = SourceInfo::original(FileId(0), 1000, 2000); - let mid = SourceInfo::substring(root, 100, 500); - let leaf = SourceInfo::substring(mid, 10, 50); - assert_eq!(leaf.preimage_in(FileId(0)), Some(1110..1150)); - } - - #[test] - fn test_preimage_in_concat_contiguous() { - // Two adjacent pieces of file 0: 10..15 and 15..25 → contiguous → 10..25. - let a = SourceInfo::original(FileId(0), 10, 15); - let b = SourceInfo::original(FileId(0), 15, 25); - let info = SourceInfo::concat(vec![(a, 5), (b, 10)]); - assert_eq!(info.preimage_in(FileId(0)), Some(10..25)); - } - - #[test] - fn test_preimage_in_concat_gappy_returns_none() { - // 10..15 then 20..25 → gap between 15 and 20 → None. - let a = SourceInfo::original(FileId(0), 10, 15); - let b = SourceInfo::original(FileId(0), 20, 25); - let info = SourceInfo::concat(vec![(a, 5), (b, 5)]); - assert_eq!(info.preimage_in(FileId(0)), None); - } - - #[test] - fn test_preimage_in_concat_overlapping_returns_none() { - // 10..20 then 15..25 → overlap → not byte-contiguous → None. 
- let a = SourceInfo::original(FileId(0), 10, 20); - let b = SourceInfo::original(FileId(0), 15, 25); - let info = SourceInfo::concat(vec![(a, 10), (b, 10)]); - assert_eq!(info.preimage_in(FileId(0)), None); - } - - #[test] - fn test_preimage_in_concat_mixed_files_returns_none() { - // One piece in file 0, another in file 1 → resolving in file 0 fails - // because the file-1 piece can't be resolved. - let a = SourceInfo::original(FileId(0), 10, 15); - let b = SourceInfo::original(FileId(1), 15, 25); - let info = SourceInfo::concat(vec![(a, 5), (b, 10)]); - assert_eq!(info.preimage_in(FileId(0)), None); - } - - #[test] - fn test_preimage_in_generated_no_anchors_returns_none() { - // Sectionize-style wrapper, footnotes-container, etc.: Generated with - // empty `from`. No Invocation anchor → no preimage. - let info = SourceInfo::generated(By::sectionize()); - assert_eq!(info.preimage_in(FileId(0)), None); - } - - #[test] - fn test_preimage_in_generated_with_invocation_in_target() { - // Shortcode resolution: Generated with an Invocation anchor pointing - // at the {{< meta foo >}} token bytes. - let token = SourceInfo::original(FileId(0), 50, 70); - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor(AnchorRole::Invocation, Arc::new(token)); - assert_eq!(info.preimage_in(FileId(0)), Some(50..70)); - } - - #[test] - fn test_preimage_in_generated_with_invocation_outside_target() { - // Invocation anchor points at file 0; query asks about file 1 → None. - let token = SourceInfo::original(FileId(0), 50, 70); - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor(AnchorRole::Invocation, Arc::new(token)); - assert_eq!(info.preimage_in(FileId(1)), None); - } - - #[test] - fn test_preimage_in_generated_walks_through_substring_in_invocation() { - // Invocation anchor is itself a Substring chain. preimage_in must - // walk through it correctly. 
- let root = SourceInfo::original(FileId(0), 100, 200); - let token = SourceInfo::substring(root, 10, 30); - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor(AnchorRole::Invocation, Arc::new(token)); - assert_eq!(info.preimage_in(FileId(0)), Some(110..130)); - } - - // ------------------------------------------------------------------------- - // Plan 7 — preimage_in role-asymmetry: only Invocation is walked. - // ------------------------------------------------------------------------- - - #[test] - fn test_preimage_in_generated_value_source_only_returns_none() { - // Plan 9-shape: Generated whose only anchor is ValueSource (points at - // YAML metadata bytes). The writer must NOT copy those bytes into the - // body — preimage_in returns None. - let meta_si = SourceInfo::original(FileId(0), 10, 25); - let mut info = SourceInfo::generated(By::appendix()); - info.append_anchor(AnchorRole::ValueSource, Arc::new(meta_si)); - assert_eq!(info.preimage_in(FileId(0)), None); - } - - #[test] - fn test_preimage_in_generated_other_only_returns_none() { - // Extension-defined Other role. preimage_in must not walk it. - let lua_si = SourceInfo::original(FileId(0), 10, 25); - let mut info = SourceInfo::generated(By::filter("upper.lua", 14)); - info.append_anchor( - AnchorRole::Other("ext/my-ext/dispatch".to_string()), - Arc::new(lua_si), - ); - assert_eq!(info.preimage_in(FileId(0)), None); - } - - #[test] - fn test_preimage_in_generated_invocation_plus_value_source_walks_invocation_only() { - // Plan 2/Plan 9 mixed shape: Invocation in file 0 + ValueSource in - // file 1. Query file 0 → Invocation resolves → Some(token range). - // Query file 1 → Invocation resolves to file 0 (not 1) → None. - // (The writer must not see the value-source range when asked about - // any file, even the file the ValueSource points into.) 
- let token = SourceInfo::original(FileId(0), 50, 70); - let value = SourceInfo::original(FileId(1), 200, 215); - let mut info = SourceInfo::generated(By::shortcode("meta")); - info.append_anchor(AnchorRole::Invocation, Arc::new(token)); - info.append_anchor(AnchorRole::ValueSource, Arc::new(value)); - - assert_eq!(info.preimage_in(FileId(0)), Some(50..70)); - assert_eq!(info.preimage_in(FileId(1)), None); - } -} diff --git a/crates/quarto-source-map/src/types.rs b/crates/quarto-source-map/src/types.rs deleted file mode 100644 index 12bd564d7..000000000 --- a/crates/quarto-source-map/src/types.rs +++ /dev/null @@ -1,169 +0,0 @@ -//! Core types for source mapping - -use serde::{Deserialize, Serialize}; - -/// A unique identifier for a source file -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct FileId(pub usize); - -/// A location in source text (0-indexed) -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] -pub struct Location { - /// Byte offset from start of source - pub offset: usize, - /// Row number (0-indexed) - pub row: usize, - /// Column number (0-indexed, in characters not bytes) - pub column: usize, -} - -/// A range in source text from start to end -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct Range { - /// Start location (inclusive) - pub start: Location, - /// End location (exclusive) - pub end: Location, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_file_id_equality() { - let id1 = FileId(0); - let id2 = FileId(0); - let id3 = FileId(1); - - assert_eq!(id1, id2); - assert_ne!(id1, id3); - } - - #[test] - fn test_location_ordering() { - let loc1 = Location { - offset: 0, - row: 0, - column: 0, - }; - let loc2 = Location { - offset: 5, - row: 0, - column: 5, - }; - let loc3 = Location { - offset: 10, - row: 1, - column: 0, - }; - - assert!(loc1 < loc2); - assert!(loc2 < loc3); - assert!(loc1 < loc3); - } - - #[test] - fn 
test_location_equality() { - let loc1 = Location { - offset: 5, - row: 0, - column: 5, - }; - let loc2 = Location { - offset: 5, - row: 0, - column: 5, - }; - let loc3 = Location { - offset: 6, - row: 0, - column: 6, - }; - - assert_eq!(loc1, loc2); - assert_ne!(loc1, loc3); - } - - #[test] - fn test_range_equality() { - let range1 = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 5, - row: 0, - column: 5, - }, - }; - let range2 = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 5, - row: 0, - column: 5, - }, - }; - let range3 = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }; - - assert_eq!(range1, range2); - assert_ne!(range1, range3); - } - - #[test] - fn test_serialization_file_id() { - let id = FileId(42); - let json = serde_json::to_string(&id).unwrap(); - let deserialized: FileId = serde_json::from_str(&json).unwrap(); - assert_eq!(id, deserialized); - } - - #[test] - fn test_serialization_location() { - let loc = Location { - offset: 100, - row: 5, - column: 10, - }; - let json = serde_json::to_string(&loc).unwrap(); - let deserialized: Location = serde_json::from_str(&json).unwrap(); - assert_eq!(loc, deserialized); - } - - #[test] - fn test_serialization_range() { - let range = Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 50, - row: 2, - column: 10, - }, - }; - let json = serde_json::to_string(&range).unwrap(); - let deserialized: Range = serde_json::from_str(&json).unwrap(); - assert_eq!(range, deserialized); - } -} diff --git a/crates/quarto-source-map/src/utils.rs b/crates/quarto-source-map/src/utils.rs deleted file mode 100644 index 895058bce..000000000 --- a/crates/quarto-source-map/src/utils.rs +++ /dev/null @@ -1,211 +0,0 @@ -//! 
Utility functions for working with source positions - -use crate::types::{Location, Range}; - -/// Convert a byte offset to a Location with line and column info -/// -/// Returns None if the offset is out of bounds. -pub fn offset_to_location(source: &str, offset: usize) -> Option<Location> { - if offset > source.len() { - return None; - } - - let mut row = 0; - let mut column = 0; - let mut current_offset = 0; - - for ch in source.chars() { - if current_offset >= offset { - break; - } - - if ch == '\n' { - row += 1; - column = 0; - } else { - column += 1; - } - - current_offset += ch.len_utf8(); - } - - Some(Location { - offset, - row, - column, - }) -} - -/// Convert line and column numbers to a byte offset -/// -/// Line and column are 0-indexed. Returns None if out of bounds. -pub fn line_col_to_offset(source: &str, line: usize, col: usize) -> Option<usize> { - let mut current_line = 0; - let mut current_col = 0; - let mut offset = 0; - - for ch in source.chars() { - if current_line == line && current_col == col { - return Some(offset); - } - - if ch == '\n' { - current_line += 1; - current_col = 0; - } else { - current_col += 1; - } - - offset += ch.len_utf8(); - } - - // Check if we're at the end position - if current_line == line && current_col == col { - return Some(offset); - } - - None -} - -/// Create a Range from start and end byte offsets -/// -/// This is a helper that creates a Range with Location structs -/// that only have offsets filled in (row and column are 0). -/// Use `offset_to_location` to get full Location info.
-pub fn range_from_offsets(start: usize, end: usize) -> Range { - Range { - start: Location { - offset: start, - row: 0, - column: 0, - }, - end: Location { - offset: end, - row: 0, - column: 0, - }, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_offset_to_location_simple() { - let source = "hello\nworld"; - - // Beginning - let loc = offset_to_location(source, 0).unwrap(); - assert_eq!(loc.offset, 0); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 0); - - // Middle of first line - let loc = offset_to_location(source, 3).unwrap(); - assert_eq!(loc.offset, 3); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 3); - - // After newline (beginning of second line) - let loc = offset_to_location(source, 6).unwrap(); - assert_eq!(loc.offset, 6); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 0); - - // Middle of second line - let loc = offset_to_location(source, 9).unwrap(); - assert_eq!(loc.offset, 9); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 3); - } - - #[test] - fn test_offset_to_location_out_of_bounds() { - let source = "hello"; - assert!(offset_to_location(source, 100).is_none()); - } - - #[test] - fn test_offset_to_location_end() { - let source = "hello"; - let loc = offset_to_location(source, 5).unwrap(); - assert_eq!(loc.offset, 5); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 5); - } - - #[test] - fn test_line_col_to_offset_simple() { - let source = "hello\nworld"; - - // Beginning - let offset = line_col_to_offset(source, 0, 0).unwrap(); - assert_eq!(offset, 0); - - // Middle of first line - let offset = line_col_to_offset(source, 0, 3).unwrap(); - assert_eq!(offset, 3); - - // Beginning of second line - let offset = line_col_to_offset(source, 1, 0).unwrap(); - assert_eq!(offset, 6); - - // Middle of second line - let offset = line_col_to_offset(source, 1, 3).unwrap(); - assert_eq!(offset, 9); - } - - #[test] - fn test_line_col_to_offset_out_of_bounds() { - let source = "hello\nworld"; - 
assert!(line_col_to_offset(source, 10, 0).is_none()); - assert!(line_col_to_offset(source, 0, 100).is_none()); - } - - #[test] - fn test_line_col_to_offset_end() { - let source = "hello"; - let offset = line_col_to_offset(source, 0, 5).unwrap(); - assert_eq!(offset, 5); - } - - #[test] - fn test_roundtrip() { - let source = "hello\nworld\ntest"; - - // Test various positions - for test_offset in [0, 3, 6, 10, 16] { - let loc = offset_to_location(source, test_offset).unwrap(); - let back_to_offset = line_col_to_offset(source, loc.row, loc.column).unwrap(); - assert_eq!(test_offset, back_to_offset); - } - } - - #[test] - fn test_range_from_offsets() { - let range = range_from_offsets(10, 20); - assert_eq!(range.start.offset, 10); - assert_eq!(range.end.offset, 20); - assert_eq!(range.start.row, 0); - assert_eq!(range.start.column, 0); - } - - #[test] - fn test_offset_to_location_multiline() { - let source = "line1\nline2\nline3"; - - // Test each line start - let loc = offset_to_location(source, 0).unwrap(); - assert_eq!(loc.row, 0); - assert_eq!(loc.column, 0); - - let loc = offset_to_location(source, 6).unwrap(); - assert_eq!(loc.row, 1); - assert_eq!(loc.column, 0); - - let loc = offset_to_location(source, 12).unwrap(); - assert_eq!(loc.row, 2); - assert_eq!(loc.column, 0); - } -} diff --git a/crates/quarto-xml/Cargo.toml b/crates/quarto-xml/Cargo.toml index 5104b1171..47a1ab634 100644 --- a/crates/quarto-xml/Cargo.toml +++ b/crates/quarto-xml/Cargo.toml @@ -10,7 +10,7 @@ description = "Source-tracked XML parsing for Quarto" [dependencies] quick-xml = { workspace = true } quarto-error-reporting = { path = "../quarto-error-reporting" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } thiserror = { workspace = true } [dev-dependencies] diff --git a/crates/quarto-yaml/Cargo.toml b/crates/quarto-yaml/Cargo.toml index 7e34ba128..7e034a687 100644 --- a/crates/quarto-yaml/Cargo.toml +++ b/crates/quarto-yaml/Cargo.toml @@ 
-10,7 +10,7 @@ repository.workspace = true yaml-rust2 = { workspace = true } serde = { workspace = true } thiserror = { workspace = true } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = { workspace = true } [dev-dependencies] regex = "1" diff --git a/crates/wasm-quarto-hub-client/Cargo.lock b/crates/wasm-quarto-hub-client/Cargo.lock index b20a2fef1..d5804cf97 100644 --- a/crates/wasm-quarto-hub-client/Cargo.lock +++ b/crates/wasm-quarto-hub-client/Cargo.lock @@ -377,7 +377,7 @@ dependencies = [ [[package]] name = "comrak-to-pandoc" -version = "0.5.0" +version = "0.7.0" dependencies = [ "comrak", "hashlink", @@ -1820,6 +1820,7 @@ dependencies = [ "quarto-source-map", "quarto-system-runtime", "quarto-treesitter-ast", + "quarto-util", "quarto-yaml", "regex", "serde", @@ -2072,7 +2073,7 @@ dependencies = [ [[package]] name = "quarto-analysis" -version = "0.5.0" +version = "0.7.0" dependencies = [ "quarto-error-reporting", "quarto-pandoc-types", @@ -2094,7 +2095,7 @@ dependencies = [ [[package]] name = "quarto-brand" -version = "0.5.0" +version = "0.7.0" dependencies = [ "serde", "serde_yaml", @@ -2103,7 +2104,7 @@ dependencies = [ [[package]] name = "quarto-citeproc" -version = "0.5.0" +version = "0.7.0" dependencies = [ "glob", "hashlink", @@ -2134,7 +2135,7 @@ dependencies = [ [[package]] name = "quarto-core" -version = "0.5.0" +version = "0.7.0" dependencies = [ "anyhow", "async-trait", @@ -2184,7 +2185,7 @@ dependencies = [ [[package]] name = "quarto-csl" -version = "0.5.0" +version = "0.7.0" dependencies = [ "quarto-error-reporting", "quarto-source-map", @@ -2220,7 +2221,7 @@ dependencies = [ [[package]] name = "quarto-error-reporting" -version = "0.5.0" +version = "0.7.0" dependencies = [ "ariadne", "once_cell", @@ -2234,7 +2235,7 @@ dependencies = [ [[package]] name = "quarto-highlight" -version = "0.5.0" +version = "0.7.0" dependencies = [ "once_cell", "quarto-highlight-encoding", @@ -2260,7 +2261,7 @@ dependencies = [ [[package]] 
name = "quarto-highlight-encoding" -version = "0.5.0" +version = "0.7.0" dependencies = [ "serde", "serde_json", @@ -2268,7 +2269,7 @@ dependencies = [ [[package]] name = "quarto-lsp-core" -version = "0.5.0" +version = "0.7.0" dependencies = [ "pampa", "pollster", @@ -2289,7 +2290,7 @@ dependencies = [ [[package]] name = "quarto-navigation" -version = "0.5.0" +version = "0.7.0" dependencies = [ "quarto-config", "quarto-pandoc-types", @@ -2324,7 +2325,7 @@ dependencies = [ [[package]] name = "quarto-project-create" -version = "0.5.0" +version = "0.7.0" dependencies = [ "quarto-doctemplate", "serde", @@ -2333,7 +2334,7 @@ dependencies = [ [[package]] name = "quarto-sass" -version = "0.5.0" +version = "0.7.0" dependencies = [ "include_dir", "once_cell", @@ -2351,7 +2352,9 @@ dependencies = [ [[package]] name = "quarto-source-map" -version = "0.5.0" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e3d29b1ef6088d016dfe84d831e5aa3daaba4624e23621364c2aef6d593114f" dependencies = [ "serde", "serde_json", @@ -2360,7 +2363,7 @@ dependencies = [ [[package]] name = "quarto-system-runtime" -version = "0.5.0" +version = "0.7.0" dependencies = [ "async-trait", "base64", @@ -2377,7 +2380,7 @@ dependencies = [ [[package]] name = "quarto-trace" -version = "0.5.0" +version = "0.7.0" dependencies = [ "flate2", "serde", @@ -2395,7 +2398,7 @@ dependencies = [ [[package]] name = "quarto-util" -version = "0.5.0" +version = "0.7.0" dependencies = [ "serde", "thiserror 2.0.18", @@ -2403,7 +2406,7 @@ dependencies = [ [[package]] name = "quarto-xml" -version = "0.5.0" +version = "0.7.0" dependencies = [ "quarto-error-reporting", "quarto-source-map", @@ -2413,7 +2416,7 @@ dependencies = [ [[package]] name = "quarto-yaml" -version = "0.5.0" +version = "0.7.0" dependencies = [ "quarto-source-map", "serde", @@ -4000,7 +4003,7 @@ dependencies = [ [[package]] name = "wasm-printf-fmt" -version = "0.5.0" +version = "0.7.0" [[package]] name = 
"wasm-quarto-hub-client" diff --git a/crates/wasm-quarto-hub-client/Cargo.toml b/crates/wasm-quarto-hub-client/Cargo.toml index 8d49172fa..a4228c2f0 100644 --- a/crates/wasm-quarto-hub-client/Cargo.toml +++ b/crates/wasm-quarto-hub-client/Cargo.toml @@ -22,7 +22,7 @@ quarto-highlight = { path = "../quarto-highlight" } quarto-lsp-core = { path = "../quarto-lsp-core" } quarto-pandoc-types = { path = "../quarto-pandoc-types" } quarto-sass = { path = "../quarto-sass" } -quarto-source-map = { path = "../quarto-source-map" } +quarto-source-map = "0.1.0" quarto-system-runtime = { path = "../quarto-system-runtime" } quarto-project-create = { path = "../quarto-project-create" } quarto-trace = { path = "../quarto-trace" }