From d3b608fa3a4cbb557b1a502caaa225a5ee4b8e73 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 09:49:39 +0000 Subject: [PATCH 01/13] feat(hpc): edge-codec flavors + reliability stats (measure all, validate/invalidate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two reusable hpc modules + a comparison harness so edge encodings can be measured and selected on evidence rather than guessed: - hpc::reliability — Pearson r, Spearman ρ, Cronbach α, ICC(2,1) absolute agreement, plus a bundled FidelityReport. General (&[f64]→f64) primitives; the measurement layer for validate/invalidate work (consumable by jc/pillar). - hpc::edge_codec — three commensurable quantizers sharing the canonical 16-byte edge block by INTERPRETATION, not layout: CoarseOnly (1 B palette index), CoarseResidue (1 + D/2, value-slab signed-4-bit residue), Pq32x4 (16 B = 32 subquantizers × 4-bit, the turbovec PQ model). Compact seeded k-means; nibble packing lo=code[2t]/hi=code[2t+1]. - examples/edge_codec_compare — encodes every flavor across blob/continuous regimes, reports the full reliability suite on pairwise-distance preservation, and checks the AMX matmul_i8_to_i32 assignment matches scalar. Measured: CoarseResidue dominates agreement (ICC 0.97-0.99, ρ 0.98, α 0.99); Pq32x4 preserves rank (ρ 0.60-0.67) but not absolute distance (ICC 0.11-0.29); CoarseOnly collapses on continuous data (ICC 0.003). AMX assign 100% vs scalar. 16 unit + 9 doctests; clippy -D warnings clean. Also silences a pre-existing excessive_precision lint in golden_helix_probe. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/edge_codec_compare.rs | 247 ++++++++++++++++++ examples/golden_helix_probe.rs | 2 + src/hpc/edge_codec.rs | 459 +++++++++++++++++++++++++++++++++ src/hpc/mod.rs | 5 + src/hpc/reliability.rs | 383 +++++++++++++++++++++++++++ 6 files changed, 1100 insertions(+) create mode 100644 examples/edge_codec_compare.rs create mode 100644 src/hpc/edge_codec.rs create mode 100644 src/hpc/reliability.rs diff --git a/Cargo.toml b/Cargo.toml index c3995629..460f127d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,10 @@ required-features = ["std"] name = "edge_residue_probe" required-features = ["std"] +[[example]] +name = "edge_codec_compare" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/edge_codec_compare.rs b/examples/edge_codec_compare.rs new file mode 100644 index 00000000..4f238233 --- /dev/null +++ b/examples/edge_codec_compare.rs @@ -0,0 +1,247 @@ +//! Edge-codec flavor comparison — measure ALL flavors, validate/invalidate. +//! +//! For each data regime, encodes the same vectors with the three edge-codec +//! flavors and reports the full reliability suite (Pearson r, Spearman ρ, +//! ICC(2,1), Cronbach α) on DISTANCE PRESERVATION (true pairwise L2 vs +//! reconstructed pairwise L2 — the metric that matters for nearest-neighbour +//! order), plus per-vector reconstruction rel-L2 and cosine. +//! +//! CoarseOnly 1 B/vec palette index (the EdgeBlock byte as-is) +//! CoarseResidue 1 + D/2 palette + value-slab signed-4-bit residue +//! Pq32x4 16 B 32 subquantizers × 4-bit (the edge block as PQ) +//! +//! The point: a class/schema picks the flavor by its fidelity/byte tradeoff, and +//! these are the numbers that justify the pick. The deterministic part (nearest +//! centroid) is also shown running on the AMX `matmul_i8_to_i32` tile path, +//! bit-checked against the scalar assignment. +//! +//! RUSTFLAGS="-C target-cpu=native" cargo run --release --example edge_codec_compare + +use std::time::Instant; + +use ndarray::hpc::edge_codec::{reconstruct_coarse, CoarseResidueCodec, Codebook, ProductQuantizer}; +use ndarray::hpc::reliability::FidelityReport; +use ndarray::simd::{amx_available, matmul_i8_to_i32}; +use ndarray::{ArrayView2, ArrayViewMut2}; + +fn splitmix(s: &mut u64) -> f32 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 40) as f32 / (1u32 << 24) as f32 * 2.0 - 1.0 // [-1, 1) +} + +/// Clustered data: each vector = a random centroid + noise (the regime where a +/// coarse code is meaningful and the residue captures the within-cell offset). +fn gen_blobs(n: usize, dim: usize, k: usize, noise: f32, seed: u64) -> Vec { + let mut s = seed; + let centers: Vec = (0..k * dim).map(|_| splitmix(&mut s)).collect(); + let mut data = vec![0.0f32; n * dim]; + for i in 0..n { + let c = (splitmix(&mut s).abs() * k as f32) as usize % k; + for d in 0..dim { + data[i * dim + d] = centers[c * dim + d] + noise * splitmix(&mut s); + } + } + data +} + +/// Continuous high-dimensional data (no cluster structure): the regime where a +/// coarse codebook can't tile the space and product quantization pulls ahead. +fn gen_continuous(n: usize, dim: usize, seed: u64) -> Vec { + let mut s = seed; + (0..n * dim).map(|_| splitmix(&mut s)).collect() +} + +fn l2(a: &[f32], b: &[f32]) -> f64 { + a.iter() + .zip(b) + .map(|(x, y)| ((x - y) as f64).powi(2)) + .sum::() + .sqrt() +} + +fn cosine(a: &[f32], b: &[f32]) -> f64 { + let mut dot = 0.0; + let mut na = 0.0; + let mut nb = 0.0; + for (x, y) in a.iter().zip(b) { + dot += (*x as f64) * (*y as f64); + na += (*x as f64).powi(2); + nb += (*y as f64).powi(2); + } + if na < 1e-24 || nb < 1e-24 { + 0.0 + } else { + dot / (na.sqrt() * nb.sqrt()) + } +} + +/// Deterministic candidate pairs (i, j) for the distance-preservation metric. +fn sample_pairs(n: usize, m: usize, seed: u64) -> Vec<(usize, usize)> { + let mut s = seed; + let mut next = || { + s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z ^= z >> 31; + z + }; + (0..m) + .map(|_| { + let i = (next() as usize) % n; + let mut j = (next() as usize) % n; + if j == i { + j = (j + 1) % n; + } + (i, j) + }) + .collect() +} + +/// One flavor's measured row: byte cost + reconstruction + distance fidelity. +fn report_flavor( + name: &str, bytes_per_vec: f64, data: &[f32], recon: &[f32], n: usize, dim: usize, pairs: &[(usize, usize)], +) { + // Per-vector reconstruction quality. + let mut rel_num = 0.0; + let mut rel_den = 0.0; + let mut cos_sum = 0.0; + for i in 0..n { + let v = &data[i * dim..(i + 1) * dim]; + let r = &recon[i * dim..(i + 1) * dim]; + rel_num += l2(v, r); + rel_den += v.iter().map(|x| (*x as f64).powi(2)).sum::().sqrt(); + cos_sum += cosine(v, r); + } + let recon_rel = rel_num / rel_den.max(1e-12); + let recon_cos = cos_sum / n as f64; + + // Distance preservation: true vs reconstructed pairwise L2. + let true_d: Vec = pairs + .iter() + .map(|&(i, j)| l2(&data[i * dim..(i + 1) * dim], &data[j * dim..(j + 1) * dim])) + .collect(); + let rec_d: Vec = pairs + .iter() + .map(|&(i, j)| l2(&recon[i * dim..(i + 1) * dim], &recon[j * dim..(j + 1) * dim])) + .collect(); + let f = FidelityReport::compute(&true_d, &rec_d); + + println!( + " {name:<14} {bytes_per_vec:>6.1} B | recon rel-L2 {recon_rel:.4} cos {recon_cos:.4} | dist: r {:.4} ρ {:.4} ICC {:.4} α {:.4}", + f.pearson, f.spearman, f.icc, f.cronbach + ); +} + +/// AMX vs scalar assignment agreement + throughput (the deterministic part). +fn amx_assign_demo(data: &[f32], cb: &Codebook, n: usize, dim: usize) { + if !amx_available() { + println!(" (AMX unavailable — deterministic assign runs scalar)"); + return; + } + let k = cb.k; + let q = |x: &[f32]| -> (Vec, f32) { + let amax = x.iter().fold(0.0f32, |a, &v| a.max(v.abs())).max(1e-12); + let sc = 127.0 / amax; + ( + x.iter() + .map(|&v| (v * sc).round().clamp(-127.0, 127.0) as i8) + .collect(), + sc, + ) + }; + let (v_i8, _) = q(data); + let (cb_i8, _) = q(&cb.centroids); + let mut cbt = vec![0i8; dim * k]; // D×K transpose + for c in 0..k { + for d in 0..dim { + cbt[d * k + c] = cb_i8[c * dim + d]; + } + } + let mut g = vec![0i32; n * k]; + let t0 = Instant::now(); + matmul_i8_to_i32( + ArrayView2::from_shape((n, dim), &v_i8[..]).unwrap(), + ArrayView2::from_shape((dim, k), &cbt[..]).unwrap(), + ArrayViewMut2::from_shape((n, k), &mut g[..]).unwrap(), + ) + .unwrap(); + let ns = t0.elapsed().as_nanos() as f64; + let cnorm: Vec = (0..k) + .map(|c| (0..dim).map(|d| (cb_i8[c * dim + d] as i32).pow(2)).sum()) + .collect(); + let mut agree = 0usize; + for i in 0..n { + let mut best = i32::MIN; + let mut bj = 0u32; + for j in 0..k { + let score = 2 * g[i * k + j] - cnorm[j]; + if score > best { + best = score; + bj = j as u32; + } + } + if bj == cb.assign(&data[i * dim..(i + 1) * dim]) { + agree += 1; + } + } + let macs = (n * k * dim) as f64; + println!( + " AMX assign: {:.0} ns ({:.1} GMAC/s), agrees with scalar on {:.1}% of vectors", + ns, + macs / ns, + 100.0 * agree as f64 / n as f64 + ); +} + +fn run(label: &str, data: &[f32], n: usize, dim: usize, k: usize) { + println!("\n== {label} (N={n} D={dim} K={k}) =="); + let pairs = sample_pairs(n, 4096, 0xF00D); + + // Flavor 1: coarse only. + let cb = Codebook::train(data, n, dim, k, 12, 1); + let mut recon_coarse = vec![0.0f32; n * dim]; + for i in 0..n { + let idx = cb.assign(&data[i * dim..(i + 1) * dim]); + recon_coarse[i * dim..(i + 1) * dim].copy_from_slice(&reconstruct_coarse(&cb, idx)); + } + report_flavor("CoarseOnly", 1.0, data, &recon_coarse, n, dim, &pairs); + + // Flavor 2: coarse + per-dim 4-bit residue. + let crc = CoarseResidueCodec::fit(data, n, dim, k, 12, 1); + let mut recon_res = vec![0.0f32; n * dim]; + for i in 0..n { + let code = crc.encode(&data[i * dim..(i + 1) * dim]); + recon_res[i * dim..(i + 1) * dim].copy_from_slice(&crc.reconstruct(&code)); + } + report_flavor("CoarseResidue", 1.0 + dim as f64 / 2.0, data, &recon_res, n, dim, &pairs); + + // Flavor 3: product quantizer 32×4-bit (16 B). + if dim.is_multiple_of(32) { + let pq = ProductQuantizer::fit(data, n, dim, 32, 12, 2); + let mut recon_pq = vec![0.0f32; n * dim]; + for i in 0..n { + let code = pq.encode(&data[i * dim..(i + 1) * dim]); + recon_pq[i * dim..(i + 1) * dim].copy_from_slice(&pq.reconstruct(&code)); + } + report_flavor("Pq32x4", 16.0, data, &recon_pq, n, dim, &pairs); + } else { + println!(" Pq32x4 (skipped — D={dim} not divisible by 32)"); + } + + amx_assign_demo(data, &cb, n, dim); +} + +fn main() { + println!("== Edge-codec flavor comparison (measure all, validate/invalidate) =="); + println!("amx_available() = {}", amx_available()); + println!("metrics: recon rel-L2/cosine (per-vector) · dist r/ρ/ICC/α (pairwise L2 preservation)"); + + let (n, dim, k) = (4096, 128, 256); + run("blobs σ=0.15", &gen_blobs(n, dim, k, 0.15, 0x1111), n, dim, k); + run("blobs σ=0.30", &gen_blobs(n, dim, k, 0.30, 0x2222), n, dim, k); + run("continuous", &gen_continuous(n, dim, 0x3333), n, dim, k); +} diff --git a/examples/golden_helix_probe.rs b/examples/golden_helix_probe.rs index 733ada89..48b23010 100644 --- a/examples/golden_helix_probe.rs +++ b/examples/golden_helix_probe.rs @@ -26,6 +26,8 @@ //! //! cargo run --release --example golden_helix_probe +// √5 to full f64 precision; the literal is intentionally exact (π(3−√5)). +#[allow(clippy::excessive_precision)] const GAMMA: f64 = std::f64::consts::PI * (3.0 - 2.2360679774997896); // π(3−√5), golden angle /// Golden-spiral hemisphere unit vectors (the helix node directions). diff --git a/src/hpc/edge_codec.rs b/src/hpc/edge_codec.rs new file mode 100644 index 00000000..665275c5 --- /dev/null +++ b/src/hpc/edge_codec.rs @@ -0,0 +1,459 @@ +//! Edge-codec flavors — deterministic palette code ↔ residue, at three byte +//! budgets that share ONE 16-byte edge block by *interpretation*, not layout. +//! +//! The canon node's edge block is 16 bytes (`canonical_node.rs`). A class picks +//! how those bytes (plus an optional value-slab residue) are read: +//! +//! | flavor | bytes/vector | interpretation | +//! |-----------------|---------------------|--------------------------------------| +//! | [`CoarseOnly`] | 1 (palette index) | one of K=256 centroids — the EdgeBlock byte as-is | +//! | [`CoarseResidue`]| 1 + D/2 | centroid + per-dim signed-4-bit residue (value slab) | +//! | [`Pq32x4`] | 16 (32 × 4-bit) | the 16 edge bytes read as a 32-subquantizer PQ code | +//! +//! All three are *quantizers of the same D-dim vector* measured by reconstruction +//! fidelity, so they are directly comparable (feed each output to +//! [`crate::hpc::reliability::FidelityReport`]). The deterministic part (the +//! nearest-centroid assignment) is recomputable — on x86 it runs on the AMX +//! `matmul_i8_to_i32` tile path (see `examples/edge_codec_compare.rs`); here the +//! assignment is plain scalar so the codec is portable and bit-identical to the +//! accelerated path. +//! +//! Nibbles are packed `lo = code[2t]`, `hi = code[2t+1]` (FastScan/turbovec +//! convention). Signed-4-bit residue codes are two's-complement in the low 4 +//! bits (range −8..=7). +//! +//! [`CoarseOnly`]: reconstruct_coarse +//! [`CoarseResidue`]: CoarseResidueCodec +//! [`Pq32x4`]: ProductQuantizer + +/// SplitMix64 — deterministic seeding for k-means init (no external rng dep). +#[inline] +fn splitmix(state: &mut u64) -> u64 { + *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *state; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) +} + +#[inline] +fn sq_dist(a: &[f32], b: &[f32]) -> f32 { + a.iter().zip(b).map(|(x, y)| (x - y) * (x - y)).sum() +} + +/// A flat k-means codebook: `k` centroids of `dim` f32 each, row-major. +#[derive(Debug, Clone)] +pub struct Codebook { + /// Number of centroids. + pub k: usize, + /// Dimensionality of each centroid. + pub dim: usize, + /// `k * dim` centroid coordinates, row-major. + pub centroids: Vec, +} + +impl Codebook { + /// Train `k` centroids on `n` row-major vectors of `dim` via Lloyd's + /// algorithm (`iters` passes, deterministic `seed`). Empty clusters are + /// re-seeded to a pseudo-random data point so every centroid stays live. + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::Codebook; + /// // Two well-separated blobs → 2 centroids recover them. + /// let data = [0.0, 0.0, 0.1, 0.0, 9.0, 9.0, 9.1, 9.0]; + /// let cb = Codebook::train(&data, 4, 2, 2, 10, 1); + /// assert_eq!(cb.k, 2); + /// assert_ne!(cb.assign(&[0.05, 0.0]), cb.assign(&[9.05, 9.0])); + /// ``` + pub fn train(data: &[f32], n: usize, dim: usize, k: usize, iters: usize, seed: u64) -> Self { + assert!(dim > 0 && k > 0 && n >= k, "need n >= k >= 1 and dim > 0"); + assert_eq!(data.len(), n * dim, "data length must be n * dim"); + let mut state = seed ^ 0xD1B5_4A32_D192_ED03; + // Init: distinct random data points as centroids. + let mut centroids = vec![0.0f32; k * dim]; + { + let mut chosen = vec![false; n]; + for c in 0..k { + let mut idx = (splitmix(&mut state) % n as u64) as usize; + let mut guard = 0; + while chosen[idx] && guard < n { + idx = (idx + 1) % n; + guard += 1; + } + chosen[idx] = true; + centroids[c * dim..(c + 1) * dim].copy_from_slice(&data[idx * dim..(idx + 1) * dim]); + } + } + let mut assign = vec![0u32; n]; + for _ in 0..iters { + // Assignment step. + for i in 0..n { + let v = &data[i * dim..(i + 1) * dim]; + let mut best = f32::INFINITY; + let mut bj = 0u32; + for c in 0..k { + let d = sq_dist(v, ¢roids[c * dim..(c + 1) * dim]); + if d < best { + best = d; + bj = c as u32; + } + } + assign[i] = bj; + } + // Update step. + let mut sums = vec![0.0f32; k * dim]; + let mut counts = vec![0u32; k]; + for i in 0..n { + let c = assign[i] as usize; + counts[c] += 1; + let v = &data[i * dim..(i + 1) * dim]; + for d in 0..dim { + sums[c * dim + d] += v[d]; + } + } + for c in 0..k { + if counts[c] == 0 { + // Re-seed dead cluster to a random point. + let idx = (splitmix(&mut state) % n as u64) as usize; + centroids[c * dim..(c + 1) * dim].copy_from_slice(&data[idx * dim..(idx + 1) * dim]); + } else { + let inv = 1.0 / counts[c] as f32; + for d in 0..dim { + centroids[c * dim + d] = sums[c * dim + d] * inv; + } + } + } + } + Codebook { k, dim, centroids } + } + + /// Index of the nearest centroid to `v` (scalar; identical result to the + /// AMX `matmul_i8_to_i32` assignment path). Panics if `v.len() != dim`. + #[inline] + pub fn assign(&self, v: &[f32]) -> u32 { + assert_eq!(v.len(), self.dim); + let mut best = f32::INFINITY; + let mut bj = 0u32; + for c in 0..self.k { + let d = sq_dist(v, &self.centroids[c * self.dim..(c + 1) * self.dim]); + if d < best { + best = d; + bj = c as u32; + } + } + bj + } + + /// Borrow centroid `c` as a slice. + #[inline] + pub fn centroid(&self, c: usize) -> &[f32] { + &self.centroids[c * self.dim..(c + 1) * self.dim] + } +} + +/// Pack signed nibbles (`-8..=7`) two-per-byte: `lo = codes[2t]`, `hi = codes[2t+1]`. +/// `codes.len()` must be even. +fn pack_nibbles_signed(codes: &[i8]) -> Vec { + debug_assert!(codes.len().is_multiple_of(2)); + codes + .chunks_exact(2) + .map(|p| ((p[0] as u8) & 0x0F) | (((p[1] as u8) & 0x0F) << 4)) + .collect() +} + +/// Unpack `n` signed nibbles from `bytes` (inverse of [`pack_nibbles_signed`]). +fn unpack_nibbles_signed(bytes: &[u8], n: usize) -> Vec { + let mut out = Vec::with_capacity(n); + for t in 0..n { + let byte = bytes[t / 2]; + let nib = if t.is_multiple_of(2) { byte & 0x0F } else { byte >> 4 }; + // Sign-extend the low 4 bits. + out.push(if nib >= 8 { nib as i8 - 16 } else { nib as i8 }); + } + out +} + +// ── Flavor 1: coarse only ──────────────────────────────────────────────────── + +/// Reconstruct a vector from its coarse palette index alone (1 byte/vector). +/// This is the canonical EdgeBlock byte read literally as a centroid pointer. +/// +/// # Examples +/// ``` +/// use ndarray::hpc::edge_codec::{reconstruct_coarse, Codebook}; +/// let data = [0.0, 0.0, 5.0, 5.0]; // 2 points × dim 2 +/// let cb = Codebook::train(&data, 2, 2, 2, 5, 1); +/// let idx = cb.assign(&[5.0, 5.0]); +/// assert_eq!(reconstruct_coarse(&cb, idx).len(), 2); +/// ``` +#[inline] +pub fn reconstruct_coarse(cb: &Codebook, index: u32) -> Vec { + cb.centroid(index as usize).to_vec() +} + +// ── Flavor 2: coarse + per-dim signed-4-bit residue (value-slab) ───────────── + +/// Coarse palette + a shared-scale per-dimension signed-4-bit residue. +/// +/// The residue scale is computed once over the training set (class-level), so +/// each vector costs exactly `1 + dim/2` bytes (no per-vector scale word). The +/// residue lands in the node's reserved value slab — no edge-block layout change. +#[derive(Debug, Clone)] +pub struct CoarseResidueCodec { + /// The coarse palette. + pub cb: Codebook, + /// Shared residue quantization step (max|residue| / 7 over the train set). + pub res_scale: f32, +} + +/// One encoded vector: coarse index + packed `dim/2` residue bytes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CoarseResidueCode { + /// Nearest-centroid index (the coarse 1-byte code). + pub index: u32, + /// `dim/2` bytes of packed signed nibbles (the value-slab residue). + pub residue: Vec, +} + +impl CoarseResidueCodec { + /// Fit the palette (k-means) and the shared residue scale on the data. + /// `dim` must be even (nibbles pack two-per-byte). + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::CoarseResidueCodec; + /// let data: Vec = (0..8 * 4).map(|i| (i % 5) as f32 - 2.0).collect(); + /// let codec = CoarseResidueCodec::fit(&data, 8, 4, 4, 6, 1); + /// let code = codec.encode(&data[0..4]); + /// assert_eq!(code.residue.len(), 2); // dim/2 packed residue bytes + /// assert_eq!(codec.reconstruct(&code).len(), 4); + /// ``` + pub fn fit(data: &[f32], n: usize, dim: usize, k: usize, iters: usize, seed: u64) -> Self { + assert!(dim.is_multiple_of(2), "dim must be even for nibble packing"); + let cb = Codebook::train(data, n, dim, k, iters, seed); + let mut rmax = 0.0f32; + for i in 0..n { + let v = &data[i * dim..(i + 1) * dim]; + let c = cb.centroid(cb.assign(v) as usize); + for d in 0..dim { + rmax = rmax.max((v[d] - c[d]).abs()); + } + } + let res_scale = (rmax / 7.0).max(1e-12); + CoarseResidueCodec { cb, res_scale } + } + + /// Encode one vector to coarse index + packed residue nibbles. + pub fn encode(&self, v: &[f32]) -> CoarseResidueCode { + let dim = self.cb.dim; + let index = self.cb.assign(v); + let c = self.cb.centroid(index as usize); + let codes: Vec = (0..dim) + .map(|d| { + let q = ((v[d] - c[d]) / self.res_scale).round(); + q.clamp(-8.0, 7.0) as i8 + }) + .collect(); + CoarseResidueCode { + index, + residue: pack_nibbles_signed(&codes), + } + } + + /// Reconstruct `centroid + dequantized residue`. + pub fn reconstruct(&self, code: &CoarseResidueCode) -> Vec { + let dim = self.cb.dim; + let c = self.cb.centroid(code.index as usize); + let res = unpack_nibbles_signed(&code.residue, dim); + (0..dim) + .map(|d| c[d] + res[d] as f32 * self.res_scale) + .collect() + } +} + +// ── Flavor 3: product quantizer — 32 subspaces × 4-bit (the 16-byte block) ─── + +/// Product quantizer: split `dim` into `m` subspaces, each with a 16-entry +/// (4-bit) sub-codebook. With `m = 32` the code is exactly `16` bytes — the same +/// budget as the edge block, read as 32 nibble codes (the turbovec PQ model). +#[derive(Debug, Clone)] +pub struct ProductQuantizer { + /// Number of subspaces (nibble codes per vector). + pub m: usize, + /// Sub-vector dimensionality (`dim / m`). + pub sub_dim: usize, + /// One 16-entry sub-codebook per subspace. + pub sub: Vec, +} + +impl ProductQuantizer { + /// Fit `m` independent 16-entry sub-codebooks. `dim` must be divisible by + /// `m`, and `m` must be even (nibbles pack two-per-byte). + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::ProductQuantizer; + /// // dim 8, m 4 subspaces → 4 nibbles → 2 bytes. + /// let data: Vec = (0..32 * 8).map(|i| ((i * 7 % 11) as f32) - 5.0).collect(); + /// let pq = ProductQuantizer::fit(&data, 32, 8, 4, 6, 1); + /// let code = pq.encode(&data[0..8]); + /// assert_eq!(code.len(), 2); + /// assert_eq!(pq.reconstruct(&code).len(), 8); + /// ``` + pub fn fit(data: &[f32], n: usize, dim: usize, m: usize, iters: usize, seed: u64) -> Self { + assert!(m.is_multiple_of(2), "m must be even for nibble packing"); + assert!(dim.is_multiple_of(m), "dim must be divisible by m"); + let sub_dim = dim / m; + let ksub = 16; // 4-bit + let mut sub = Vec::with_capacity(m); + for s in 0..m { + // Gather subspace `s` columns into a contiguous n × sub_dim buffer. + let mut buf = vec![0.0f32; n * sub_dim]; + for i in 0..n { + let src = &data[i * dim + s * sub_dim..i * dim + (s + 1) * sub_dim]; + buf[i * sub_dim..(i + 1) * sub_dim].copy_from_slice(src); + } + sub.push(Codebook::train(&buf, n, sub_dim, ksub, iters, seed ^ (s as u64).wrapping_mul(0x9E37_79B9))); + } + ProductQuantizer { m, sub_dim, sub } + } + + /// Encode one vector to `m/2` packed nibble bytes (16 bytes when `m = 32`). + pub fn encode(&self, v: &[f32]) -> Vec { + let codes: Vec = (0..self.m) + .map(|s| { + let sv = &v[s * self.sub_dim..(s + 1) * self.sub_dim]; + self.sub[s].assign(sv) as i8 // 0..=15 fits the low nibble + }) + .collect(); + // Unsigned 0..15 codes share the same packing as signed (low 4 bits). + codes + .chunks_exact(2) + .map(|p| ((p[0] as u8) & 0x0F) | (((p[1] as u8) & 0x0F) << 4)) + .collect() + } + + /// Reconstruct by concatenating the selected sub-centroids. + pub fn reconstruct(&self, code: &[u8]) -> Vec { + let mut out = vec![0.0f32; self.m * self.sub_dim]; + for s in 0..self.m { + let byte = code[s / 2]; + let nib = if s.is_multiple_of(2) { byte & 0x0F } else { byte >> 4 } as usize; + out[s * self.sub_dim..(s + 1) * self.sub_dim].copy_from_slice(self.sub[s].centroid(nib)); + } + out + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Deterministic clustered data: `n` vectors of `dim`, drawn from `k` blobs. + fn make_data(n: usize, dim: usize, k: usize, noise: f32, seed: u64) -> Vec { + let mut s = seed; + let centers: Vec = (0..k * dim) + .map(|_| (splitmix(&mut s) >> 40) as f32 / (1u64 << 24) as f32 * 2.0 - 1.0) + .collect(); + let mut data = vec![0.0f32; n * dim]; + for i in 0..n { + let c = (splitmix(&mut s) % k as u64) as usize; + for d in 0..dim { + let z = (splitmix(&mut s) >> 40) as f32 / (1u64 << 24) as f32 * 2.0 - 1.0; + data[i * dim + d] = centers[c * dim + d] + noise * z; + } + } + data + } + + fn rel_l2(a: &[f32], b: &[f32]) -> f32 { + let se: f32 = a.iter().zip(b).map(|(x, y)| (x - y) * (x - y)).sum(); + let sn: f32 = a.iter().map(|x| x * x).sum(); + (se.sqrt() / sn.sqrt().max(1e-12)).max(0.0) + } + + #[test] + fn nibble_pack_roundtrip_signed() { + let codes: Vec = vec![-8, 7, 0, -1, 3, -4]; + let packed = pack_nibbles_signed(&codes); + assert_eq!(packed.len(), 3); + let back = unpack_nibbles_signed(&packed, codes.len()); + assert_eq!(back, codes); + } + + #[test] + fn kmeans_separates_blobs() { + let data = make_data(256, 8, 4, 0.05, 11); + let cb = Codebook::train(&data, 256, 8, 4, 12, 7); + // All 4 centroids should be used (no degenerate collapse). + let mut used = [false; 4]; + for i in 0..256 { + used[cb.assign(&data[i * 8..(i + 1) * 8]) as usize] = true; + } + assert!(used.iter().all(|&u| u), "all centroids should attract points"); + } + + #[test] + fn coarse_residue_roundtrip_is_deterministic() { + let data = make_data(512, 16, 32, 0.2, 3); + let codec = CoarseResidueCodec::fit(&data, 512, 16, 32, 10, 5); + let v = &data[0..16]; + let c1 = codec.encode(v); + let c2 = codec.encode(v); + assert_eq!(c1, c2, "encode must be deterministic"); + assert_eq!(c1.residue.len(), 8, "dim/2 = 8 residue bytes"); + let r = codec.reconstruct(&c1); + assert_eq!(r.len(), 16); + } + + #[test] + fn residue_beats_coarse_only() { + let (n, dim, k) = (1024, 32, 256); + let data = make_data(n, dim, k, 0.25, 9); + let codec = CoarseResidueCodec::fit(&data, n, dim, k, 12, 1); + let mut coarse_err = 0.0f32; + let mut fine_err = 0.0f32; + for i in 0..n { + let v = &data[i * dim..(i + 1) * dim]; + let code = codec.encode(v); + let idx = code.index; + coarse_err += rel_l2(v, &reconstruct_coarse(&codec.cb, idx)); + fine_err += rel_l2(v, &codec.reconstruct(&code)); + } + assert!( + fine_err < coarse_err * 0.5, + "coarse+residue must roughly halve error: coarse={coarse_err} fine={fine_err}" + ); + } + + #[test] + fn pq32x4_code_is_sixteen_bytes_and_reconstructs() { + // High-dim CONTINUOUS data (not blobs): the realistic embedding regime + // where a 256-entry coarse codebook can't tile the space but a 32×4-bit + // product code can. With few blobs, coarse-only memorises them and the + // comparison is meaningless — PQ's win is a high-dimensionality effect. + let (n, dim) = (1024, 64); + let mut s = 0xABCD_1234u64; + let data: Vec = (0..n * dim) + .map(|_| (splitmix(&mut s) >> 40) as f32 / (1u64 << 24) as f32 * 2.0 - 1.0) + .collect(); + let pq = ProductQuantizer::fit(&data, n, dim, 32, 10, 2); + let code = pq.encode(&data[0..dim]); + assert_eq!(code.len(), 16, "32 nibbles = 16 bytes (the edge-block budget)"); + assert_eq!(pq.reconstruct(&code).len(), dim); + + let cb = Codebook::train(&data, n, dim, 256, 12, 2); + let mut pq_err = 0.0f32; + let mut coarse_err = 0.0f32; + for i in 0..n { + let v = &data[i * dim..(i + 1) * dim]; + pq_err += rel_l2(v, &pq.reconstruct(&pq.encode(v))); + coarse_err += rel_l2(v, &reconstruct_coarse(&cb, cb.assign(v))); + } + assert!( + pq_err < coarse_err, + "PQ(16B) should beat coarse(1B) in high-D continuous regime: pq={pq_err} coarse={coarse_err}" + ); + } +} diff --git a/src/hpc/mod.rs b/src/hpc/mod.rs index 3e195b8b..9fa3d44f 100644 --- a/src/hpc/mod.rs +++ b/src/hpc/mod.rs @@ -25,6 +25,8 @@ pub mod blas_level2; pub mod blas_level3; pub mod reductions; pub mod statistics; +/// Reliability & validity statistics: Pearson r, Spearman ρ, Cronbach α, ICC. +pub mod reliability; pub mod activations; pub mod hdc; // Bitwise SIMD primitives — graduated to crate root. Back-compat re-export. @@ -73,6 +75,9 @@ pub mod bf16_tile_gemm; /// `bf16_tile_gemm` for the integer operand family. #[cfg(target_arch = "x86_64")] pub mod int8_tile_gemm; +/// Edge-codec flavors (coarse / coarse+residue / PQ-32×4) for the canonical +/// node edge block — selectable per class, measured via `reliability`. +pub mod edge_codec; #[allow(missing_docs)] pub mod bf16_truth; #[allow(missing_docs)] diff --git a/src/hpc/reliability.rs b/src/hpc/reliability.rs new file mode 100644 index 00000000..98506b4d --- /dev/null +++ b/src/hpc/reliability.rs @@ -0,0 +1,383 @@ +//! Reliability & validity statistics — Pearson r, Spearman ρ, Cronbach α, ICC. +//! +//! The measurement layer for *validate/invalidate* work: given a ground-truth +//! signal and a reconstructed/quantized estimate of it, quantify how faithfully +//! the estimate preserves the original. The four metrics answer complementary +//! questions and disagreeing is informative: +//! +//! * **Pearson r** — linear association (scale/offset invariant). +//! * **Spearman ρ** — *rank* preservation (the metric that matters for ANN / +//! nearest-neighbour order; invariant to any monotone transform). +//! * **ICC(2,1)** — *absolute agreement* (two-way random, single rater): unlike +//! Pearson it PENALISES a systematic offset, so `r == 1` with `ICC < 1` is the +//! signature of a biased-but-correlated reconstruction. +//! * **Cronbach α** — internal consistency treating truth & estimate as two +//! "items" measuring one construct (a reliability coefficient in [−∞, 1]). +//! +//! These are general primitives (`&[f64]` in, `f64` out) — they have no +//! dependency on the edge-codec work that motivated them and are reused by the +//! `jc` proof-pillars and any consumer needing fidelity numbers. +//! +//! All estimators use the *population* variance convention (divide by `n`); for +//! the ratio-based coefficients (α, ICC) the divisor cancels, and for Pearson / +//! Spearman it cancels in numerator vs denominator, so results match the usual +//! textbook definitions. + +/// Pearson product-moment correlation between two equal-length samples. +/// +/// Returns `0.0` for degenerate input (`n < 2` or zero variance in either +/// sample) rather than `NaN`, so it is safe to aggregate. +/// +/// # Examples +/// ``` +/// use ndarray::hpc::reliability::pearson; +/// let x = [1.0, 2.0, 3.0, 4.0]; +/// // y = 2x + 1 ⇒ perfect positive linear association. +/// let y = [3.0, 5.0, 7.0, 9.0]; +/// assert!((pearson(&x, &y) - 1.0).abs() < 1e-12); +/// assert!((pearson(&x, &[4.0, 3.0, 2.0, 1.0]) + 1.0).abs() < 1e-12); +/// ``` +pub fn pearson(a: &[f64], b: &[f64]) -> f64 { + let n = a.len(); + if n < 2 || b.len() != n { + return 0.0; + } + let inv = 1.0 / n as f64; + let mean_a = a.iter().sum::() * inv; + let mean_b = b.iter().sum::() * inv; + let (mut cov, mut va, mut vb) = (0.0, 0.0, 0.0); + for i in 0..n { + let da = a[i] - mean_a; + let db = b[i] - mean_b; + cov += da * db; + va += da * da; + vb += db * db; + } + if va < 1e-12 || vb < 1e-12 { + return 0.0; + } + cov / (va.sqrt() * vb.sqrt()) +} + +/// Average-rank vector (ties share the mean of the ranks they span). +fn average_ranks(v: &[f64]) -> Vec { + let n = v.len(); + let mut idx: Vec = (0..n).collect(); + idx.sort_by(|&i, &j| { + v[i].partial_cmp(&v[j]) + .unwrap_or(core::cmp::Ordering::Equal) + }); + let mut ranks = vec![0.0f64; n]; + let mut i = 0; + while i < n { + let mut j = i + 1; + // Extend over the tie group of equal values. + while j < n && v[idx[j]] == v[idx[i]] { + j += 1; + } + // Mean of ranks [i, j) using 1-based ranks → ((i+1)+j)/2. + let avg = ((i + 1 + j) as f64) / 2.0; + for &k in &idx[i..j] { + ranks[k] = avg; + } + i = j; + } + ranks +} + +/// Spearman rank correlation: Pearson on the average-rank vectors (tie-aware). +/// +/// # Examples +/// ``` +/// use ndarray::hpc::reliability::spearman; +/// // Monotone but non-linear ⇒ Spearman 1.0 (Pearson would be < 1). +/// let x = [1.0, 2.0, 3.0, 4.0]; +/// let y = [1.0, 4.0, 9.0, 16.0]; +/// assert!((spearman(&x, &y) - 1.0).abs() < 1e-12); +/// ``` +pub fn spearman(a: &[f64], b: &[f64]) -> f64 { + let n = a.len(); + if n < 2 || b.len() != n { + return 0.0; + } + pearson(&average_ranks(a), &average_ranks(b)) +} + +/// Cronbach's α over `k` items × `n` subjects (each inner slice is one item's +/// scores across the shared subjects; all slices must have the same length). +/// +/// α = (k / (k−1)) · (1 − Σ Var(itemᵢ) / Var(Σ itemᵢ)). Returns `0.0` for +/// `k < 2`, mismatched lengths, `n < 2`, or zero total variance. +/// +/// # Examples +/// ``` +/// use ndarray::hpc::reliability::cronbach_alpha; +/// // Two identical items ⇒ perfectly consistent ⇒ α = 1. +/// let item = [1.0, 2.0, 3.0, 4.0, 5.0]; +/// assert!((cronbach_alpha(&[&item, &item]) - 1.0).abs() < 1e-12); +/// ``` +pub fn cronbach_alpha(items: &[&[f64]]) -> f64 { + let k = items.len(); + if k < 2 { + return 0.0; + } + let n = items[0].len(); + if n < 2 || items.iter().any(|it| it.len() != n) { + return 0.0; + } + // Population variance of a slice. + let var = |xs: &[f64]| -> f64 { + let m = xs.iter().sum::() / n as f64; + xs.iter().map(|&x| (x - m) * (x - m)).sum::() / n as f64 + }; + let sum_item_var: f64 = items.iter().map(|it| var(it)).sum(); + let totals: Vec = (0..n).map(|j| items.iter().map(|it| it[j]).sum()).collect(); + let total_var = var(&totals); + if total_var < 1e-12 { + return 0.0; + } + (k as f64 / (k as f64 - 1.0)) * (1.0 - sum_item_var / total_var) +} + +/// Intraclass correlation ICC(2,1) — two-way random effects, single rater, +/// **absolute agreement** (Shrout & Fleiss 1979; McGraw & Wong 1996). +/// +/// `ratings` is `k` raters × `n` subjects (each inner slice is one rater's +/// scores across the shared subjects). For the common "ground-truth vs +/// reconstruction" case pass two slices. Unlike Pearson, this penalises a +/// constant offset between raters, so it detects systematic bias. +/// +/// Returns `0.0` for degenerate input (`k < 2`, `n < 2`, mismatched lengths, or +/// a zero denominator). +/// +/// # Examples +/// ``` +/// use ndarray::hpc::reliability::icc_a1; +/// let truth = [1.0, 2.0, 3.0, 4.0, 5.0]; +/// // Perfect agreement ⇒ ICC = 1. +/// assert!((icc_a1(&[&truth, &truth]) - 1.0).abs() < 1e-9); +/// // A constant +10 offset is perfectly *correlated* but not in *agreement*: +/// let biased: Vec = truth.iter().map(|x| x + 10.0).collect(); +/// let icc = icc_a1(&[&truth, &biased]); +/// assert!(icc < 0.5, "absolute-agreement ICC must penalise the offset, got {icc}"); +/// ``` +pub fn icc_a1(ratings: &[&[f64]]) -> f64 { + let k = ratings.len(); + if k < 2 { + return 0.0; + } + let n = ratings[0].len(); + if n < 2 || ratings.iter().any(|r| r.len() != n) { + return 0.0; + } + let (kf, nf) = (k as f64, n as f64); + let total: f64 = ratings.iter().map(|r| r.iter().sum::()).sum(); + let grand = total / (kf * nf); + + // Between-subjects (rows) sum of squares: k · Σ_s (subject_mean − grand)². + let mut ss_subj = 0.0; + for s in 0..n { + let sm = ratings.iter().map(|r| r[s]).sum::() / kf; + ss_subj += (sm - grand) * (sm - grand); + } + ss_subj *= kf; + + // Between-raters (columns) sum of squares: n · Σ_r (rater_mean − grand)². + let mut ss_rater = 0.0; + for r in ratings { + let rm = r.iter().sum::() / nf; + ss_rater += (rm - grand) * (rm - grand); + } + ss_rater *= nf; + + // Total SS, then residual by subtraction. + let mut ss_total = 0.0; + for r in ratings { + for &x in r.iter() { + ss_total += (x - grand) * (x - grand); + } + } + let ss_err = ss_total - ss_subj - ss_rater; + + let msr = ss_subj / (nf - 1.0); // mean square, subjects + let msc = ss_rater / (kf - 1.0); // mean square, raters + let mse = ss_err / ((nf - 1.0) * (kf - 1.0)); // mean square, residual + + let denom = msr + (kf - 1.0) * mse + (kf / nf) * (msc - mse); + if denom.abs() < 1e-12 { + return 0.0; + } + (msr - mse) / denom +} + +/// Bundled fidelity of a reconstruction against ground truth. +/// +/// One call computes all four coefficients plus the relative-L2 error and +/// cosine similarity, so a harness can print a row per codec/flavor without +/// recomputing means four times. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct FidelityReport { + /// Pearson product-moment correlation (linear association). + pub pearson: f64, + /// Spearman rank correlation (order preservation). + pub spearman: f64, + /// ICC(2,1) absolute agreement (bias-sensitive). + pub icc: f64, + /// Cronbach α treating {truth, estimate} as two items. + pub cronbach: f64, + /// Relative L2 error ‖estimate − truth‖ / ‖truth‖. + pub rel_l2: f64, + /// Cosine similarity between truth and estimate. + pub cosine: f64, +} + +impl FidelityReport { + /// Compute every coefficient for `(truth, estimate)` (equal-length samples). + /// + /// # Examples + /// ``` + /// use ndarray::hpc::reliability::FidelityReport; + /// let truth = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]; + /// let est = [0.1, 0.9, 2.1, 2.9, 4.2, 4.8]; + /// let r = FidelityReport::compute(&truth, &est); + /// assert!(r.pearson > 0.99 && r.spearman > 0.99); + /// assert!(r.rel_l2 < 0.1); + /// ``` + pub fn compute(truth: &[f64], estimate: &[f64]) -> Self { + let n = truth.len().min(estimate.len()); + let (t, e) = (&truth[..n], &estimate[..n]); + let mut se = 0.0; + let mut st = 0.0; + let mut dot = 0.0; + let mut ne = 0.0; + for i in 0..n { + let d = e[i] - t[i]; + se += d * d; + st += t[i] * t[i]; + dot += t[i] * e[i]; + ne += e[i] * e[i]; + } + let rel_l2 = if st > 1e-24 { se.sqrt() / st.sqrt() } else { 0.0 }; + let cosine = if st > 1e-24 && ne > 1e-24 { + dot / (st.sqrt() * ne.sqrt()) + } else { + 0.0 + }; + FidelityReport { + pearson: pearson(t, e), + spearman: spearman(t, e), + icc: icc_a1(&[t, e]), + cronbach: cronbach_alpha(&[t, e]), + rel_l2, + cosine, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pearson_perfect_and_anti() { + let x = [1.0, 2.0, 3.0, 4.0, 5.0]; + let y: Vec = x.iter().map(|v| 3.0 * v - 2.0).collect(); + assert!((pearson(&x, &y) - 1.0).abs() < 1e-12); + let z: Vec = x.iter().rev().copied().collect(); + assert!((pearson(&x, &z) + 1.0).abs() < 1e-12); + } + + #[test] + fn pearson_degenerate_is_zero_not_nan() { + assert_eq!(pearson(&[1.0], &[2.0]), 0.0); + assert_eq!(pearson(&[1.0, 1.0, 1.0], &[2.0, 3.0, 4.0]), 0.0); + assert_eq!(pearson(&[1.0, 2.0], &[3.0, 4.0, 5.0]), 0.0); + } + + #[test] + fn spearman_monotone_nonlinear_is_one() { + let x = [1.0, 2.0, 3.0, 4.0, 5.0]; + let y = [1.0, 8.0, 27.0, 64.0, 125.0]; // x³, monotone + assert!((spearman(&x, &y) - 1.0).abs() < 1e-12); + // Pearson is strictly below 1 for the same data. + assert!(pearson(&x, &y) < 0.99); + } + + #[test] + fn spearman_handles_ties() { + // Tie group in `a`; average-rank must keep ρ well-defined (no NaN). + let a = [1.0, 1.0, 2.0, 3.0]; + let b = [10.0, 20.0, 30.0, 40.0]; + let rho = spearman(&a, &b); + assert!(rho.is_finite() && rho > 0.7); + } + + #[test] + fn cronbach_identical_items_is_one() { + let item = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + assert!((cronbach_alpha(&[&item, &item]) - 1.0).abs() < 1e-12); + assert!((cronbach_alpha(&[&item, &item, &item]) - 1.0).abs() < 1e-12); + } + + #[test] + fn cronbach_uncorrelated_items_is_low() { + let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + let b = [6.0, 1.0, 5.0, 2.0, 4.0, 3.0]; + // Poorly-consistent items → α well below the 0.7 conventional floor. + assert!(cronbach_alpha(&[&a, &b]) < 0.7); + } + + #[test] + fn icc_perfect_agreement_is_one() { + let t = [2.0, 4.0, 6.0, 8.0, 10.0, 12.0]; + assert!((icc_a1(&[&t, &t]) - 1.0).abs() < 1e-9); + } + + #[test] + fn icc_penalises_offset_pearson_does_not() { + let t = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + let biased: Vec = t.iter().map(|x| x + 5.0).collect(); + // Correlated perfectly... + assert!((pearson(&t, &biased) - 1.0).abs() < 1e-12); + // ...but absolute-agreement ICC is dragged down by the systematic bias. + let icc = icc_a1(&[&t, &biased]); + assert!(icc < 0.7, "icc should penalise offset, got {icc}"); + } + + #[test] + fn icc_scaled_agreement_below_perfect() { + let t = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + let scaled: Vec = t.iter().map(|x| x * 2.0).collect(); + let icc = icc_a1(&[&t, &scaled]); + assert!(icc.is_finite() && icc < 1.0); + } + + #[test] + fn fidelity_report_clean_reconstruction() { + let truth: Vec = (0..32).map(|i| (i as f64).sin()).collect(); + let est: Vec = truth.iter().map(|x| x + 0.001).collect(); + let r = FidelityReport::compute(&truth, &est); + assert!(r.pearson > 0.999); + assert!(r.spearman > 0.999); + assert!(r.icc > 0.99); + assert!(r.cosine > 0.999); + assert!(r.rel_l2 < 0.05); + } + + #[test] + fn fidelity_report_degrades_with_noise() { + let truth: Vec = (0..64).map(|i| ((i * 7 % 13) as f64) - 6.0).collect(); + let clean: Vec = truth.iter().map(|x| x + 0.01).collect(); + let noisy: Vec = truth + .iter() + .enumerate() + .map(|(i, x)| x + ((i % 5) as f64 - 2.0)) + .collect(); + let rc = FidelityReport::compute(&truth, &clean); + let rn = FidelityReport::compute(&truth, &noisy); + // Every coefficient should rank the clean reconstruction above the noisy. + assert!(rc.pearson > rn.pearson); + assert!(rc.icc > rn.icc); + assert!(rc.rel_l2 < rn.rel_l2); + } +} From 83be7c35f6146dd1e28870b5145465b6f4b9a37d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 10:16:50 +0000 Subject: [PATCH 02/13] =?UTF-8?q?feat(hpc):=20entropy=5Fladder=20=E2=80=94?= =?UTF-8?q?=20Staunen=E2=86=94Wisdom=20coordinate=20over=20NARS=20truth=20?= =?UTF-8?q?+=20Pearl-2=C2=B3=20SPO?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unifying foundation for the SPO rung ladder: a fact's position on the cognitive ladder IS an entropy level. Reads a CausalEdge64's EXISTING fields (3×palette-256 S/P/O indices + Pearl 2³ mask + packed NARS f/c) — NO re-quantization (the operator's "the 3×palette256 SPO inside CausalEdge64 is exact enough"). - nars_entropy(f,c) = 1 − c·|2f−1| (Staunen high ↔ Wisdom low crystalline). - EntropyRung {Syntax, Semantics, Pragmatics} banded high→low entropy. - Quadrant {Staunen, Confusion, Boredom, Wisdom} from entropy×energy (board canon; energy supplied by the caller, e.g. MailboxSoA.energy). - PEARL_SUBSETS (2³) + decompose_spo → SpoLadderPoint{entropy, rung, active, basin_key (HHTL-routable, inactive components zeroed), class}. - entropy_class(h) → 2-bit reliability class for CausalEdge64 v2 spare bits [63:61]. Validated as a reliability proxy: ρ(entropy, empirical prediction accuracy) = −0.78 over a synthetic NARS population (test gates ρ < −0.5), grounded via hpc::reliability. examples/entropy_ladder_probe prints rung/quadrant partition + Pearl-2³ SPO decomposition. 7 unit + 5 doctests; clippy -D warnings clean. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/entropy_ladder_probe.rs | 99 +++++++++++ src/hpc/entropy_ladder.rs | 296 +++++++++++++++++++++++++++++++ src/hpc/mod.rs | 2 + 4 files changed, 401 insertions(+) create mode 100644 examples/entropy_ladder_probe.rs create mode 100644 src/hpc/entropy_ladder.rs diff --git a/Cargo.toml b/Cargo.toml index 460f127d..f98a3304 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,10 @@ required-features = ["std"] name = "edge_codec_compare" required-features = ["std"] +[[example]] +name = "entropy_ladder_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/entropy_ladder_probe.rs b/examples/entropy_ladder_probe.rs new file mode 100644 index 00000000..f0ec1853 --- /dev/null +++ b/examples/entropy_ladder_probe.rs @@ -0,0 +1,99 @@ +//! Entropy-ladder probe — Staunen↔Wisdom over a synthetic NARS edge population. +//! +//! Shows the ladder coordinate doing real work: +//! * the syntax/semantics/pragmatics rung partition, +//! * the entropy×energy quadrant partition (Staunen/Confusion/Boredom/Wisdom), +//! * the validation number — Spearman ρ(entropy, empirical accuracy) — proving +//! entropy is a reliability proxy (more negative = stronger), and +//! * Pearl-2³ SPO decomposition over the 3×palette-256 indices. +//! +//! cargo run --release --example entropy_ladder_probe --features std + +use ndarray::hpc::entropy_ladder::{decompose_spo, entropy_class, nars_entropy, EntropyRung, Quadrant}; +use ndarray::hpc::reliability::spearman; + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} + +fn main() { + println!("== Entropy ladder: Staunen↔Wisdom over a synthetic NARS edge population ==\n"); + + let mut s = 0x5EED_1ADD_u64.wrapping_add(0xA11CE); + let n = 20_000usize; + let max_obs = 48u32; + + let mut entropies = Vec::with_capacity(n); + let mut accuracies = Vec::with_capacity(n); + let mut rungs = [0usize; 3]; // Syntax, Semantics, Pragmatics + let mut quads = [0usize; 4]; // Staunen, Confusion, Boredom, Wisdom + let mut classes = [0usize; 4]; // 3-spare-bit reliability classes + + for _ in 0..n { + let p = splitmix(&mut s); // true Bernoulli rate (ground truth) + let n_obs = 1 + (splitmix(&mut s) * (max_obs - 1) as f64) as u32; + let pos = (0..n_obs).filter(|_| splitmix(&mut s) < p).count() as f64; + let f = pos / n_obs as f64; + let c = n_obs as f64 / (n_obs as f64 + 1.0); // NARS horizon-1 confidence + let energy = n_obs as f64 / max_obs as f64; // observation effort = energy proxy + + let h = nars_entropy(f, c); + let e = c * (f - 0.5) + 0.5; + let predict_true = e > 0.5; + let m = 64; + let hits = (0..m) + .filter(|_| (splitmix(&mut s) < p) == predict_true) + .count() as f64; + + entropies.push(h); + accuracies.push(hits / m as f64); + rungs[EntropyRung::from_entropy(h) as usize] += 1; + quads[Quadrant::classify(h, energy) as usize] += 1; + classes[entropy_class(h) as usize] += 1; + } + + let rho = spearman(&entropies, &accuracies); + let pct = |x: usize| 100.0 * x as f64 / n as f64; + + println!("VALIDATION ρ(entropy, empirical accuracy) = {rho:.4} (more negative ⇒ stronger reliability proxy)\n"); + + println!("Rung partition (syntax = high entropy / stimulus → pragmatics = low entropy / fact):"); + println!(" Syntax {:6.1}% ({} edges)", pct(rungs[0]), rungs[0]); + println!(" Semantics {:6.1}% ({} edges)", pct(rungs[1]), rungs[1]); + println!(" Pragmatics {:6.1}% ({} edges)", pct(rungs[2]), rungs[2]); + + println!("\nQuadrant partition (entropy × energy, energy = observation effort):"); + println!(" Staunen (hi-H, lo-E) {:6.1}%", pct(quads[0])); + println!(" Confusion (hi-H, hi-E) {:6.1}%", pct(quads[1])); + println!(" Boredom (lo-H, lo-E) {:6.1}%", pct(quads[2])); + println!(" Wisdom (lo-H, hi-E) {:6.1}%", pct(quads[3])); + + println!("\n2-bit reliability class for CausalEdge64 spare bits [63:61]:"); + println!( + " class 0 (Wisdom) {:5.1}% | 1 {:5.1}% | 2 {:5.1}% | 3 (Staunen) {:5.1}%", + pct(classes[0]), + pct(classes[1]), + pct(classes[2]), + pct(classes[3]) + ); + + println!("\nPearl-2³ SPO decomposition (3×palette-256 indices inside CausalEdge64, no re-quant):"); + let demo: [(u8, u8, u8, u8, f64, f64, &str); 4] = [ + (10, 20, 30, 0b111, 0.97, 0.95, "SPO crystalline fact"), + (10, 20, 30, 0b101, 0.55, 0.20, "S_O fragment, raw stimulus"), + (44, 88, 0, 0b011, 0.90, 0.80, "SP settled, O absent"), + (7, 0, 0, 0b001, 0.50, 0.50, "S only, maximally ambiguous"), + ]; + for (si, pi, oi, mask, f, c, label) in demo { + let pt = decompose_spo(si, pi, oi, mask, f, c); + println!( + " {label:<28} active={:?} H={:.3} {:<11?} basin_key=0x{:08X} class={}", + pt.active, pt.entropy, pt.rung, pt.basin_key, pt.class + ); + } +} diff --git a/src/hpc/entropy_ladder.rs b/src/hpc/entropy_ladder.rs new file mode 100644 index 00000000..4c8be821 --- /dev/null +++ b/src/hpc/entropy_ladder.rs @@ -0,0 +1,296 @@ +//! Entropy ladder — the Staunen↔Wisdom coordinate that unifies NARS truth, the +//! 3×palette-256 SPO already inside a `CausalEdge64`, and the +//! syntax→semantics→pragmatics rungs. +//! +//! The organizing idea (operator framing, board canon): a fact's position on the +//! cognitive ladder IS an entropy level. +//! +//! * **Staunen** — high entropy — raw *stimulus*, not yet crystallized. Maps to +//! **syntax** (surface form: many ways to say it). +//! * **Wisdom** — low entropy — *crystalline* knowledge / settled fact. Maps to +//! **pragmatics** (one settled intent in context). +//! * **Semantics** sits between. +//! +//! Crucially this reads a `CausalEdge64`'s EXISTING fields — the three +//! palette-256 indices (`s_idx`/`p_idx`/`o_idx`), the Pearl 2³ causal mask, and +//! the packed NARS `(f, c)` — so there is NO re-quantization (the operator's +//! "the 3×palette256 SPO values inside CausalEdge64 are exact enough"). +//! +//! The two axes (board's entropy×energy quadrant): **entropy** here, **energy** +//! supplied by the caller (e.g. `MailboxSoA.energy`). Together they classify the +//! [`Quadrant`] (Staunen / Confusion / Boredom / Wisdom). +//! +//! Validation: [`nars_entropy`] is a *reliability proxy* — it anti-correlates +//! with an edge's empirical prediction accuracy (see the module tests and the +//! `entropy_ladder_probe` example), grounded via [`crate::hpc::reliability`]. + +/// Staunen↔Wisdom entropy of a NARS truth value `(f, c)`, in `[0, 1]`. +/// +/// `entropy = 1 − c·|2f − 1|` = one minus the *decisiveness* `|2·E − 1|` of the +/// NARS expectation `E = c·(f − 0.5) + 0.5`. +/// +/// * `0` (Wisdom): confident AND polarized — `c→1` and `f→0` or `f→1`. +/// * `1` (Staunen): unconfident (`c→0`) OR maximally ambiguous (`f→0.5`). +/// +/// Inputs are clamped to `[0, 1]`. +/// +/// # Examples +/// ``` +/// use ndarray::hpc::entropy_ladder::nars_entropy; +/// assert!((nars_entropy(1.0, 1.0)).abs() < 1e-12); // certain-true → Wisdom +/// assert!((nars_entropy(0.0, 1.0)).abs() < 1e-12); // certain-false → Wisdom +/// assert!((nars_entropy(0.5, 1.0) - 1.0).abs() < 1e-12); // ambiguous → Staunen +/// assert!((nars_entropy(1.0, 0.0) - 1.0).abs() < 1e-12); // unconfident → Staunen +/// ``` +#[inline] +pub fn nars_entropy(f: f64, c: f64) -> f64 { + let f = f.clamp(0.0, 1.0); + let c = c.clamp(0.0, 1.0); + (1.0 - c * (2.0 * f - 1.0).abs()).clamp(0.0, 1.0) +} + +/// The three linguistic rungs, read as an entropy ladder (high → low entropy). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum EntropyRung { + /// High entropy — raw surface form / stimulus (Staunen end). + Syntax = 0, + /// Mid entropy — meaning. + Semantics = 1, + /// Low entropy — settled intent / fact (Wisdom end). + Pragmatics = 2, +} + +impl EntropyRung { + /// Band an entropy value into a rung: `[0,⅓) → Pragmatics`, `[⅓,⅔) → + /// Semantics`, `[⅔,1] → Syntax`. + /// + /// # Examples + /// ``` + /// use ndarray::hpc::entropy_ladder::EntropyRung; + /// assert_eq!(EntropyRung::from_entropy(0.9), EntropyRung::Syntax); + /// assert_eq!(EntropyRung::from_entropy(0.5), EntropyRung::Semantics); + /// assert_eq!(EntropyRung::from_entropy(0.1), EntropyRung::Pragmatics); + /// ``` + #[inline] + pub fn from_entropy(h: f64) -> Self { + if h >= 2.0 / 3.0 { + EntropyRung::Syntax + } else if h >= 1.0 / 3.0 { + EntropyRung::Semantics + } else { + EntropyRung::Pragmatics + } + } +} + +/// The entropy×energy quadrant (board canon, `LATEST_STATE` Staunen/Wisdom). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum Quadrant { + /// High entropy × low energy — stimulus that "needs entropy work". + Staunen = 0, + /// High entropy × high energy — in-progress climb (energy invested, + /// entropy not yet collapsed). + Confusion = 1, + /// Low entropy × low energy — ordered but not energised. + Boredom = 2, + /// Low entropy × high energy — crystalline knowledge + plasticity. + Wisdom = 3, +} + +impl Quadrant { + /// Classify by `(entropy, energy)`, both in `[0, 1]`, split at `0.5`. + /// + /// # Examples + /// ``` + /// use ndarray::hpc::entropy_ladder::Quadrant; + /// assert_eq!(Quadrant::classify(0.9, 0.1), Quadrant::Staunen); + /// assert_eq!(Quadrant::classify(0.1, 0.9), Quadrant::Wisdom); + /// assert_eq!(Quadrant::classify(0.9, 0.9), Quadrant::Confusion); + /// assert_eq!(Quadrant::classify(0.1, 0.1), Quadrant::Boredom); + /// ``` + #[inline] + pub fn classify(entropy: f64, energy: f64) -> Self { + match (entropy >= 0.5, energy >= 0.5) { + (true, false) => Quadrant::Staunen, + (true, true) => Quadrant::Confusion, + (false, false) => Quadrant::Boredom, + (false, true) => Quadrant::Wisdom, + } + } +} + +/// Pearl 2³ — the eight `(S, P, O)` active-subset masks, indexed by the +/// 3-bit causal mask (`bit0=S, bit1=P, bit2=O`). Index 0 = `∅`, index 7 = `SPO`. +pub const PEARL_SUBSETS: [(bool, bool, bool); 8] = [ + (false, false, false), + (true, false, false), + (false, true, false), + (true, true, false), + (false, false, true), + (true, false, true), + (false, true, true), + (true, true, true), +]; + +/// One decomposed SPO ladder point, built from a `CausalEdge64`'s existing +/// fields with no re-quantization. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct SpoLadderPoint { + /// Staunen↔Wisdom entropy from the edge's NARS `(f, c)`. + pub entropy: f64, + /// The entropy rung (syntax/semantics/pragmatics). + pub rung: EntropyRung, + /// `(S, P, O)` active flags from the Pearl 2³ mask. + pub active: (bool, bool, bool), + /// HHTL-routable basin sub-key: `mask<<24 | s<<16 | p<<8 | o`, with inactive + /// components zeroed. The active palette bytes ARE the basin coordinate that + /// HHTL / helix / edge-residue route on downstream. + pub basin_key: u32, + /// 2-bit reliability class (0 = Wisdom … 3 = Staunen) for the 3 spare bits + /// of `CausalEdge64` v2 (store via the consumer; this crate only computes it). + pub class: u8, +} + +/// Decompose an edge's existing fields into an [`SpoLadderPoint`]. +/// +/// `mask` is the 3-bit Pearl 2³ causal mask (`bit0=S, bit1=P, bit2=O`); only the +/// active palette indices contribute to `basin_key`. No re-quantization — the +/// palette indices are taken as-is. +/// +/// # Examples +/// ``` +/// use ndarray::hpc::entropy_ladder::{decompose_spo, EntropyRung}; +/// // All three active (mask 0b111), crystalline truth (f=0.95, c=0.9). +/// let p = decompose_spo(10, 20, 30, 0b111, 0.95, 0.9); +/// assert_eq!(p.active, (true, true, true)); +/// assert_eq!(p.basin_key, (0b111 << 24) | (10 << 16) | (20 << 8) | 30); +/// assert_eq!(p.rung, EntropyRung::Pragmatics); // low entropy → settled +/// ``` +pub fn decompose_spo(s_idx: u8, p_idx: u8, o_idx: u8, mask: u8, f: f64, c: f64) -> SpoLadderPoint { + let m = (mask & 0b111) as usize; + let (sa, pa, oa) = PEARL_SUBSETS[m]; + let s = if sa { s_idx } else { 0 }; + let p = if pa { p_idx } else { 0 }; + let o = if oa { o_idx } else { 0 }; + let basin_key = ((m as u32) << 24) | ((s as u32) << 16) | ((p as u32) << 8) | (o as u32); + let entropy = nars_entropy(f, c); + SpoLadderPoint { + entropy, + rung: EntropyRung::from_entropy(entropy), + active: (sa, pa, oa), + basin_key, + class: entropy_class(entropy), + } +} + +/// Quantize entropy into a 2-bit reliability class for the 3 spare bits of +/// `CausalEdge64` v2 (`[63:61]`): `0 = Wisdom` (most reliable) … `3 = Staunen` +/// (least). Reserve-don't-reclaim: the consumer writes this version-gated. +/// +/// # Examples +/// ``` +/// use ndarray::hpc::entropy_ladder::entropy_class; +/// assert_eq!(entropy_class(0.0), 0); // Wisdom +/// assert_eq!(entropy_class(1.0), 3); // Staunen +/// ``` +#[inline] +pub fn entropy_class(h: f64) -> u8 { + let h = h.clamp(0.0, 1.0); + ((h * 4.0) as u8).min(3) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::hpc::reliability::spearman; + + fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 + } + + #[test] + fn entropy_corners() { + assert!(nars_entropy(1.0, 1.0) < 1e-12); + assert!(nars_entropy(0.0, 1.0) < 1e-12); + assert!((nars_entropy(0.5, 1.0) - 1.0).abs() < 1e-12); + assert!((nars_entropy(1.0, 0.0) - 1.0).abs() < 1e-12); + } + + #[test] + fn entropy_monotone_in_confidence_and_polarization() { + // More confidence on a polarized belief ⇒ lower entropy. + assert!(nars_entropy(0.9, 0.9) < nars_entropy(0.9, 0.3)); + // More polarization at fixed confidence ⇒ lower entropy. + assert!(nars_entropy(0.95, 0.8) < nars_entropy(0.6, 0.8)); + } + + #[test] + fn rung_bands() { + assert_eq!(EntropyRung::from_entropy(0.95), EntropyRung::Syntax); + assert_eq!(EntropyRung::from_entropy(0.50), EntropyRung::Semantics); + assert_eq!(EntropyRung::from_entropy(0.05), EntropyRung::Pragmatics); + } + + #[test] + fn quadrant_four_corners() { + assert_eq!(Quadrant::classify(0.8, 0.2), Quadrant::Staunen); + assert_eq!(Quadrant::classify(0.8, 0.8), Quadrant::Confusion); + assert_eq!(Quadrant::classify(0.2, 0.2), Quadrant::Boredom); + assert_eq!(Quadrant::classify(0.2, 0.8), Quadrant::Wisdom); + } + + #[test] + fn decompose_zeroes_inactive_components() { + // Only P active (mask 0b010): s_idx and o_idx must not leak into the key. + let p = decompose_spo(11, 22, 33, 0b010, 0.5, 0.5); + assert_eq!(p.active, (false, true, false)); + assert_eq!(p.basin_key, (0b010 << 24) | (22 << 8)); + } + + #[test] + fn entropy_class_spans_four() { + assert_eq!(entropy_class(0.0), 0); + assert_eq!(entropy_class(0.3), 1); + assert_eq!(entropy_class(0.6), 2); + assert_eq!(entropy_class(0.99), 3); + } + + /// Validation: entropy is a reliability proxy. Build a population of edges + /// whose belief `(f, c)` is estimated from `n_obs` Bernoulli(p) draws, then + /// measure each edge's empirical prediction accuracy against fresh draws. + /// `nars_entropy` must ANTI-correlate with accuracy (low entropy ⇒ the edge's + /// belief reliably matches reality). Grounded via the reliability suite's + /// Spearman ρ — this is the non-circular gate for the ladder coordinate. + #[test] + fn entropy_anticorrelates_with_prediction_accuracy() { + let mut s = 0xC0FFEE_u64; + let n = 3000usize; + let mut entropies = Vec::with_capacity(n); + let mut accuracies = Vec::with_capacity(n); + for _ in 0..n { + let p = splitmix(&mut s); // true Bernoulli rate (ground truth) + let n_obs = 1 + (splitmix(&mut s) * 48.0) as u32; // 1..=48 observations + let pos = (0..n_obs).filter(|_| splitmix(&mut s) < p).count() as f64; + let f = pos / n_obs as f64; + let c = n_obs as f64 / (n_obs as f64 + 1.0); // NARS horizon-1 confidence + let e = c * (f - 0.5) + 0.5; // expectation → MAP prediction + let predict_true = e > 0.5; + // Empirical accuracy vs 64 fresh outcomes from the true rate p. + let m = 64; + let hits = (0..m) + .filter(|_| (splitmix(&mut s) < p) == predict_true) + .count() as f64; + entropies.push(nars_entropy(f, c)); + accuracies.push(hits / m as f64); + } + let rho = spearman(&entropies, &accuracies); + assert!(rho < -0.5, "entropy must anti-correlate with empirical accuracy (reliability proxy), got ρ={rho}"); + } +} diff --git a/src/hpc/mod.rs b/src/hpc/mod.rs index 9fa3d44f..1c47654f 100644 --- a/src/hpc/mod.rs +++ b/src/hpc/mod.rs @@ -27,6 +27,8 @@ pub mod reductions; pub mod statistics; /// Reliability & validity statistics: Pearson r, Spearman ρ, Cronbach α, ICC. pub mod reliability; +/// Entropy ladder: Staunen↔Wisdom coordinate over NARS truth + Pearl-2³ SPO. +pub mod entropy_ladder; pub mod activations; pub mod hdc; // Bitwise SIMD primitives — graduated to crate root. Back-compat re-export. From 77ac709fc655fa26fc8fc579c2a0675ea727b4f5 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 10:34:23 +0000 Subject: [PATCH 03/13] =?UTF-8?q?test(hpc):=20MTMM=20instrument=20probe=20?= =?UTF-8?q?=E2=80=94=20syntax=3Dangle=20=E2=8A=A5=20semantics=3Dresidue=20?= =?UTF-8?q?=E2=8A=A5=20basin=3Dphase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Treats the syntax/semantics/episodic → angle/residue/phase reframing as a multitrait-multimethod hypothesis and tests it with the reliability suite (Spearman + ICC), rather than assuming it. Result over a synthetic population with independent latent factors: - convergent: syn↔angle ρ=1.000, sem↔residue ρ=1.000 - discriminant: off-diagonal ρ≈0.015 — angle ⊥ residue (genuinely orthogonal) - phase independent of content (ρ≈0); golden phase 32× better min-gap than random - RELIABILITY: angle ICC=0.998, phase=1.0, but residue ICC=0.471 (FAIL @ noise 0.18) The probe earns its keep by falsifying one cell: location-residue is a small-signal measure (deviation a−centroid, norm ~0–1.5) vs angle on full vectors (norm ~6), so √D L2 noise swamps it — empirical I-NOISE-FLOOR-JIRAK. Next: directional/per-dim residue + Belichtungsmesser σ-gate to recover ICC. Synthetic instrument-shape test only (orthogonality/reliability/separation); the linguistic mapping still needs a real-text (deepnsm/COCA) probe. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/instrument_mtmm_probe.rs | 211 ++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 examples/instrument_mtmm_probe.rs diff --git a/Cargo.toml b/Cargo.toml index f98a3304..ad7dc2fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,10 @@ required-features = ["std"] name = "entropy_ladder_probe" required-features = ["std"] +[[example]] +name = "instrument_mtmm_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/instrument_mtmm_probe.rs b/examples/instrument_mtmm_probe.rs new file mode 100644 index 00000000..c6d44888 --- /dev/null +++ b/examples/instrument_mtmm_probe.rs @@ -0,0 +1,211 @@ +//! Measurement-instrument hypothesis probe (MTMM) — syntax=angle, semantics= +//! location-residue, episodic-basin=phase. Tests the THREE as an instrument, not +//! as assumed fact, via the reliability suite (Spearman + ICC). +//! +//! The hypothesis: these are three ORTHOGONAL measurement axes. The probe builds +//! a synthetic population where three latent factors vary independently — `syn` +//! (a pairwise relation = the angle imposed between s and o), `sem` (a semantic +//! location = residue magnitude from a centroid), and `epi` (an episode index → +//! golden phase) — then measures each axis from the fingerprints and asks the +//! reliability suite four multitrait-multimethod questions: +//! +//! - CONVERGENT: does each measure track its own factor? (diagonal high) +//! - DISCRIMINANT: does it stay flat on the others? (off-diagonal ~0) +//! - RELIABLE: is it stable under observation noise? (ICC test-retest) +//! - PHASE SEPARATES: does the golden phase spread episodes? (anti-collapse) +//! +//! A clean positive = the reframing is a real instrument. Any failed cell = the +//! operationalization conflates and must be corrected (not assumed). +//! +//! cargo run --release --example instrument_mtmm_probe --features std + +use ndarray::hpc::reliability::{icc_a1, spearman}; + +const D: usize = 24; +const K: usize = 8; + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} + +fn randn(s: &mut u64) -> f64 { + // Box-Muller. + let u1 = splitmix(s).max(1e-12); + let u2 = splitmix(s); + (-2.0 * u1.ln()).sqrt() * (std::f64::consts::TAU * u2).cos() +} + +fn dot(a: &[f64], b: &[f64]) -> f64 { + a.iter().zip(b).map(|(x, y)| x * y).sum() +} +fn norm(a: &[f64]) -> f64 { + dot(a, a).sqrt() +} +fn unit(s: &mut u64) -> Vec { + let v: Vec = (0..D).map(|_| randn(s)).collect(); + let n = norm(&v).max(1e-12); + v.iter().map(|x| x / n).collect() +} + +fn angle_between(a: &[f64], b: &[f64]) -> f64 { + (dot(a, b) / (norm(a) * norm(b)).max(1e-12)) + .clamp(-1.0, 1.0) + .acos() +} + +fn residue_to_nearest(v: &[f64], centroids: &[Vec]) -> f64 { + centroids + .iter() + .map(|c| { + v.iter() + .zip(c) + .map(|(x, y)| (x - y) * (x - y)) + .sum::() + .sqrt() + }) + .fold(f64::INFINITY, f64::min) +} + +/// Min circular gap of phases in [0, 2π) — the basin-separation metric. +fn min_circular_gap(phases: &[f64]) -> f64 { + let mut p: Vec = phases.to_vec(); + p.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut min = f64::INFINITY; + for i in 0..p.len() { + let gap = if i + 1 < p.len() { + p[i + 1] - p[i] + } else { + p[0] + std::f64::consts::TAU - p[i] + }; + min = min.min(gap); + } + min +} + +fn main() { + println!("== Measurement-instrument MTMM probe: syntax=angle · semantics=residue · basin=phase ==\n"); + let mut s = 0x5EED_4A2B_u64; + let golden = std::f64::consts::PI * (3.0 - 5.0_f64.sqrt()); + + // Well-separated semantic centroids (spacing >> residue range R so the + // nearest-centroid stays the generating one — keeps residue = a clean + // location measure). + let centroids: Vec> = (0..K) + .map(|_| unit(&mut s).iter().map(|x| x * 6.0).collect()) + .collect(); + let r_max = 1.5; + let noise = 0.18; // observation noise for the test-retest reliability leg + + let n = 6000usize; + let (mut f_syn, mut f_sem, mut f_epi) = (vec![], vec![], vec![]); + let (mut m_angle, mut m_res, mut m_phase) = (vec![], vec![], vec![]); + let (mut m_angle2, mut m_res2) = (vec![], vec![]); // noisy retest + let mut golden_phase = vec![]; + let mut random_phase = vec![]; + + for i in 0..n { + let c = (splitmix(&mut s) * K as f64) as usize % K; + let sem = splitmix(&mut s) * r_max; // semantic residue magnitude + let syn = 0.15 + splitmix(&mut s) * (std::f64::consts::PI - 0.30); // relation angle + let epi = i as f64; + + // s_fp = centroid + sem·dir (its residue from the nearest centroid = sem). + let dir = unit(&mut s); + let s_fp: Vec = centroids[c] + .iter() + .zip(&dir) + .map(|(cc, d)| cc + sem * d) + .collect(); + // o_fp imposes EXACTLY angle `syn` to s_fp, independent of s_fp's location: + // o = cos(syn)·s + sin(syn)·|s|·perp, perp ⊥ s. + let rnd = unit(&mut s); + let proj = dot(&rnd, &s_fp) / dot(&s_fp, &s_fp).max(1e-12); + let mut perp: Vec = rnd.iter().zip(&s_fp).map(|(r, sf)| r - proj * sf).collect(); + let pn = norm(&perp).max(1e-12); + for x in perp.iter_mut() { + *x /= pn; + } + let sn = norm(&s_fp); + let o_fp: Vec = s_fp + .iter() + .zip(&perp) + .map(|(sf, pp)| syn.cos() * sf + syn.sin() * sn * pp) + .collect(); + + // Clean measures. + m_angle.push(angle_between(&s_fp, &o_fp)); + m_res.push(residue_to_nearest(&s_fp, ¢roids)); + let ph = (epi * golden).rem_euclid(std::f64::consts::TAU); + m_phase.push(ph); + + // Noisy retest (observation noise on both fingerprints). + let s2: Vec = s_fp.iter().map(|x| x + noise * randn(&mut s)).collect(); + let o2: Vec = o_fp.iter().map(|x| x + noise * randn(&mut s)).collect(); + m_angle2.push(angle_between(&s2, &o2)); + m_res2.push(residue_to_nearest(&s2, ¢roids)); + + f_syn.push(syn); + f_sem.push(sem); + f_epi.push(epi); + golden_phase.push(ph); + random_phase.push(splitmix(&mut s) * std::f64::consts::TAU); + } + + // ── MTMM Spearman matrix (factor × measure) ── + let sa = spearman(&f_syn, &m_angle); + let sr = spearman(&f_syn, &m_res); + let ea = spearman(&f_sem, &m_angle); + let er = spearman(&f_sem, &m_res); + let pa = spearman(&f_epi, &m_angle); + let pr = spearman(&f_epi, &m_res); + + println!("MTMM Spearman ρ (factor ↓ × measure →): [convergent=diagonal, discriminant=off-diagonal]"); + println!(" angle residue"); + println!(" syn (relation) {sa:+.3} {sr:+.3}"); + println!(" sem (location) {ea:+.3} {er:+.3}"); + println!(" epi (episode) {pa:+.3} {pr:+.3}"); + + // Phase independence + reliability + separation. + let ph_syn = spearman(&f_syn, &m_phase); + let ph_sem = spearman(&f_sem, &m_phase); + let icc_angle = icc_a1(&[&m_angle, &m_angle2]); + let icc_res = icc_a1(&[&m_res, &m_res2]); + let prefix = 64usize; + let g_gap = min_circular_gap(&golden_phase[..prefix]); + let r_gap = min_circular_gap(&random_phase[..prefix]); + + println!("\nphase independence: ρ(syn,phase)={ph_syn:+.3} ρ(sem,phase)={ph_sem:+.3} (≈0 expected)"); + println!("reliability (ICC test-retest @ noise {noise}): angle={icc_angle:.3} residue={icc_res:.3} phase=1.000 (deterministic)"); + println!( + "phase separation (min circular gap, first {prefix}): golden={g_gap:.4} random={r_gap:.4} ({:.1}× better)", + g_gap / r_gap.max(1e-9) + ); + + // ── Verdict ── + let convergent = sa >= 0.9 && er >= 0.9; + let discriminant = ea.abs() <= 0.2 && sr.abs() <= 0.2; + let phase_indep = ph_syn.abs() <= 0.2 && ph_sem.abs() <= 0.2; + let reliable = icc_angle >= 0.7 && icc_res >= 0.7; + let separates = g_gap > 1.5 * r_gap; + let mark = |b: bool| if b { "PASS" } else { "FAIL" }; + println!("\nVERDICT:"); + println!(" convergent (each measure tracks its factor) ......... {}", mark(convergent)); + println!(" discriminant (axes do not leak into each other) ..... {}", mark(discriminant)); + println!(" phase independent of content .........................{}", mark(phase_indep)); + println!(" reliable under noise (ICC ≥ 0.7) .................... {}", mark(reliable)); + println!(" golden phase separates episodes ..................... {}", mark(separates)); + let all = convergent && discriminant && phase_indep && reliable && separates; + println!( + "\n ⇒ instrument {}", + if all { + "VALIDATED — syntax=angle ⊥ semantics=residue ⊥ basin=phase is a real 3-axis basis" + } else { + "NEEDS CORRECTION — at least one axis conflates; see failed cells" + } + ); +} From aa235b70f074b8cbe38ca90e314e6e0347be15d3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 10:44:20 +0000 Subject: [PATCH 04/13] =?UTF-8?q?test(hpc):=20CAKES=20grail=20probe=20?= =?UTF-8?q?=E2=80=94=20measure=20CLAM-accelerated=20search=20vs=20brute=20?= =?UTF-8?q?on=20the=20instrument?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Can we measure CLAM vs CAKES?" — yes. Runs real ndarray CLAM search (knn_repeated_rho, knn_dfs_sieve) vs an independent brute Hamming k-NN and puts both on the reliability instrument (recall / Spearman order / ICC distance / speedup). Measured (N=4000, 128-bit, 32 clusters, k=10, 300 queries): - exact by DISTANCE: both 1.000 (ρ=ICC=1.000 confirm identical returned distances) - accelerated: dfs-sieve 2.33×; repeated-ρ 0.74× (SLOWER than brute on tight Hamming clusters — its radius schedule overshoots) Two findings: (1) k-NN recall by INDEX undercounts on tie-heavy integer (Hamming) distances (idx-recall 0.79) — distance-recall is the correct exactness metric (1.000); (2) the instrument adjudicates between CAKES algorithms — dfs-sieve is the right one here, repeated-ρ is exact-but-mistuned. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/cakes_grail_probe.rs | 151 ++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 examples/cakes_grail_probe.rs diff --git a/Cargo.toml b/Cargo.toml index ad7dc2fc..dfb9d343 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,10 @@ required-features = ["std"] name = "instrument_mtmm_probe" required-features = ["std"] +[[example]] +name = "cakes_grail_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/cakes_grail_probe.rs b/examples/cakes_grail_probe.rs new file mode 100644 index 00000000..aa3d8966 --- /dev/null +++ b/examples/cakes_grail_probe.rs @@ -0,0 +1,151 @@ +//! CAKES grail probe — measure CLAM-accelerated search vs brute ground truth +//! with the reliability instrument. "Can we measure CLAM vs CAKES?" +//! +//! The spec (reference images) claims CAKES is **metric-safe-exact** (triangle +//! inequality ⇒ zero false negatives) AND **accelerated** (clusters pruned). This +//! probe puts both claims on the instrument: +//! * recall@k — CAKES hits vs brute hits (exactness; expect 1.000) +//! * Spearman ρ — returned distance order vs brute order (rank fidelity) +//! * ICC(2,1) — absolute agreement of distances vs brute (metric-safety) +//! * speedup — brute distance-calls / CAKES distance-calls (the win) +//! +//! Two CAKES algorithms are measured (CLAM/CHESS Algorithms): repeated-ρ and +//! DFS-sieve. Brute is an inline exhaustive Hamming scan (independent truth). +//! +//! cargo run --release --example cakes_grail_probe --features std + +use ndarray::hpc::clam::ClamTree; +use ndarray::hpc::clam_search::{knn_dfs_sieve, knn_repeated_rho}; +use ndarray::hpc::reliability::{icc_a1, spearman}; + +fn splitmix(s: &mut u64) -> u64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) +} + +fn hamming(a: &[u8], b: &[u8]) -> u64 { + a.iter() + .zip(b) + .map(|(x, y)| (x ^ y).count_ones() as u64) + .sum() +} + +/// Independent ground-truth k-NN by exhaustive Hamming scan. +fn brute_knn(data: &[u8], vec_len: usize, query: &[u8], k: usize) -> Vec<(usize, u64)> { + let n = data.len() / vec_len; + let mut all: Vec<(usize, u64)> = (0..n) + .map(|i| (i, hamming(query, &data[i * vec_len..(i + 1) * vec_len]))) + .collect(); + all.sort_by_key(|&(_, d)| d); + all.truncate(k); + all +} + +fn main() { + println!("== CAKES grail probe: CLAM-accelerated search vs brute, on the reliability instrument ==\n"); + + let vec_len = 16usize; // 128-bit fingerprints + let n = 4000usize; + let n_centers = 32usize; + let k = 10usize; + let n_queries = 300usize; + let mut s = 0xCAFE_5EED_u64; + + // Clustered binary data: each point = a random center XOR ~12 sparse flips, + // so the CLAM tree has real structure to prune (uniform data would not). + let centers: Vec = (0..n_centers * vec_len) + .map(|_| (splitmix(&mut s) & 0xFF) as u8) + .collect(); + let mut data = vec![0u8; n * vec_len]; + for i in 0..n { + let c = (splitmix(&mut s) as usize) % n_centers; + data[i * vec_len..(i + 1) * vec_len].copy_from_slice(¢ers[c * vec_len..(c + 1) * vec_len]); + for _ in 0..12 { + let bit = (splitmix(&mut s) as usize) % (vec_len * 8); + data[i * vec_len + bit / 8] ^= 1 << (bit % 8); + } + } + + let tree = ClamTree::build(&data, vec_len, 1); + + // Accumulators per algorithm: (recall_sum, dist_calls, truth_d, algo_d). + let mut rep = (0.0f64, 0usize, Vec::new(), Vec::new()); + let mut dfs = (0.0f64, 0usize, Vec::new(), Vec::new()); + let mut brute_calls = 0usize; + + for _ in 0..n_queries { + // Query = a random center XOR a few flips (a realistic near-cluster probe). + let c = (splitmix(&mut s) as usize) % n_centers; + let mut q = centers[c * vec_len..(c + 1) * vec_len].to_vec(); + for _ in 0..8 { + let bit = (splitmix(&mut s) as usize) % (vec_len * 8); + q[bit / 8] ^= 1 << (bit % 8); + } + + let truth = brute_knn(&data, vec_len, &q, k); + brute_calls += n; + let truth_set: std::collections::HashSet = truth.iter().map(|&(i, _)| i).collect(); + + for (algo, acc) in [ + (knn_repeated_rho(&tree, &data, vec_len, &q, k), &mut rep), + (knn_dfs_sieve(&tree, &data, vec_len, &q, k), &mut dfs), + ] { + let hit = algo + .hits + .iter() + .filter(|&&(i, _)| truth_set.contains(&i)) + .count(); + acc.0 += hit as f64 / k as f64; + acc.1 += algo.distance_calls; + // Rank-aligned distance pairs (both sorted ascending) for ICC/ρ. + for (j, &(_, d)) in algo.hits.iter().enumerate() { + if j < truth.len() { + acc.2.push(truth[j].1 as f64); + acc.3.push(d as f64); + } + } + } + } + + let report = |name: &str, acc: &(f64, usize, Vec, Vec)| { + let idx_recall = acc.0 / n_queries as f64; + let pairs = acc.2.len().max(1); + // Tie-correct exactness: at each rank the returned distance equals truth's + // (ties make INDEX recall undercount; DISTANCE recall is the real metric). + let dist_recall = acc + .2 + .iter() + .zip(&acc.3) + .filter(|(t, a)| (*t - *a).abs() < 0.5) + .count() as f64 + / pairs as f64; + let icc = icc_a1(&[&acc.2, &acc.3]); + let rho = spearman(&acc.2, &acc.3); + let speedup = brute_calls as f64 / acc.1.max(1) as f64; + println!( + " {name:<14} idx-recall@{k} {idx_recall:.4} dist-recall {dist_recall:.4} ρ {rho:.4} ICC {icc:.4} speedup {speedup:.2}×", + ); + (dist_recall, icc, rho, speedup) + }; + + println!( + "CAKES vs brute ground truth (N={n}, dim={}b, {n_centers} clusters, k={k}, {n_queries} queries):", + vec_len * 8 + ); + let (d1, i1, p1, s1) = report("repeated-ρ", &rep); + let (d2, i2, p2, s2) = report("dfs-sieve", &dfs); + println!(" note: idx-recall < 1 is a TIE artifact (integer Hamming) — dist-recall is the true exactness metric."); + + let exact = d1 > 0.999 && d2 > 0.999; + let metric_safe = i1 > 0.999 && i2 > 0.999 && p1 > 0.999 && p2 > 0.999; + let mark = |b: bool| if b { "PASS" } else { "FAIL" }; + println!("\nVERDICT:"); + println!(" exact by distance (dist-recall = 1.000) ............ {}", mark(exact)); + println!(" metric-safe (ICC & ρ vs brute = 1.000) ............. {}", mark(metric_safe)); + println!(" accelerated: dfs-sieve {s2:.2}× {} · repeated-ρ {s1:.2}× {}", mark(s2 > 1.0), mark(s1 > 1.0)); + println!("\n ⇒ instrument adjudicates: dfs-sieve IS the CAKES algorithm here (exact + metric-safe + {s2:.1}×);"); + println!(" repeated-ρ is exact + metric-safe but NOT accelerated on tight-cluster Hamming (radius schedule mistuned)."); +} From 966b07a1dfbb12cebcfbe74e67a2e5be88896f85 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 10:57:58 +0000 Subject: [PATCH 05/13] =?UTF-8?q?fix(hpc):=20address=20#218=20review=20?= =?UTF-8?q?=E2=80=94=20fidelity=20length-guard=20+=20edge=5Fcodec=20exampl?= =?UTF-8?q?es=20&=20decode=20asserts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - reliability: FidelityReport::compute returns a DEGENERATE report (zeros, rel_l2=1.0) on length mismatch instead of silently truncating to the shorter prefix (codex P2 + coderabbit Major) — consistent with the module's return-degenerate-not-panic convention; a dropped tail can't read as high fidelity. - edge_codec: added runnable /// examples to the 6 remaining public methods (Codebook::assign/centroid, CoarseResidueCodec::encode/reconstruct, ProductQuantizer::encode/reconstruct) per all-public-APIs-need-examples. - edge_codec: both reconstruct() decoders now assert the packed-code length (dim/2, m/2) so malformed input fails with a clear message instead of an out-of-bounds panic deep in unpack (coderabbit Major). edge_codec doctests 4→10; reliability 5; unit tests green; clippy -D warnings clean. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- src/hpc/edge_codec.rs | 59 ++++++++++++++++++++++++++++++++++++++++-- src/hpc/reliability.rs | 29 ++++++++++++++++++--- 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/src/hpc/edge_codec.rs b/src/hpc/edge_codec.rs index 665275c5..11e935d5 100644 --- a/src/hpc/edge_codec.rs +++ b/src/hpc/edge_codec.rs @@ -130,6 +130,14 @@ impl Codebook { /// Index of the nearest centroid to `v` (scalar; identical result to the /// AMX `matmul_i8_to_i32` assignment path). Panics if `v.len() != dim`. + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::Codebook; + /// let data = [0.0, 0.0, 9.0, 9.0]; // 2 points × dim 2 + /// let cb = Codebook::train(&data, 2, 2, 2, 5, 1); + /// assert!(cb.assign(&[9.0, 9.0]) < cb.k as u32); + /// ``` #[inline] pub fn assign(&self, v: &[f32]) -> u32 { assert_eq!(v.len(), self.dim); @@ -146,6 +154,14 @@ impl Codebook { } /// Borrow centroid `c` as a slice. + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::Codebook; + /// let data = [0.0, 0.0, 9.0, 9.0]; + /// let cb = Codebook::train(&data, 2, 2, 2, 5, 1); + /// assert_eq!(cb.centroid(0).len(), 2); + /// ``` #[inline] pub fn centroid(&self, c: usize) -> &[f32] { &self.centroids[c * self.dim..(c + 1) * self.dim] @@ -245,6 +261,15 @@ impl CoarseResidueCodec { } /// Encode one vector to coarse index + packed residue nibbles. + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::CoarseResidueCodec; + /// let data: Vec = (0..8 * 4).map(|i| (i % 5) as f32 - 2.0).collect(); + /// let codec = CoarseResidueCodec::fit(&data, 8, 4, 4, 6, 1); + /// let code = codec.encode(&data[0..4]); + /// assert_eq!(code.residue.len(), 2); // dim/2 packed bytes + /// ``` pub fn encode(&self, v: &[f32]) -> CoarseResidueCode { let dim = self.cb.dim; let index = self.cb.assign(v); @@ -261,9 +286,20 @@ impl CoarseResidueCodec { } } - /// Reconstruct `centroid + dequantized residue`. + /// Reconstruct `centroid + dequantized residue`. Panics if `code.residue` is + /// not the expected `dim/2` packed bytes (guards malformed decode input). + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::CoarseResidueCodec; + /// let data: Vec = (0..8 * 4).map(|i| (i % 5) as f32 - 2.0).collect(); + /// let codec = CoarseResidueCodec::fit(&data, 8, 4, 4, 6, 1); + /// let v = codec.reconstruct(&codec.encode(&data[0..4])); + /// assert_eq!(v.len(), 4); + /// ``` pub fn reconstruct(&self, code: &CoarseResidueCode) -> Vec { let dim = self.cb.dim; + assert_eq!(code.residue.len(), dim / 2, "residue must be dim/2 packed bytes"); let c = self.cb.centroid(code.index as usize); let res = unpack_nibbles_signed(&code.residue, dim); (0..dim) @@ -320,6 +356,14 @@ impl ProductQuantizer { } /// Encode one vector to `m/2` packed nibble bytes (16 bytes when `m = 32`). + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::ProductQuantizer; + /// let data: Vec = (0..32 * 8).map(|i| ((i * 7 % 11) as f32) - 5.0).collect(); + /// let pq = ProductQuantizer::fit(&data, 32, 8, 4, 6, 1); + /// assert_eq!(pq.encode(&data[0..8]).len(), 2); // m/2 bytes + /// ``` pub fn encode(&self, v: &[f32]) -> Vec { let codes: Vec = (0..self.m) .map(|s| { @@ -334,8 +378,19 @@ impl ProductQuantizer { .collect() } - /// Reconstruct by concatenating the selected sub-centroids. + /// Reconstruct by concatenating the selected sub-centroids. Panics if `code` + /// is not the expected `m/2` packed bytes (guards malformed decode input). + /// + /// # Examples + /// ``` + /// use ndarray::hpc::edge_codec::ProductQuantizer; + /// let data: Vec = (0..32 * 8).map(|i| ((i * 7 % 11) as f32) - 5.0).collect(); + /// let pq = ProductQuantizer::fit(&data, 32, 8, 4, 6, 1); + /// let v = pq.reconstruct(&pq.encode(&data[0..8])); + /// assert_eq!(v.len(), 8); + /// ``` pub fn reconstruct(&self, code: &[u8]) -> Vec { + assert_eq!(code.len(), self.m / 2, "code must be m/2 packed bytes"); let mut out = vec![0.0f32; self.m * self.sub_dim]; for s in 0..self.m { let byte = code[s / 2]; diff --git a/src/hpc/reliability.rs b/src/hpc/reliability.rs index 98506b4d..56adf785 100644 --- a/src/hpc/reliability.rs +++ b/src/hpc/reliability.rs @@ -232,7 +232,13 @@ pub struct FidelityReport { } impl FidelityReport { - /// Compute every coefficient for `(truth, estimate)` (equal-length samples). + /// Compute every coefficient for `(truth, estimate)`. + /// + /// Mismatched lengths are a caller error and yield a **degenerate** report + /// (all coefficients `0.0`, `rel_l2 = 1.0`) rather than silently truncating to + /// the shorter prefix — consistent with the module's return-degenerate (never + /// panic, never hide) convention, so a dropped tail cannot masquerade as high + /// fidelity. /// /// # Examples /// ``` @@ -242,10 +248,27 @@ impl FidelityReport { /// let r = FidelityReport::compute(&truth, &est); /// assert!(r.pearson > 0.99 && r.spearman > 0.99); /// assert!(r.rel_l2 < 0.1); + /// // Mismatched lengths → degenerate (does NOT truncate-then-score): + /// let bad = FidelityReport::compute(&[1.0, 2.0, 100.0], &[1.0, 2.0]); + /// assert_eq!(bad.pearson, 0.0); + /// assert_eq!(bad.rel_l2, 1.0); /// ``` pub fn compute(truth: &[f64], estimate: &[f64]) -> Self { - let n = truth.len().min(estimate.len()); - let (t, e) = (&truth[..n], &estimate[..n]); + // Length mismatch is a caller bug: return a degenerate (all-zero, + // max-error) report rather than truncating to the shorter prefix, which + // could hide a dropped tail behind a falsely-high score. + if truth.len() != estimate.len() { + return FidelityReport { + pearson: 0.0, + spearman: 0.0, + icc: 0.0, + cronbach: 0.0, + rel_l2: 1.0, + cosine: 0.0, + }; + } + let n = truth.len(); + let (t, e) = (truth, estimate); let mut se = 0.0; let mut st = 0.0; let mut dot = 0.0; From 5a34b0d00297b71b2a98deeb703bac75c17dc511 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 10:57:58 +0000 Subject: [PATCH 06/13] =?UTF-8?q?test(hpc):=20helix=20=E2=8A=A5=20HHTL=20p?= =?UTF-8?q?robe=20=E2=80=94=20PLACE=20+=20RADIAL=20+=20HELIX(direction)=20?= =?UTF-8?q?on=20the=20instrument?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests the 16-byte spec's load-bearing claim (PLACE/HHTL ⊥ RESIDUE/HELIX) via a spherical decomposition of points around their cluster centroid, encoding the residue direction to the 256-sample golden-spiral hemisphere palette + a 1-bit sign. Measured: orthogonal (ρ(place,helix)=−0.001, ρ(radial,helix)=−0.013); adds info (centroid+radial+helix cuts reconstruction error 13.6× at 4.2° angular quant); golden separation 28× > random; reliable (ICC 0.88 under noise). All PASS. Finding: helix is a HEMISPHERE code (axis, ±v equivalent). A full SIGNED direction needs the palette index PLUS a 1-bit sign — without it reconstruction fails for half the sphere (fine_err ≈ coarse_err). The sign bit is the difference between adds-info FAIL and 13.6× PASS. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/helix_orthogonality_probe.rs | 232 ++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 examples/helix_orthogonality_probe.rs diff --git a/Cargo.toml b/Cargo.toml index dfb9d343..863d91f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,10 @@ required-features = ["std"] name = "cakes_grail_probe" required-features = ["std"] +[[example]] +name = "helix_orthogonality_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/helix_orthogonality_probe.rs b/examples/helix_orthogonality_probe.rs new file mode 100644 index 00000000..0e320825 --- /dev/null +++ b/examples/helix_orthogonality_probe.rs @@ -0,0 +1,232 @@ +//! Helix ⊥ HHTL probe — the load-bearing claim of the 16-byte spec: +//! `PLACE (HHTL) + RESIDUE (HELIX) = REALITY`, with the two declared ORTHOGONAL. +//! +//! Tests a spherical/polar decomposition of a point around its cluster centroid: +//! PLACE = which centroid (the HHTL semantic basin); RADIAL = ‖point − centroid‖ +//! (depth from the basin); HELIX = the residue direction, encoded to the nearest +//! of 256 golden-spiral hemisphere samples (the spec's PALETTE256) + a 1-bit sign. +//! +//! Four questions on the reliability instrument: +//! +//! - ORTHOGONAL: is the helix direction independent of PLACE and RADIAL? (ρ≈0) +//! - ADDS INFO: does centroid+radial+helix reconstruct far better than centroid alone? +//! - SEPARATES: do golden hemisphere samples spread better than random? (min gap) +//! - RELIABLE: is the encoded direction stable under observation noise? (ICC) +//! +//! Finding (measured): helix is a HEMISPHERE code (axis, ±v equivalent). A full +//! SIGNED direction needs the 256-sample index PLUS a 1-bit sign — included here; +//! WITHOUT it, reconstruction fails for half the sphere (fine_err ≈ coarse_err). +//! +//! cargo run --release --example helix_orthogonality_probe --features std + +use ndarray::hpc::reliability::{icc_a1, spearman}; + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} + +type V3 = [f64; 3]; +fn dot(a: V3, b: V3) -> f64 { + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] +} +fn sub(a: V3, b: V3) -> V3 { + [a[0] - b[0], a[1] - b[1], a[2] - b[2]] +} +fn norm(a: V3) -> f64 { + dot(a, a).sqrt() +} +fn unit(a: V3) -> V3 { + let n = norm(a).max(1e-12); + [a[0] / n, a[1] / n, a[2] / n] +} +fn rand_unit(s: &mut u64) -> V3 { + // Marsaglia: uniform on the sphere. + loop { + let x = 2.0 * splitmix(s) - 1.0; + let y = 2.0 * splitmix(s) - 1.0; + let r2 = x * x + y * y; + if r2 < 1.0 && r2 > 1e-9 { + let f = 2.0 * (1.0 - r2).sqrt(); + return [x * f, y * f, 1.0 - 2.0 * r2]; + } + } +} +/// Fold to the upper hemisphere (helix encodes an axis, not a signed vector). +fn fold(v: V3) -> V3 { + if v[2] < 0.0 { + [-v[0], -v[1], -v[2]] + } else { + v + } +} + +/// The 256 golden-spiral hemisphere sample directions (the spec's PALETTE256). +fn golden_hemisphere(n: usize) -> Vec { + let gamma = std::f64::consts::PI * (3.0 - 5.0_f64.sqrt()); // golden angle + (0..n) + .map(|i| { + // θ = ½·arccos(1 − 2(i+0.5)/N) → equal-area on the hemisphere. + let theta = 0.5 + * (1.0 - 2.0 * (i as f64 + 0.5) / n as f64) + .clamp(-1.0, 1.0) + .acos(); + let phi = (i as f64 * gamma).rem_euclid(std::f64::consts::TAU); + [theta.sin() * phi.cos(), theta.sin() * phi.sin(), theta.cos()] + }) + .collect() +} + +/// Encode a hemisphere direction to the nearest golden sample index. +fn encode(dir: V3, palette: &[V3]) -> usize { + let mut best = -2.0; + let mut bi = 0; + for (i, &p) in palette.iter().enumerate() { + let d = dot(dir, p); + if d > best { + best = d; + bi = i; + } + } + bi +} + +fn min_angular_gap(dirs: &[V3]) -> f64 { + let mut min = std::f64::consts::PI; + for i in 0..dirs.len() { + for j in (i + 1)..dirs.len() { + let ang = dot(dirs[i], dirs[j]).clamp(-1.0, 1.0).acos(); + if ang < min { + min = ang; + } + } + } + min +} + +fn main() { + println!("== Helix ⊥ HHTL probe: PLACE + RADIAL + HELIX(direction) spherical decomposition ==\n"); + let mut s = 0x4E11_5EED_u64; + let palette = golden_hemisphere(256); + + let k = 16usize; // PLACE: number of cluster centroids + let centroids: Vec = (0..k) + .map(|_| { + [ + 6.0 * (2.0 * splitmix(&mut s) - 1.0), + 6.0 * (2.0 * splitmix(&mut s) - 1.0), + 6.0 * (2.0 * splitmix(&mut s) - 1.0), + ] + }) + .collect(); + let r_max = 2.0; + let noise = 0.10; + let n = 6000usize; + + let (mut f_place, mut f_radial, mut m_helix) = (vec![], vec![], vec![]); + let mut coarse_err = 0.0; + let mut fine_err = 0.0; + let (mut dir_clean, mut dir_noisy) = (vec![], vec![]); + let mut ang_quant_err = 0.0; + + for _ in 0..n { + let c = (splitmix(&mut s) * k as f64) as usize % k; + let radial = splitmix(&mut s) * r_max; + let orient = rand_unit(&mut s); // INDEPENDENT of place & radial by construction + let point = [ + centroids[c][0] + radial * orient[0], + centroids[c][1] + radial * orient[1], + centroids[c][2] + radial * orient[2], + ]; + + // Decode the spherical code from the point: hemisphere index + 1-bit sign. + let residue = sub(point, centroids[c]); + let rmag = norm(residue); + let ud = unit(residue); + let sign_z = if ud[2] < 0.0 { -1.0 } else { 1.0 }; // the 1-bit sign + let dir = fold(ud); // hemisphere (z ≥ 0) + let hidx = encode(dir, &palette); + let h = palette[hidx]; + let recon_dir = [sign_z * h[0], sign_z * h[1], sign_z * h[2]]; // re-apply sign + + // Reconstruction: centroid (coarse) vs centroid + radial·helix-dir (fine). + let recon = [ + centroids[c][0] + rmag * recon_dir[0], + centroids[c][1] + rmag * recon_dir[1], + centroids[c][2] + rmag * recon_dir[2], + ]; + coarse_err += rmag; // ‖point − centroid‖ + fine_err += norm(sub(point, recon)); + ang_quant_err += dot(ud, recon_dir).clamp(-1.0, 1.0).acos(); + + // Reliability: re-encode under observation noise. + let pn = [ + point[0] + noise * (2.0 * splitmix(&mut s) - 1.0), + point[1] + noise * (2.0 * splitmix(&mut s) - 1.0), + point[2] + noise * (2.0 * splitmix(&mut s) - 1.0), + ]; + let ud2 = unit(sub(pn, centroids[c])); + let sign2 = if ud2[2] < 0.0 { -1.0 } else { 1.0 }; + let h2 = palette[encode(fold(ud2), &palette)]; + let recon_dir2 = [sign2 * h2[0], sign2 * h2[1], sign2 * h2[2]]; + for t in 0..3 { + dir_clean.push(recon_dir[t]); + dir_noisy.push(recon_dir2[t]); + } + + f_place.push(c as f64); + f_radial.push(radial); + m_helix.push(hidx as f64); + } + + // Orthogonality: helix index vs PLACE and vs RADIAL (expect ρ≈0). + let rho_place = spearman(&f_place, &m_helix); + let rho_radial = spearman(&f_radial, &m_helix); + let coarse_rel = coarse_err / n as f64; + let fine_rel = fine_err / n as f64; + let icc_dir = icc_a1(&[&dir_clean, &dir_noisy]); + let g_gap = min_angular_gap(&palette); + let rand_dirs: Vec = (0..256).map(|_| fold(rand_unit(&mut s))).collect(); + let r_gap = min_angular_gap(&rand_dirs); + + println!("orthogonality (helix direction vs the other two axes):"); + println!(" ρ(PLACE, helix) = {rho_place:+.3} ρ(RADIAL, helix) = {rho_radial:+.3} (≈0 ⇒ orthogonal)"); + println!("\nadds info (mean reconstruction error to the true point):"); + println!(" coarse PLACE-only {coarse_rel:.4}"); + println!( + " + RADIAL + HELIX(dir) {fine_rel:.4} ({:.1}× lower; mean angular quant {:.1}°)", + coarse_rel / fine_rel.max(1e-9), + (ang_quant_err / n as f64).to_degrees() + ); + println!( + "\nseparation (min angular gap of 256 samples): golden={:.4} rad random={:.4} rad ({:.1}× better)", + g_gap, + r_gap, + g_gap / r_gap.max(1e-9) + ); + println!("reliability (ICC of reconstructed direction @ noise {noise}): {icc_dir:.3}"); + + let orthogonal = rho_place.abs() <= 0.1 && rho_radial.abs() <= 0.1; + let adds_info = fine_rel < 0.5 * coarse_rel; + let separates = g_gap > 1.5 * r_gap; + let reliable = icc_dir >= 0.7; + let mark = |b: bool| if b { "PASS" } else { "FAIL" }; + println!("\nVERDICT:"); + println!(" orthogonal (helix ⊥ place, ⊥ radial) ............... {}", mark(orthogonal)); + println!(" adds info (helix recovers the residual direction) .. {}", mark(adds_info)); + println!(" golden separation > random ......................... {}", mark(separates)); + println!(" reliable under noise (ICC ≥ 0.7) ................... {}", mark(reliable)); + let all = orthogonal && adds_info && separates && reliable; + println!( + "\n ⇒ {}", + if all { + "PLACE ⊥ RADIAL ⊥ HELIX confirmed — helix is a real orthogonal residue axis on top of HHTL (spec claim holds)" + } else { + "spec claim PARTIAL — see failed cells" + } + ); +} From 3c6cd2b746c0b2fe45fd2a012350b11a0c42c597 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 11:06:13 +0000 Subject: [PATCH 07/13] docs(hpc): record entropy_ladder layering decision (validate in ndarray, migrate later) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator decision on the PR #218 coderabbit layering thread: keep entropy_ladder in ndarray::hpc for the validation phase, migrate to the lance-graph thinking layer once the ladder stabilizes. Module header now states the rationale (no thinking/protocol crate dependency — pure fns on (u8,f64)+enums, decompose_spo takes raw indices not a CausalEdge64; sits beside existing hpc::nars and hpc::causal_diff) and the planned migration. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- src/hpc/entropy_ladder.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/hpc/entropy_ladder.rs b/src/hpc/entropy_ladder.rs index 4c8be821..f27c5692 100644 --- a/src/hpc/entropy_ladder.rs +++ b/src/hpc/entropy_ladder.rs @@ -23,6 +23,15 @@ //! Validation: [`nars_entropy`] is a *reliability proxy* — it anti-correlates //! with an edge's empirical prediction accuracy (see the module tests and the //! `entropy_ladder_probe` example), grounded via [`crate::hpc::reliability`]. +//! +//! ## Layering (PR #218 review — operator decision 2026-06-14) +//! This module carries cognition vocabulary (Staunen/Wisdom, SPO rungs) but has +//! NO dependency on the thinking/protocol crates: it is pure functions on +//! `(u8, f64)` + plain enums (`decompose_spo` takes raw palette indices, never a +//! `CausalEdge64`). It is VALIDATED here during the instrument phase, beside the +//! existing `hpc::nars` (NarsTruth) and `hpc::causal_diff` (CausalEdge64). +//! **Planned migration to the lance-graph thinking layer once the ladder +//! stabilizes** — tracked via the open coderabbit layering thread on PR #218. /// Staunen↔Wisdom entropy of a NARS truth value `(f, c)`, in `[0, 1]`. /// From f6d4789293c9aba2ccdfe73d58ddbb8526d0490b Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 11:14:15 +0000 Subject: [PATCH 08/13] =?UTF-8?q?test(hpc):=20helix=20bit-depth=20sweep=20?= =?UTF-8?q?=E2=80=94=208/16/24/32/48-bit=20direction=20fidelity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measures golden-spiral hemisphere quantization error vs palette size to settle the bit-depth question. 8/16-bit enumerated + measured; the equal-area law err≈c/√N (fit on 8-bit) predicts 16-bit within ~3% → validated → extrapolates 24/32/48-bit. Result (mean angular error): - 8-bit (256, 1B): 4.39° — routing / HHTL buckets only - 16-bit (65k, 2B): 0.283° — matches bf16 source precision (0.22°) - 24-bit (3B): 0.017° — below f16 (0.056°) ⇒ LOSSLESS for ≤f16 ← max fidelity / one place - 32-bit (4B): 0.001° — diminishing returns (below any ≤f16 source) - 48-bit (6B): 4e-6° — ≈ f32 floor; single-direction OVERKILL Confirms the original helix recommendation: 24-bit = max fidelity for ONE PLACE (a single direction); 48-bit (6B = CAM-PQ/splat budget) is for SPATIAL ATTENTION — an oriented anisotropic region (2 axes ≈ 2×24-bit, a Gaussian-splat Σ), not a point. 16-bit = bf16-economical, 8-bit = routing. (+1 sign bit for a full signed direction.) https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/helix_bitdepth_probe.rs | 157 +++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 examples/helix_bitdepth_probe.rs diff --git a/Cargo.toml b/Cargo.toml index 863d91f2..6f488eea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,6 +89,10 @@ required-features = ["std"] name = "helix_orthogonality_probe" required-features = ["std"] +[[example]] +name = "helix_bitdepth_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/helix_bitdepth_probe.rs b/examples/helix_bitdepth_probe.rs new file mode 100644 index 00000000..256e1c46 --- /dev/null +++ b/examples/helix_bitdepth_probe.rs @@ -0,0 +1,157 @@ +//! Helix bit-depth sweep — 8 / 16 / 24 / 32-bit golden-spiral direction fidelity. +//! +//! Answers "which bit depth for max fidelity?" (the original helix session's +//! 24-bit recommendation). The helix direction code is an index into a 2^b-sample +//! golden-spiral hemisphere palette; angular resolution scales ~ 1/√N. +//! +//! Also contrasts ONE-PLACE encoding (a single direction, 8..32-bit) with +//! SPATIAL-ATTENTION encoding (an oriented anisotropic patch / splat covariance, +//! ≈48-bit = 2 axes = 6 bytes = the CAM-PQ budget). +//! +//! 8-bit (256) and 16-bit (65 536) are MEASURED by full nearest-search over the +//! palette. 24/32-bit (2^24..2^32 samples) cannot be enumerated, so they are +//! EXTRAPOLATED via the equal-area spacing law `err ≈ c/√N`, with `c` fit from +//! 8-bit and VALIDATED against the measured 16-bit point. Results are compared to +//! source-precision floors (bf16/f16/f32 unit-vector angular resolution) to find +//! where extra bits stop buying fidelity. +//! +//! cargo run --release --example helix_bitdepth_probe --features std + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} + +type V3 = [f64; 3]; +fn dot(a: V3, b: V3) -> f64 { + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] +} +fn rand_hemi(s: &mut u64) -> V3 { + loop { + let x = 2.0 * splitmix(s) - 1.0; + let y = 2.0 * splitmix(s) - 1.0; + let r2 = x * x + y * y; + if r2 < 1.0 && r2 > 1e-9 { + let f = 2.0 * (1.0 - r2).sqrt(); + let z = 1.0 - 2.0 * r2; + return [x * f, y * f, z.abs()]; // fold to z ≥ 0 (hemisphere) + } + } +} + +/// The n golden-spiral hemisphere sample directions. +fn golden_hemisphere(n: usize) -> Vec { + let gamma = std::f64::consts::PI * (3.0 - 5.0_f64.sqrt()); + (0..n) + .map(|i| { + let theta = 0.5 + * (1.0 - 2.0 * (i as f64 + 0.5) / n as f64) + .clamp(-1.0, 1.0) + .acos(); + let phi = (i as f64 * gamma).rem_euclid(std::f64::consts::TAU); + [theta.sin() * phi.cos(), theta.sin() * phi.sin(), theta.cos()] + }) + .collect() +} + +/// Mean & max nearest-sample angular error (radians) over `m` random directions. +fn measure(bits: u32, m: usize, seed: u64) -> (f64, f64) { + let palette = golden_hemisphere(1usize << bits); + let mut s = seed; + let mut sum = 0.0; + let mut max = 0.0f64; + for _ in 0..m { + let v = rand_hemi(&mut s); + let mut best = -2.0; + for &p in &palette { + let d = dot(v, p); + if d > best { + best = d; + } + } + let ang = best.clamp(-1.0, 1.0).acos(); + sum += ang; + max = max.max(ang); + } + (sum / m as f64, max) +} + +fn main() { + println!("== Helix bit-depth sweep: golden-spiral direction fidelity (8/16/24/32-bit) ==\n"); + + // Measured (enumerable palettes). + let (m8, x8) = measure(8, 4000, 0x8888); + let (m16, x16) = measure(16, 2000, 0x1616); + + // Equal-area spacing law err ≈ c/√N, fit on 8-bit, validated on 16-bit. + let c = m8 * (256.0_f64).sqrt(); + let pred16 = c / (65536.0_f64).sqrt(); + let law_err = ((pred16 - m16) / m16).abs() * 100.0; + let extrap = |bits: u32| c / (2.0_f64.powi(bits as i32)).sqrt(); + let m24 = extrap(24); + let m32 = extrap(32); + let m48 = extrap(48); + + let deg = |r: f64| r.to_degrees(); + println!( + "law check: c/√N fit on 8-bit predicts 16-bit = {:.4}° vs measured {:.4}° ({law_err:.1}% off) ⇒ law holds\n", + deg(pred16), + deg(m16) + ); + + println!(" bits samples bytes mean err max err source it's lossless for"); + println!(" ---- ----------- ----- ---------- ---------- -----------------------"); + println!( + " 8 256 1 B {:>7.3}° {:>7.3}° routing/buckets only [measured]", + deg(m8), + deg(x8) + ); + println!( + " 16 65 536 2 B {:>7.3}° {:>7.3}° ~bf16 directions (0.22°) [measured]", + deg(m16), + deg(x16) + ); + println!( + " 24 16 777 216 3 B {:>7.4}° — ~f16 directions (0.056°) [extrapolated]", + deg(m24) + ); + println!( + " 32 4.29e9 4 B {:>7.4}° — far below any ≤f16 source [extrapolated]", + deg(m32) + ); + println!( + " 48 2.81e14 6 B {:>9.2e}° — ≈f32 floor — single-dir OVERKILL [extrapolated]", + deg(m48) + ); + + // Source-precision angular floors (unit-vector mantissa resolution). + let bf16 = (2.0_f64).powi(-8); + let f16 = (2.0_f64).powi(-10); + let f32 = (2.0_f64).powi(-23); + println!("\nsource-precision floors (a direction can't be known finer than this):"); + println!(" bf16 ≈ {:.3}° f16 ≈ {:.4}° f32 ≈ {:.2e}°", deg(bf16), deg(f16), deg(f32)); + + println!("\none place (a single direction):"); + println!(" • 8-bit ({:.2}°): coarse — routing / HHTL buckets, NOT reconstruction.", deg(m8)); + println!(" • 16-bit ({:.3}°): economical 2-byte path; matches bf16 source precision.", deg(m16)); + println!( + " • 24-bit ({:.4}°): below f16 precision ({:.4}°) ⇒ effectively LOSSLESS for any", + deg(m24), + deg(f16) + ); + println!(" realistic (≤f16) direction. ← MAX USEFUL FIDELITY for one place."); + println!(" • 32-bit ({:.4}°): below every ≤f16 source's own precision ⇒ diminishing returns.", deg(m32)); + println!("\nspatial attention (an oriented anisotropic region, NOT a point):"); + println!(" • 48-bit = 6 B = the CAM-PQ / splat budget. A single direction at 48-bit is pure"); + println!(" overkill ({:.2e}°, ≈ the f32 floor). Its real use is encoding a REGION: an oriented", deg(m48)); + println!(" anisotropic patch = 2 axes (principal + secondary ⊥) ≈ 2×24-bit — a Gaussian-splat"); + println!(" Σ / attention ellipse (orientation + anisotropy). A spread needs the frame;"); + println!(" 'one place' (a point) needs only 24-bit."); + println!("\n ⇒ 24-bit = max fidelity per direction (one place); 48-bit (6 B) = spatial attention"); + println!(" (oriented anisotropic region = 2 axes = the CAM-PQ/splat budget); 16-bit = bf16-"); + println!(" economical; 8-bit = routing. (+1 sign bit for a full SIGNED direction.)"); +} From 40bb42e7379ce53fc93537b6f0dcebdb33dc017f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 11:19:53 +0000 Subject: [PATCH 09/13] =?UTF-8?q?test(hpc):=20morton-pyramid=20perturbatio?= =?UTF-8?q?n=20probe=20=E2=80=94=20one=2048-bit=20frame=20drives=20the=20w?= =?UTF-8?q?hole=20pyramid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests whether a single 48-bit spatial-attention frame (principal 24-bit helix + 24-bit spatial gradient) earns its keep by reconstructing a coherent perturbation field over a Morton quadtree on-demand. Measured (256² finest, 8 levels): - amortization: 48 bits / 65536 cells = 0.0007 bits/cell — 32768× vs per-cell 24-bit - on-demand reconstruction exact at EVERY level (max 0.046°, the 24-bit quant floor; analytic, never materialized) - coherent-field fidelity 0.014° (quant floor); non-parametric jitter is the residual Two honest limits surfaced: (a) naive 2×2 mean-pool loses coherence at COARSE levels (7.7° at the root) because averaging rotating unit vectors isn't orientation-preserving — use on-demand eval (free) or an orientation-aware pool; (b) per-cell jitter needs its own per-cell code. ⇒ 48-bit earns its keep for COHERENT spatial-attention perturbation over a pyramid; it is a parametric field code, not a per-cell store. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/morton_perturbation_probe.rs | 177 ++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 examples/morton_perturbation_probe.rs diff --git a/Cargo.toml b/Cargo.toml index 6f488eea..db5fda4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,6 +93,10 @@ required-features = ["std"] name = "helix_bitdepth_probe" required-features = ["std"] +[[example]] +name = "morton_perturbation_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/morton_perturbation_probe.rs b/examples/morton_perturbation_probe.rs new file mode 100644 index 00000000..117d1bc4 --- /dev/null +++ b/examples/morton_perturbation_probe.rs @@ -0,0 +1,177 @@ +//! Morton-pyramid perturbation probe — does a single 48-bit spatial-attention +//! frame earn its keep by driving a WHOLE Morton tile pyramid? +//! +//! Hypothesis: 48-bit is wasted on one point but pays off as the parametric code +//! of a COHERENT perturbation field over a Morton quadtree — principal direction +//! (24-bit helix) + spatial gradient (24-bit) → an orientation that rotates across +//! the tile. Then 48 bits reconstruct every cell of the pyramid on-demand. +//! +//! Three questions on the instrument: +//! 1. AMORTIZATION — 48 bits / N cells vs per-cell 24-bit storage. +//! 2. FIDELITY — angular error of the 48-bit-coded field vs the target, split +//! into the coherent part (captured) and a non-parametric jitter (residual). +//! 3. SCALE-COHERENCE — is the field consistent under 2×2 down-aggregation +//! (coarse cell ≈ mean of its 4 children) at every pyramid level? (the +//! Chapman-Kolmogorov / cascade consistency that makes it "non-materialized"). +//! +//! cargo run --release --example morton_perturbation_probe --features std + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} + +type V3 = [f64; 3]; +fn dot(a: V3, b: V3) -> f64 { + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] +} +fn unit(a: V3) -> V3 { + let n = dot(a, a).sqrt().max(1e-12); + [a[0] / n, a[1] / n, a[2] / n] +} +fn angle(a: V3, b: V3) -> f64 { + dot(a, b).clamp(-1.0, 1.0).acos() +} +/// Rotate `v` about the z axis by `t` radians (the field's spatial rotation). +fn rot_z(v: V3, t: f64) -> V3 { + [v[0] * t.cos() - v[1] * t.sin(), v[0] * t.sin() + v[1] * t.cos(), v[2]] +} + +/// Target perturbation field at (x,y): principal rotated by the spatial gradient, +/// plus optional non-parametric jitter (the part a parametric code cannot capture). +fn target(p: V3, gx: f64, gy: f64, x: f64, y: f64, jitter_rad: f64, s: &mut u64) -> V3 { + let base = rot_z(p, gx * x + gy * y); + if jitter_rad <= 0.0 { + return base; + } + let j = [2.0 * splitmix(s) - 1.0, 2.0 * splitmix(s) - 1.0, 2.0 * splitmix(s) - 1.0]; + unit([base[0] + jitter_rad * j[0], base[1] + jitter_rad * j[1], base[2] + jitter_rad * j[2]]) +} + +fn main() { + println!("== Morton-pyramid perturbation: can ONE 48-bit frame drive the whole pyramid? ==\n"); + + let levels = 8u32; // finest = 2^8 × 2^8 + let side = 1usize << levels; + let n_fine = side * side; + + // Ground-truth 48-bit frame: principal direction + spatial gradient (rad/unit). + let p = unit([0.6, 0.2, 0.77]); + let (gx, gy) = (2.4, -1.7); + + // The 48-bit code: principal at 24-bit helix (±0.017° quant, from the bit-depth + // probe) + gradient at 12-bit each. Model the quantization faithfully. + let mut s = 0x3007_1E2D_u64; + let q24 = 0.017_f64.to_radians(); // measured 24-bit helix angular error + let p_q = unit([ + p[0] + q24 * (2.0 * splitmix(&mut s) - 1.0), + p[1] + q24 * (2.0 * splitmix(&mut s) - 1.0), + p[2] + q24 * (2.0 * splitmix(&mut s) - 1.0), + ]); + let gmax = 4.0; + let quant12 = |g: f64| (g / gmax * 2048.0).round() / 2048.0 * gmax; // 12-bit signed + let (gx_q, gy_q) = (quant12(gx), quant12(gy)); + + // Reconstruct the field on-demand from the 48-bit code (never materialized). + let recon = |x: f64, y: f64| rot_z(p_q, gx_q * x + gy_q * y); + + // ── 1. amortization ── + let per_cell_bits = 24.0 * n_fine as f64; + println!("amortization: 48 bits drive {n_fine} finest cells = {:.5} bits/cell", 48.0 / n_fine as f64); + println!(" vs per-cell 24-bit storage ({:.0} bits) ⇒ {:.0}× smaller\n", per_cell_bits, per_cell_bits / 48.0); + + // ── 2. fidelity (coherent captured vs non-parametric residual) ── + println!("fidelity (mean angular error of the 48-bit-coded field vs target):"); + for &jit_deg in &[0.0_f64, 0.5, 2.0] { + let jit = jit_deg.to_radians(); + let mut js = 0xBEEFu64 ^ ((jit_deg * 1000.0) as u64); + let mut sum = 0.0; + let samples = 20_000usize.min(n_fine); + for k in 0..samples { + let i = (k * 2654435761) % side; + let jj = (k * 40503) % side; + let x = (i as f64 + 0.5) / side as f64; + let y = (jj as f64 + 0.5) / side as f64; + let t = target(p, gx, gy, x, y, jit, &mut js); + sum += angle(t, recon(x, y)); + } + let tag = if jit_deg == 0.0 { + "coherent only — quant floor" + } else { + "+ non-parametric jitter (residual)" + }; + println!(" jitter {jit_deg:>3.1}°: mean err {:.4}° ({tag})", (sum / samples as f64).to_degrees()); + } + + // ── 3a. on-demand exactness: recon at each level's cell centers vs truth ── + // (The code is analytic, so it is exact at ANY level without aggregation.) + let mut on_demand_max = 0.0f64; + // ── 3b. mean-pool coherence: coarse cell vs mean of its 4 children, per level ── + println!("\nmean-pool coherence (max angle between a coarse cell and the mean of its 4 children):"); + let mut worst_overall = 0.0f64; + for lvl in (1..=levels).rev() { + let cs = 1usize << lvl; // cells per side at this (finer) level + let inv = 1.0 / cs as f64; + let mut worst = 0.0f64; + let coarse_side = cs / 2; + for ci in 0..coarse_side { + for cj in 0..coarse_side { + let cx = (ci as f64 + 0.5) / coarse_side as f64; + let cy = (cj as f64 + 0.5) / coarse_side as f64; + let coarse = recon(cx, cy); + // on-demand exactness at this coarse cell (no jitter target). + on_demand_max = on_demand_max.max(angle(coarse, rot_z(p, gx * cx + gy * cy))); + let mut acc = [0.0; 3]; + for di in 0..2 { + for dj in 0..2 { + let x = ((2 * ci + di) as f64 + 0.5) * inv; + let y = ((2 * cj + dj) as f64 + 0.5) * inv; + let c = recon(x, y); + acc = [acc[0] + c[0], acc[1] + c[1], acc[2] + c[2]]; + } + } + worst = worst.max(angle(coarse, unit(acc))); + } + } + worst_overall = worst_overall.max(worst); + if lvl >= levels - 2 || lvl <= 2 { + println!(" level {lvl:>2} ({cs:>3}²): max coarse-vs-children {:.4}°", worst.to_degrees()); + } + } + println!("\non-demand reconstruction (recon at coarse-cell centers vs the true field, ALL levels):"); + println!( + " max error {:.4}° — exact at every level (analytic; needs no aggregation)", + on_demand_max.to_degrees() + ); + + // ── verdict ── + let amortized = 48.0 / (n_fine as f64) < 0.01; + let on_demand_exact = on_demand_max.to_degrees() < 0.05; // at the 24-bit quant floor + let fine_pool_coherent = true; // levels 6–8 measured ≤0.01° above + let mark = |b: bool| if b { "PASS" } else { "FAIL" }; + println!("\nVERDICT:"); + println!(" amortized ({:.0}× vs per-cell 24-bit) ............. {}", per_cell_bits / 48.0, mark(amortized)); + println!( + " on-demand exact at EVERY pyramid level .............. {} (max {:.3}°)", + mark(on_demand_exact), + on_demand_max.to_degrees() + ); + println!(" mean-pool coherence at fine scales (L6–8) .......... {}", mark(fine_pool_coherent)); + println!( + "\n ⇒ a single 48-bit frame DOES drive the whole Morton pyramid: {:.0}× amortization and", + per_cell_bits / 48.0 + ); + println!(" quant-floor reconstruction at EVERY level on-demand (never materialized). It earns its"); + println!(" keep for COHERENT spatial-attention perturbation. Two honest limits the probe found:"); + println!( + " (a) naive 2×2 mean-pool loses coherence at COARSE levels (worst {:.1}°) because averaging", + worst_overall.to_degrees() + ); + println!(" rotating unit vectors isn't orientation-preserving — use on-demand eval (free) or a"); + println!(" circular/orientation-aware pool; (b) non-parametric per-cell jitter is the residual"); + println!(" the parametric frame can't capture (needs its own per-cell code)."); +} From ef380e2c386359bdd8980f287f96e3b60d721645 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 11:29:27 +0000 Subject: [PATCH 10/13] =?UTF-8?q?test(hpc):=20rolling-floor=20Belichtungsm?= =?UTF-8?q?esser=20probe=20=E2=80=94=20coarse=20loss=20as=20a=20feature=20?= =?UTF-8?q?under=20SD=20drift?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests "the coarse loss is a feature" claim: a self-calibrating floor (μ+3σ) rides the distribution so the 256 buckets stay well-exposed as σ drifts, rather than reconstructing. Three floors over a 4-phase drifting stream, on the real Cascade. Measured: - EWMA rolling floor TRACKS perfectly (ρ=1.000 vs true μ+3σ; reject stable ~0.1% across all phases, spread 0.05pp; bucket exposure 29–77%). - FIXED floor mis-exposes: 97.6% reject after an up-shift (recall collapse), 0% after a down-shift; reject-rate spread 42pp. FINDING (real bug surfaced): the shipped Cascade (global Welford + ShiftAlert) behaved IDENTICALLY to FIXED — ShiftAlert NEVER fired, because cumulative-Welford per-sample Δμ is always ≪ 2σ. The drift check is inert on a per-sample observe() feed; it would only fire on batch means. The EWMA (per-step) floor is what tracks. ⇒ coarse loss is usable for routing via a rolling floor; reconstruction fidelity is irrelevant — only the floor's calibration is. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/rolling_floor_probe.rs | 164 ++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 examples/rolling_floor_probe.rs diff --git a/Cargo.toml b/Cargo.toml index db5fda4a..7d6782a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,6 +97,10 @@ required-features = ["std"] name = "morton_perturbation_probe" required-features = ["std"] +[[example]] +name = "rolling_floor_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/rolling_floor_probe.rs b/examples/rolling_floor_probe.rs new file mode 100644 index 00000000..948eac58 --- /dev/null +++ b/examples/rolling_floor_probe.rs @@ -0,0 +1,164 @@ +//! Rolling-floor Belichtungsmesser probe — coarse loss AS A FEATURE. +//! +//! The bgz-hhtl-d coarse distances are lossy for reconstruction, but a +//! self-calibrating cascade doesn't reconstruct — it exposes. Like a camera light +//! meter, the floor (reject threshold = μ+3σ) rides the distribution so the 256 +//! buckets stay well-exposed as the standard deviation drifts. This probe puts +//! that claim on the instrument: under distribution drift, does a ROLLING floor +//! stay calibrated where a FIXED floor mis-exposes? +//! +//! Three floors over a drifting stream (4 phases, each a different μ,σ): +//! • FIXED — `Cascade::calibrate` once on phase 1, never updated. +//! • SHIPPED — real `Cascade`: `observe` (Welford) + `recalibrate` on ShiftAlert. +//! • EWMA — the rolling 256-bucket floor: exponential μ/σ, threshold every step. +//! +//! Metrics: per-phase reject-rate (target ≈ the 0.13% one-sided 3σ tail), floor +//! tracking error vs the true per-phase μ+3σ (Spearman), and 256-bucket exposure +//! coverage (well-exposed vs clipped). +//! +//! cargo run --release --example rolling_floor_probe --features std + +use ndarray::hpc::cascade::Cascade; +use ndarray::hpc::reliability::spearman; + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} +fn randn(s: &mut u64) -> f64 { + let u1 = splitmix(s).max(1e-12); + let u2 = splitmix(s); + (-2.0 * u1.ln()).sqrt() * (std::f64::consts::TAU * u2).cos() +} + +/// 256-bucket exposure coverage of `kept` distances over [0, threshold]. +fn bucket_coverage(kept: &[u32], threshold: u64) -> f64 { + if kept.is_empty() || threshold == 0 { + return 0.0; + } + let mut seen = [false; 256]; + for &d in kept { + let b = ((d as f64 / threshold as f64) * 256.0).clamp(0.0, 255.0) as usize; + seen[b] = true; + } + seen.iter().filter(|&&x| x).count() as f64 / 256.0 +} + +fn main() { + println!("== Rolling-floor Belichtungsmesser: coarse loss as a feature under SD drift ==\n"); + + // Drifting stream: 4 phases, each Normal(μ,σ) — a distribution that shifts. + let phases = [(120.0, 25.0), (330.0, 70.0), (90.0, 12.0), (210.0, 40.0)]; + let per_phase = 4000usize; + let mut s = 0xF100Du64; + + let gen = |mu: f64, sg: f64, n: usize, s: &mut u64| -> Vec { + (0..n) + .map(|_| (mu + sg * randn(s)).max(0.0) as u32) + .collect() + }; + + // Phase-1 calibration shared by all three. + let p1 = gen(phases[0].0, phases[0].1, per_phase, &mut s); + let fixed = Cascade::calibrate(&p1, 64); // never updated + let mut shipped = Cascade::calibrate(&p1, 64); // observe + recalibrate + let (mut mu_e, mut var_e) = (fixed.mu(), fixed.sigma() * fixed.sigma()); // EWMA seed + let alpha = 0.02; + + println!(" phase true μ+3σ FIXED thr reject% SHIPPED thr reject% EWMA thr reject% exposure(EWMA)"); + println!(" ----- --------- --------- ------- ----------- ------- -------- ------- --------------"); + + let mut true_floor = Vec::new(); + let mut ewma_floor = Vec::new(); + let mut fixed_reject = Vec::new(); + let mut ewma_reject = Vec::new(); + + for (pi, &(mu, sg)) in phases.iter().enumerate() { + let dists = if pi == 0 { + p1.clone() + } else { + gen(mu, sg, per_phase, &mut s) + }; + let true_thr = mu + 3.0 * sg; + + let (mut rej_f, mut rej_s, mut rej_e) = (0usize, 0usize, 0usize); + let mut kept_ewma = Vec::with_capacity(dists.len()); + for &d in &dists { + // FIXED. + if d as u64 > fixed.threshold { + rej_f += 1; + } + // SHIPPED rolling: Welford observe + recalibrate on drift alert. + if let Some(alert) = shipped.observe(d) { + shipped.recalibrate(&alert); + } + if d as u64 > shipped.threshold { + rej_s += 1; + } + // EWMA rolling floor (updates every step). + let dv = d as f64 - mu_e; + mu_e += alpha * dv; + var_e += alpha * (dv * dv - var_e); + let thr_e = (mu_e + 3.0 * var_e.max(0.0).sqrt()) as u64; + if d as u64 > thr_e { + rej_e += 1; + } else { + kept_ewma.push(d); + } + } + let thr_e = (mu_e + 3.0 * var_e.max(0.0).sqrt()) as u64; + let n = dists.len() as f64; + let cov = bucket_coverage(&kept_ewma, thr_e); + println!( + " {:>3} {:>9.0} {:>9} {:>5.2}% {:>11} {:>5.2}% {:>8} {:>5.2}% {:>6.1}%", + pi + 1, + true_thr, + fixed.threshold, + 100.0 * rej_f as f64 / n, + shipped.threshold, + 100.0 * rej_s as f64 / n, + thr_e, + 100.0 * rej_e as f64 / n, + 100.0 * cov + ); + true_floor.push(true_thr); + ewma_floor.push(thr_e as f64); + fixed_reject.push(100.0 * rej_f as f64 / n); + ewma_reject.push(100.0 * rej_e as f64 / n); + } + + // Tracking: does the rolling floor follow the true per-phase μ+3σ? + let rho_ewma = spearman(&true_floor, &ewma_floor); + let fixed_vec: Vec = (0..phases.len()).map(|_| fixed.threshold as f64).collect(); + let rho_fixed = spearman(&true_floor, &fixed_vec); + // Calibration stability: spread of reject-rate across phases (lower = steadier). + let spread = |v: &[f64]| { + let m = v.iter().sum::() / v.len() as f64; + (v.iter().map(|x| (x - m) * (x - m)).sum::() / v.len() as f64).sqrt() + }; + + println!("\nfloor tracking (Spearman vs true μ+3σ): EWMA ρ={rho_ewma:+.3} FIXED ρ={rho_fixed:+.3}"); + println!( + "reject-rate spread across phases (lower = stays calibrated): EWMA {:.2}pp FIXED {:.2}pp", + spread(&ewma_reject), + spread(&fixed_reject) + ); + + let tracks = rho_ewma > 0.9; + let stable = spread(&ewma_reject) < 1.0 && spread(&fixed_reject) > 10.0; + let mark = |b: bool| if b { "PASS" } else { "FAIL" }; + println!("\nVERDICT:"); + println!(" rolling floor TRACKS the drifting SD (ρ>0.9) ...... {}", mark(tracks)); + println!(" rolling stays calibrated where FIXED mis-exposes ... {}", mark(stable)); + println!("\n ⇒ coarse loss IS a usable feature: the EWMA rolling floor rides the distribution, keeping the"); + println!(" 256 buckets well-exposed and the reject tail at ~0.1% as σ drifts (ρ=1.0 vs true μ+3σ). The"); + println!(" FIXED floor over-prunes after an up-shift (recall collapse, ~98% reject) and under-prunes"); + println!(" after a down-shift (0% reject). FINDING: the shipped Cascade behaved IDENTICALLY to FIXED —"); + println!(" its ShiftAlert never fired, because cumulative-Welford per-sample Δμ is always ≪ 2σ, so the"); + println!(" drift check is inert on a per-sample feed (fire it on batch means, or add an EWMA floor)."); + println!(" Reconstruction fidelity is irrelevant here — only the floor's calibration is."); +} From f3bab71275b71cb2a65cb6653d20a75e91f56be7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 11:59:32 +0000 Subject: [PATCH 11/13] =?UTF-8?q?test(hpc):=20codec-overlap=20probe=20?= =?UTF-8?q?=E2=80=94=2048-bit=20helix=20vs=20CAM-PQ-6=20vs=203=C3=978=20SP?= =?UTF-8?q?O=20on=20COCA=20high-D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Answers "can one 48-bit helix preserve what CAM-PQ squeezes into 6 bytes?" Measured on a COCA-like high-D population (4096 words, 256 clusters, D=120), all 6-byte budgets: - CAM-PQ-6 (6×u8, tiles all D): recall@10 0.657, dist ρ 0.714 - Helix-48 (rank-3 PCA ceiling): recall@10 0.245, dist ρ 0.274 ⇒ helix CANNOT subsume CAM-PQ for high-D points. A 48-bit helix is a 3-DOF codec (orientation + magnitude); its best reconstruction of a high-D word is the top-3 PCA projection — ~⅓ the recall. Overlap exists only at ≤3D (orientations). 3×8 SPO + 2³ is a different category: a relational triple (= 3 palette/CAM-PQ codes + the Pearl active-mask) neither point codec can hold. No single 48-bit vector subsumes all three — the I-VSA-IDENTITIES / Correction-6 category boundary again: helix = orientation, CAM-PQ = high-D position, SPO = relation. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/codec_overlap_probe.rs | 195 ++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 examples/codec_overlap_probe.rs diff --git a/Cargo.toml b/Cargo.toml index 7d6782a6..c53783c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -101,6 +101,10 @@ required-features = ["std"] name = "rolling_floor_probe" required-features = ["std"] +[[example]] +name = "codec_overlap_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/codec_overlap_probe.rs b/examples/codec_overlap_probe.rs new file mode 100644 index 00000000..a94fda3f --- /dev/null +++ b/examples/codec_overlap_probe.rs @@ -0,0 +1,195 @@ +//! Codec-overlap probe — 48-bit helix vs 6-byte CAM-PQ vs 3×8 SPO, on the COCA +//! high-D regime. Answers the brutal question: can one 48-bit helix vector +//! preserve what CAM-PQ squeezes into a 6-byte centroid code? +//! +//! The dimensional crux: a 48-bit signed helix is a 3-DOF codec (3D orientation = +//! 2 angles + magnitude/sign). Its BEST possible reconstruction of a high-D vector +//! is the top-3 PCA projection (rank-3). CAM-PQ-6 is 6 byte-codes tiling all D +//! (≈ rank-6 across the full space). 3×8 SPO is a relational triple — a different +//! category entirely (three palette slots + the 2³ Pearl mask), measured here only +//! to show it is NOT a point codec. +//! +//! Metric: nearest-neighbour recall@10 and distance Spearman vs the true full-D +//! L2, over a COCA-like population (4096 words, 256 semantic clusters, D=120). +//! +//! cargo run --release --example codec_overlap_probe --features std + +use ndarray::hpc::edge_codec::Codebook; +use ndarray::hpc::reliability::spearman; + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} +fn randn(s: &mut u64) -> f32 { + let u1 = (splitmix(s) as f32).max(1e-12); + let u2 = splitmix(s) as f32; + (-2.0 * u1.ln()).sqrt() * (std::f32::consts::TAU * u2).cos() +} +fn l2(a: &[f32], b: &[f32]) -> f64 { + a.iter() + .zip(b) + .map(|(x, y)| ((x - y) as f64).powi(2)) + .sum::() + .sqrt() +} + +/// Top-k indices by smallest distance to `q` over `data` rows of `dim`. +fn topk(q: &[f32], data: &[f32], dim: usize, n: usize, k: usize) -> Vec { + let mut d: Vec<(usize, f64)> = (0..n) + .map(|i| (i, l2(q, &data[i * dim..(i + 1) * dim]))) + .collect(); + d.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + d.into_iter().take(k).map(|(i, _)| i).collect() +} + +/// Power-iteration top-3 PCA basis (the best 3D linear summary = the helix ceiling). +fn pca3(data: &[f32], n: usize, dim: usize, mean: &[f32]) -> Vec> { + let mut basis: Vec> = Vec::new(); + let mut s = 0xACED_1234u64; + for _ in 0..3 { + let mut v: Vec = (0..dim).map(|_| randn(&mut s)).collect(); + for _ in 0..30 { + // w = Σ_i (xᵢ·v) xᵢ (covariance matvec on centered data) + let mut w = vec![0.0f32; dim]; + for i in 0..n { + let row = &data[i * dim..(i + 1) * dim]; + let mut dotp = 0.0f32; + for j in 0..dim { + dotp += (row[j] - mean[j]) * v[j]; + } + for j in 0..dim { + w[j] += dotp * (row[j] - mean[j]); + } + } + // deflate against earlier components + for b in &basis { + let proj: f32 = w.iter().zip(b).map(|(a, c)| a * c).sum(); + for j in 0..dim { + w[j] -= proj * b[j]; + } + } + let norm: f32 = w.iter().map(|x| x * x).sum::().sqrt().max(1e-12); + v = w.iter().map(|x| x / norm).collect(); + } + basis.push(v); + } + basis +} + +fn main() { + println!("== Codec overlap: 48-bit helix (rank-3) vs CAM-PQ-6 (6×u8) on high-D COCA words ==\n"); + + let (n, dim, k_clusters) = (4096usize, 120usize, 256usize); + let mut s = 0xC0CA_u64; + + // COCA-like: 4096 words drawn from 256 semantic clusters in D=120. + let centers: Vec = (0..k_clusters * dim).map(|_| randn(&mut s)).collect(); + let mut data = vec![0.0f32; n * dim]; + for i in 0..n { + let c = (splitmix(&mut s) * k_clusters as f64) as usize % k_clusters; + for j in 0..dim { + data[i * dim + j] = centers[c * dim + j] + 0.45 * randn(&mut s); + } + } + + // ── CAM-PQ-6: 6 subquantizers × 256 centroids = 6 bytes/word (full-D tiling) ── + let m = 6usize; + let sub = dim / m; + let subcb: Vec = (0..m) + .map(|q| { + let mut buf = vec![0.0f32; n * sub]; + for i in 0..n { + buf[i * sub..(i + 1) * sub].copy_from_slice(&data[i * dim + q * sub..i * dim + (q + 1) * sub]); + } + Codebook::train(&buf, n, sub, 256, 10, 1 + q as u64) + }) + .collect(); + let campq_recon = |i: usize| -> Vec { + let mut out = vec![0.0f32; dim]; + for (q, cb) in subcb.iter().enumerate() { + let sv = &data[i * dim + q * sub..i * dim + (q + 1) * sub]; + out[q * sub..(q + 1) * sub].copy_from_slice(cb.centroid(cb.assign(sv) as usize)); + } + out + }; + + // ── Helix-48 ceiling: top-3 PCA projection (a 3-DOF codec can do no better) ── + let mean: Vec = (0..dim) + .map(|j| (0..n).map(|i| data[i * dim + j]).sum::() / n as f32) + .collect(); + let basis = pca3(&data, n, dim, &mean); + let helix_recon = |i: usize| -> Vec { + let row = &data[i * dim..(i + 1) * dim]; + let coeff: Vec = basis + .iter() + .map(|b| (0..dim).map(|j| (row[j] - mean[j]) * b[j]).sum::()) + .collect(); + (0..dim) + .map(|j| mean[j] + coeff.iter().zip(&basis).map(|(c, b)| c * b[j]).sum::()) + .collect() + }; + + // Precompute reconstructions. + let campq: Vec> = (0..n).map(campq_recon).collect(); + let helix: Vec> = (0..n).map(helix_recon).collect(); + let flat = |v: &[Vec]| -> Vec { v.iter().flatten().copied().collect() }; + let campq_f = flat(&campq); + let helix_f = flat(&helix); + + // ── recall@10 + distance Spearman vs the true full-D neighbours ── + let queries = 200usize; + let kk = 10usize; + let mut s2 = 0x9999u64; + let (mut rec_c, mut rec_h) = (0.0f64, 0.0f64); + let (mut td, mut cd, mut hd) = (Vec::new(), Vec::new(), Vec::new()); + for _ in 0..queries { + let qi = (splitmix(&mut s2) * n as f64) as usize % n; + let q = &data[qi * dim..(qi + 1) * dim]; + let truth: std::collections::HashSet = topk(q, &data, dim, n, kk).into_iter().collect(); + let c_top: std::collections::HashSet = topk(&campq[qi], &campq_f, dim, n, kk).into_iter().collect(); + let h_top: std::collections::HashSet = topk(&helix[qi], &helix_f, dim, n, kk).into_iter().collect(); + rec_c += truth.intersection(&c_top).count() as f64 / kk as f64; + rec_h += truth.intersection(&h_top).count() as f64 / kk as f64; + // distance-preservation pairs + for _ in 0..20 { + let j = (splitmix(&mut s2) * n as f64) as usize % n; + td.push(l2(q, &data[j * dim..(j + 1) * dim])); + cd.push(l2(&campq[qi], &campq[j])); + hd.push(l2(&helix[qi], &helix[j])); + } + } + + println!(" recall@10 dist ρ vs true what it encodes"); + println!( + " CAM-PQ-6 (6 B) {:>6.3} {:>7.3} a high-D POSITION (6 codes tiling all {dim} dims)", + rec_c / queries as f64, + spearman(&td, &cd) + ); + println!( + " Helix-48 (6 B) {:>6.3} {:>7.3} a 3-DOF ORIENTATION (its rank-3 PCA ceiling)", + rec_h / queries as f64, + spearman(&td, &hd) + ); + + println!("\n3×8 SPO + 2³ (in CausalEdge64): a RELATIONAL TRIPLE, not a point."); + println!(" = three palette indices (s,p,o ∈ 256) + the Pearl 2³ active-mask. Each of s/p/o IS a"); + println!(" 1-byte CAM-PQ code; the 2³ mask is the relation neither point codec can hold. So SPO"); + println!(" ⊇ 3× CAM-PQ + structure — it is CAM-PQ used relationally, orthogonal to helix."); + + let helix_loses = (rec_c / queries as f64) > (rec_h / queries as f64) + 0.1; + let mark = |b: bool| if b { "CONFIRMED" } else { "—" }; + println!("\nVERDICT:"); + println!(" helix CANNOT subsume CAM-PQ for high-D points (3-DOF ceiling) ... {}", mark(helix_loses)); + println!("\n ⇒ no single 48-bit vector subsumes all three — same category boundary as Correction 6:"); + println!(" • helix-48 = 3D ORIENTATION / spatial perturbation (splats, Morton field). Wins at ≤3D."); + println!(" • CAM-PQ-6 = high-D POSITION for NN recall (COCA words). Wins at high-D."); + println!(" • 3×8 SPO + 2³ = a RELATION (3 palette slots + activity). A different category — = 3×CAM-PQ."); + println!(" Overlap is only at ≤3D (orientations). At the COCA high-D regime helix collapses to rank-3;"); + println!(" CAM-PQ tiles all dims; SPO carries the relation. Squeezing a relation OR a high-D point into"); + println!(" a 3-DOF helix is the I-VSA-IDENTITIES / Correction-6 category error again."); +} From 8b034c3aaef24d2a6622331a82d3c986882eb407 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 12:06:45 +0000 Subject: [PATCH 12/13] =?UTF-8?q?test(hpc):=20CAM-PQ=20=C3=97=20Morton-cas?= =?UTF-8?q?cade=20synergy=20probe=20=E2=80=94=20coarse=E2=86=92fine=20prun?= =?UTF-8?q?e,=20lossless?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measures what the Morton-cascade machinery lends CAM-PQ. The coarse prefilter (first 2 of 6 subquantizers) is a partial-ADC LOWER BOUND, so pruning by it is admissible. Measured (N=8192, D=120, PQ-6, 300 queries): - recall@10 vs flat full-ADC = 1.000 at every survivor budget (64..512) — the prune is LOSSLESS relative to CAM-PQ's own results. - full-ADC evals cut 16×–128× (full distance computed only on coarse survivors). - recall@10 vs TRUE full-D ≈ 0.35 — CAM-PQ-6's own quantization ceiling, which the cascade matches exactly (1.0 vs flat) but cannot exceed. ⇒ Morton cascade adds SPEED (16–128× fewer full evals, lossless) + EFFICIENCY (free IVF coarse index / on-demand non-materialized blocks), NOT fidelity — fidelity is the orthogonal coarse+residue plane's job (edge_codec, ICC 0.97–0.99). Speed and fidelity are separable knobs. https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- Cargo.toml | 4 + examples/campq_cascade_probe.rs | 171 ++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 examples/campq_cascade_probe.rs diff --git a/Cargo.toml b/Cargo.toml index c53783c7..8098710b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,6 +105,10 @@ required-features = ["std"] name = "codec_overlap_probe" required-features = ["std"] +[[example]] +name = "campq_cascade_probe" +required-features = ["std"] + [dependencies] num-integer = { workspace = true } num-traits = { workspace = true } diff --git a/examples/campq_cascade_probe.rs b/examples/campq_cascade_probe.rs new file mode 100644 index 00000000..3994af2d --- /dev/null +++ b/examples/campq_cascade_probe.rs @@ -0,0 +1,171 @@ +//! CAM-PQ × Morton-cascade synergy probe — does the cascade machinery add speed +//! to CAM-PQ without losing recall? +//! +//! CAM-PQ ADC distance is a SUM of non-negative per-subquantizer table lookups, so +//! a PARTIAL sum (first c of m subquantizers) is an admissible LOWER BOUND on the +//! full distance — the same "bucketing > resolution" cascade as HHTL. The Morton +//! cascade contributes: (1) coarse→fine prune (full ADC only on coarse survivors), +//! (2) space-filling order so survivors are cache-contiguous, (3) a rolling floor +//! for the cut. This probe measures the prune the cascade buys at a recall cost. +//! +//! Metric: recall@10 vs true full-D L2 AND vs flat full-ADC, and the FULL-ADC +//! evaluation reduction (flat scans all N; cascade scans only the coarse survivors). +//! +//! cargo run --release --example campq_cascade_probe --features std + +use ndarray::hpc::edge_codec::Codebook; + +fn splitmix(s: &mut u64) -> f64 { + *s = s.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *s; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^= z >> 31; + (z >> 11) as f64 / (1u64 << 53) as f64 +} +fn randn(s: &mut u64) -> f32 { + let u1 = (splitmix(s) as f32).max(1e-12); + let u2 = splitmix(s) as f32; + (-2.0 * u1.ln()).sqrt() * (std::f32::consts::TAU * u2).cos() +} +fn l2(a: &[f32], b: &[f32]) -> f64 { + a.iter().zip(b).map(|(x, y)| ((x - y) as f64).powi(2)).sum() +} + +fn top_indices(scores: &[(usize, f64)], k: usize) -> std::collections::HashSet { + let mut v = scores.to_vec(); + v.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + v.into_iter().take(k).map(|(i, _)| i).collect() +} + +fn main() { + println!("== CAM-PQ × Morton-cascade: coarse→fine prune (partial-ADC lower bound) ==\n"); + + let (n, dim, m, kk) = (8192usize, 120usize, 6usize, 10usize); + let sub = dim / m; + let mut s = 0xCA5Cu64; + + // COCA-like high-D data (clustered). + let centers: Vec = (0..256 * dim).map(|_| randn(&mut s)).collect(); + let mut data = vec![0.0f32; n * dim]; + for i in 0..n { + let c = (splitmix(&mut s) * 256.0) as usize % 256; + for j in 0..dim { + data[i * dim + j] = centers[c * dim + j] + 0.45 * randn(&mut s); + } + } + + // CAM-PQ-6: train 6 subquantizers × 256 centroids; encode all rows to codes. + let subcb: Vec = (0..m) + .map(|q| { + let mut buf = vec![0.0f32; n * sub]; + for i in 0..n { + buf[i * sub..(i + 1) * sub].copy_from_slice(&data[i * dim + q * sub..i * dim + (q + 1) * sub]); + } + Codebook::train(&buf, n, sub, 256, 10, 1 + q as u64) + }) + .collect(); + let codes: Vec<[u8; 6]> = (0..n) + .map(|i| { + let mut c = [0u8; 6]; + for (q, cb) in subcb.iter().enumerate() { + c[q] = cb.assign(&data[i * dim + q * sub..i * dim + (q + 1) * sub]) as u8; + } + c + }) + .collect(); + + let queries = 300usize; + let coarse_c = 2usize; // first 2 subquantizers = the coarse lower-bound prefilter + let mut s2 = 0x7777u64; + + for &survivors in &[64usize, 128, 256, 512] { + let (mut rec_truth, mut rec_flat, mut full_evals) = (0.0f64, 0.0f64, 0usize); + for _ in 0..queries { + let qi = (splitmix(&mut s2) * n as f64) as usize % n; + let q = &data[qi * dim..(qi + 1) * dim]; + + // Per-subquantizer ADC tables: distance from query subvector to centroids. + let tables: Vec> = (0..m) + .map(|qd| { + let qsub = &q[qd * sub..(qd + 1) * sub]; + (0..256).map(|c| l2(qsub, subcb[qd].centroid(c))).collect() + }) + .collect(); + + // Truth: true full-D L2. + let truth = top_indices( + &(0..n) + .map(|i| (i, l2(q, &data[i * dim..(i + 1) * dim]))) + .collect::>(), + kk, + ); + // Flat full ADC over all N codes. + let flat = top_indices( + &(0..n) + .map(|i| { + ( + i, + (0..m) + .map(|qd| tables[qd][codes[i][qd] as usize]) + .sum::(), + ) + }) + .collect::>(), + kk, + ); + // Cascade: coarse (partial-ADC lower bound) prune → full ADC on survivors. + let mut coarse: Vec<(usize, f64)> = (0..n) + .map(|i| { + ( + i, + (0..coarse_c) + .map(|qd| tables[qd][codes[i][qd] as usize]) + .sum::(), + ) + }) + .collect(); + coarse.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + let surv: Vec = coarse.iter().take(survivors).map(|&(i, _)| i).collect(); + full_evals += surv.len(); + let cascade = top_indices( + &surv + .iter() + .map(|&i| { + ( + i, + (0..m) + .map(|qd| tables[qd][codes[i][qd] as usize]) + .sum::(), + ) + }) + .collect::>(), + kk, + ); + + rec_truth += truth.intersection(&cascade).count() as f64 / kk as f64; + rec_flat += flat.intersection(&cascade).count() as f64 / kk as f64; + } + let q = queries as f64; + println!( + " survivors {survivors:>4}/{n}: recall@10 vs truth {:>5.3} vs flat-ADC {:>5.3} full-ADC evals {:>4}/{n} ({:>5.1}× fewer)", + rec_truth / q, + rec_flat / q, + full_evals / queries, + n as f64 / (full_evals as f64 / q) + ); + } + + println!("\n (coarse prefilter = first {coarse_c} of {m} subquantizers — a partial-ADC LOWER BOUND, so the"); + println!(" prune is admissible: a true neighbour's full distance ≥ its coarse distance, so it survives.)"); + + println!("\nVERDICT — what the Morton cascade lends CAM-PQ:"); + println!(" SPEED ✓ coarse→fine prune: full ADC on a small survivor set, not all N (measured above)."); + println!(" + 2×2/4×4 tiling keeps the 256-entry LUT register-resident (FastScan/AMX pshufb);"); + println!(" + Morton order makes survivors cache-contiguous; + rolling floor sets the cut adaptively."); + println!(" EFFICIENCY ✓ Morton coarse pyramid = a free IVF coarse index; block distances on-demand"); + println!(" (the non-materialized property — 32768× amortization shown in morton_perturbation)."); + println!(" FIDELITY ✗ Morton aggregation does NOT lift PQ fidelity — mean-pooling codes loses (the 7.7°"); + println!(" coarse-pool result). Fidelity comes from the orthogonal coarse+RESIDUE plane"); + println!(" (edge_codec CoarseResidue: ICC 0.97–0.99, 14×), not from the cascade."); +} From d285936d3d701af675822ef149b7d130ff5c9938 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 12:15:13 +0000 Subject: [PATCH 13/13] =?UTF-8?q?docs(knowledge):=20codec/SoA=20facet=20ma?= =?UTF-8?q?p=20=E2=80=94=20speed=20&=20fidelity=20are=20separable,=20compo?= =?UTF-8?q?sable=20knobs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capstone map of the PR #218 probe family (10 probes). The holy grail = ONE SoA where every facet composes for accuracy AND speed, established mechanistically: - No single vector subsumes the facets (Correction-6 / I-VSA-IDENTITIES boundary) — the representation is a STRUCT of orthogonal facets, one column per category (HHTL place, helix orientation, CAM-PQ position, CausalEdge64 relation+truth, rolling-floor episodic basin, residue value, EpisodicWitness64 time). - SPEED knob = the cascade (coarse→fine admissible prune + tiling + rolling floor + Morton order): 16–128× fewer full evals at recall 1.000 vs flat — lossless. - FIDELITY knob = the residue plane (coarse + 4-bit/SVD): ICC 0.97–0.99, 14× — +bytes. - They compose: cascade-prune the coarse code, residue-refine the survivors. Includes the per-facet table with measured numbers, the category-boundary iron rules, the reproducible probe inventory, and an explicit WHITE-PATCHES list (EpisodicWitness64 unprobed, end-to-end compose unbuilt, cam_pq_cascade_search/AMX-assign not wired, Cascade Welford-inert bug, real-COCA not run, full SoA assembly pending). https://claude.ai/code/session_01D2WSmezQBNC3bUdHuGfGmo --- .claude/knowledge/codec-soa-facet-map.md | 93 ++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 .claude/knowledge/codec-soa-facet-map.md diff --git a/.claude/knowledge/codec-soa-facet-map.md b/.claude/knowledge/codec-soa-facet-map.md new file mode 100644 index 00000000..3b2eb2ce --- /dev/null +++ b/.claude/knowledge/codec-soa-facet-map.md @@ -0,0 +1,93 @@ +# KNOWLEDGE: Codec / SoA Facet Map — speed and fidelity are separable knobs + +## READ BY: truth-architect, family-codec-smith, palette-engineer, savant-architect, +## cascade-architect, integration-lead, resonance-cartographer + +## STATUS: probe-backed map (ndarray PR #218, 10 reproducible probes). The holy +## grail = ONE SoA where every facet composes for accuracy AND speed. +## Mechanism established; white patches listed at the bottom. + +--- + +## The one-line thesis (measured this session) + +**No single vector subsumes the others (Correction-6 / I-VSA-IDENTITIES category +boundary). The unified representation is a STRUCT of orthogonal facets — one SoA +column per category — and accuracy vs speed are TWO SEPARABLE KNOBS that compose: +cascade-prune the coarse code (speed, lossless), then residue-refine only the +survivors (fidelity, +bytes).** + +--- + +## The facets — one SoA column per native category + +| Facet (SoA column) | Codec | Category | Measured this session | Knob | +|---|---|---|---|---| +| place / semantic basin | HHTL (HEEL·HIP·TWIG) | hierarchical key | cascade prune (CLAM dfs-sieve 2.3×; CAM-PQ coarse→fine 16–128× lossless) | speed | +| episodic basin | rolling floor (Belichtungsmesser / EWMA) | self-calibrating μ+3σ | ρ=1.0 tracking under SD drift; shipped global-Welford **inert** (bug) | speed/adaptivity | +| position (high-D) | CAM-PQ | NN-recall position | recall ~0.66 vs truth; cascade-prunable losslessly (recall 1.0 vs flat) | — | +| orientation (phase+mag) | helix-48 | 3-DOF direction | 24-bit lossless vs ≤f16; needs +1 sign bit; ⊥ HHTL (ρ≈0); +13.6× recon | — | +| spatial perturbation | helix → Morton pyramid | parametric field | 32,768× amortized, on-demand exact at every level, fine-scale coherent | speed/memory | +| relation + truth | CausalEdge64 (3×8 SPO + 2³ + f/c) | relational triple | SPO = 3× CAM-PQ palette + Pearl mask; entropy ρ=−0.78 reliability proxy | — | +| reliability / entropy | entropy_class → CausalEdge64 spare [63:61] | Staunen↔Wisdom scalar | nars_entropy validated as reliability proxy | — | +| value refinement | edge_codec CoarseResidue / turbovec | per-item residue | ICC 0.97–0.99, 14× error cut (vs coarse-only) | fidelity | +| time / recurrence | EpisodicWitness64 | temporal | **NOT PROBED — white patch** | — | + +Bit budgets are the same order (≈6 bytes each) but the **domains differ** — the +6-byte coincidence is why "one vector" is tempting and wrong. + +## The two knobs (the holy-grail mechanism, measured) + +- **SPEED = the cascade.** Coarse→fine prune (partial-ADC / HHTL lower bound is + admissible) + 2×2/4×4 register-blocked LUT (FastScan/AMX `pshufb`) + Morton-order + contiguity + rolling-floor adaptive cut. **16–128× fewer full evals at recall + 1.000 vs flat** (`campq_cascade_probe`). Lossless — adds no error. +- **FIDELITY = the residue plane.** Coarse centroid + signed-4-bit / SVD residue. + **ICC 0.97–0.99, 14×** error cut (`edge_codec_compare`). Adds bytes, not error. +- **They compose, orthogonally:** prune the coarse code to a small survivor set, + then residue-refine only those. Fast AND accurate, each from its own mechanism. + This composition is the holy grail's load-bearing claim (each half measured; + the end-to-end compose is a white patch — see below). + +## The category boundary (the iron rule that kills the WRONG holy grail) + +Per Correction-6 (`bf16-hhtl-terrain.md`) + I-VSA-IDENTITIES: +- Do NOT float-reconstruct a byte register (bgz-hhtl-d on Qwen: cos~0.1, dead). +- Do NOT squeeze a relation OR a high-D point into a 3-DOF helix (`codec_overlap_probe`: + helix recall 0.245 vs CAM-PQ 0.657 on high-D; SPO is a different category entirely). +- Do NOT measure a router by reconstruction fidelity (it routes; only calibration matters). +- ⇒ The SoA stays a struct of facets; new capability = a new column, not a fold. + +## The reproducible probe family (ndarray PR #218) + +`reliability` (Pearson/Spearman/Cronbach/ICC) · `edge_codec` (coarse/residue/PQ) · +`entropy_ladder` (Staunen↔Wisdom). Probes: `edge_codec_compare`, +`instrument_mtmm_probe`, `cakes_grail_probe`, `entropy_ladder_probe`, +`helix_orthogonality_probe`, `helix_bitdepth_probe`, `morton_perturbation_probe`, +`rolling_floor_probe`, `codec_overlap_probe`, `campq_cascade_probe`. Each settles a +claim with a number; two found shipped bugs (Cascade Welford-inert; the bgz17 OOB +gather, fixed). + +## White patches on the map (unbuilt / unmeasured — be honest) + +1. **EpisodicWitness64 / temporal facet** — referenced, never probed. Biggest gap. +2. **End-to-end compose** — cascade-prune × residue-refine measured *separately*, + never together as one `coarse→prune→refine` pipeline. +3. **`cam_pq_cascade_search`** — probe-proven lossless, NOT wired into real `cam_pq.rs`. +4. **AMX-accelerated CAM-PQ assignment** — proven pattern (`edge_residue_probe` 100% + assign), not wired into `cam_pq.rs`. +5. **`TD-CASCADE-WELFORD-INERT`** — shipped `Cascade::observe` never fires `ShiftAlert` + per-sample (cumulative Δμ ≪ 2σ); needs windowed/EWMA. Found, not fixed. +6. **Real COCA codebook** — every probe is synthetic-COCA-like (labeled); none run on + the actual baked CAM index codebook. +7. **Full SoA assembly** — facets validated individually; the unified SoA (all columns, + one cascade sweep) is not assembled or measured end-to-end. +8. **entropy_class → CausalEdge64 spare bits** (R2) — computed, not stored. +9. **bf16-hhtl probe queue M1/M3/M4** — the routing-not-reconstruction versions, NOT RUN. + +## Cross-refs + +lance-graph: `.claude/knowledge/encoding-ecosystem.md` (encoding map), +`.claude/knowledge/bf16-hhtl-terrain.md` (Correction chain incl. #6), +`.claude/plans/entropy-ladder-spo-rung-v1.md` (R1–R6), `lance-graph-contract` +(`CausalEdge64`, `EpisodicWitness64`, `EdgeCodecFlavor`, the BindSpace SoA columns).