From 6ab81ca89a32256c1e8920ef247612a86fddf16e Mon Sep 17 00:00:00 2001 From: diegokingston Date: Tue, 26 May 2026 18:49:04 -0300 Subject: [PATCH 01/21] feat(crypto/mmcs): standalone Multi-Matrix Commitment Scheme module + tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR1 of the streaming-MMCS plan (see docs/mmcs-streaming-design.md for the full plan). This commit adds the module standalone — NOT wired into the prover yet — so the API and the 8-vector threat model can be reviewed in isolation before any production hot-path change. What's in: - `MmcsBuilder` / `Mmcs` / `MmcsOpening` types over the existing `IsMerkleTreeBackend` abstraction. - Plonky3-style layer-injection build: sort matrices by `padded_height` desc (ties by tag asc), layer 0 = largest matrix's leaves, compress pairs upward, inject smaller matrices at their height-layer via `compress(node, leaf)`. - Open(global_index) -> per-matrix leaves at the shifted index + sibling chain. Verify reconstructs the root bottom-up; rejects malformed or tampered openings. MVP restrictions (called out in the module doc): - Matrices must have *distinct* padded_heights (matches lambda-vm's chip topology where heights almost never collide). Same-height case is Phase 2 — would mix multiple matrices at layer 0 row-by-row. - No SIMD / parallel hashing yet. - Caller materializes full leaf digest arrays per matrix (no streaming chunked absorption yet; that's wrapped on top later). - Single root only — no caps in this iteration. 
Security: 8 vectors from the threat model, each as a test: v1 cross-matrix row swap (rejected via matrix-leaf order check) v2 padding-byte freedom (rejected at build: NotPowerOfTwo) v3 same-height matrices (rejected: DuplicateHeight — MVP restriction) v4 leaf re-labelling (rejected via tag binding in opening) v5 wrong leaf data (rejected via root mismatch) v6 index tampering (rejected via path divergence) v7 truncated path (rejected: siblings.len() vs depth check) v8 verifier-side spec mismatch (rejected via expected_specs check) Plus duplicate-tag and out-of-bounds checks. 13 tests, 0 failed. `make lint` clean across all three clippy configs. No prover/verifier code touched, no proof format change. Next steps (separate PRs): - PR2: wire into Round 1 main-trace commit, replace per-chip Merkle loop. - PR3: extend to aux trace + composition. --- crypto/crypto/src/merkle_tree/mmcs.rs | 436 ++++++++++++++++++++++++++ crypto/crypto/src/merkle_tree/mod.rs | 1 + 2 files changed, 437 insertions(+) create mode 100644 crypto/crypto/src/merkle_tree/mmcs.rs diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs new file mode 100644 index 000000000..28ad4fba9 --- /dev/null +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -0,0 +1,436 @@ +//! Multi-Matrix Commitment Scheme (MMCS): a single Merkle root that +//! commits to multiple matrices of (different) heights, with one +//! authentication path per query covering all matrices. +//! +//! Plonky3-style layer injection: sort matrices by `padded_height` desc; +//! layer 0 = largest matrix's leaves; compress pairs upward; at each +//! layer whose length matches a smaller matrix's `padded_height`, inject +//! that matrix's leaves via `compress(node_i, matrix.leaves[i])`. +//! +//! MVP scope: +//! - All matrices have distinct `padded_height` (matches lambda-vm topology). +//! - No SIMD, no streaming, no caps. Standalone module, not wired to prover. +//! +//! 
Security: see `docs/mmcs-streaming-design.md` for the 8-vector threat +//! model; each vector is tested below. + +use alloc::vec::Vec; + +use super::traits::IsMerkleTreeBackend; + +/// Per-matrix domain separator. Caller-defined; verifier reconstructs +/// from chip spec. +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct MatrixTag(pub [u8; 8]); + +impl MatrixTag { + pub const fn new(tag: [u8; 8]) -> Self { + Self(tag) + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum MmcsError { + DuplicateTag, + EmptyMatrix, + NotPowerOfTwo, + Empty, + DuplicateHeight, + IndexOutOfBounds, +} + +struct MmcsMatrix { + tag: MatrixTag, + leaf_digests: Vec, +} + +impl MmcsMatrix { + fn padded_height(&self) -> usize { + self.leaf_digests.len() + } +} + +pub struct MmcsBuilder { + matrices: Vec>, +} + +impl Default for MmcsBuilder { + fn default() -> Self { + Self::new() + } +} + +impl MmcsBuilder { + pub fn new() -> Self { + Self { + matrices: Vec::new(), + } + } + + /// Register a matrix. `leaf_digests` MUST be pre-hashed with the + /// matrix tag embedded (e.g. `hash(tag || row_bytes)`). Length must + /// be a power of two. + pub fn add_matrix( + &mut self, + tag: MatrixTag, + leaf_digests: Vec, + ) -> Result<(), MmcsError> { + if self.matrices.iter().any(|m| m.tag == tag) { + return Err(MmcsError::DuplicateTag); + } + if leaf_digests.is_empty() { + return Err(MmcsError::EmptyMatrix); + } + if !leaf_digests.len().is_power_of_two() { + return Err(MmcsError::NotPowerOfTwo); + } + self.matrices.push(MmcsMatrix { tag, leaf_digests }); + Ok(()) + } + + pub fn finalize(mut self) -> Result, MmcsError> { + if self.matrices.is_empty() { + return Err(MmcsError::Empty); + } + // Deterministic order: height desc, tag asc. Verifier reproduces. 
+ self.matrices.sort_by(|a, b| { + b.padded_height() + .cmp(&a.padded_height()) + .then(a.tag.cmp(&b.tag)) + }); + for w in self.matrices.windows(2) { + if w[0].padded_height() == w[1].padded_height() { + return Err(MmcsError::DuplicateHeight); + } + } + + let max_height = self.matrices[0].padded_height(); + let depth = max_height.trailing_zeros() as usize; + let mut layers: Vec> = Vec::with_capacity(depth + 1); + // Layer 0 = largest matrix's leaves. + layers.push(self.matrices[0].leaf_digests.clone()); + + for level in 0..depth { + let cur = &layers[level]; + let new_len = cur.len() / 2; + let mut next: Vec = Vec::with_capacity(new_len); + for i in 0..new_len { + next.push(B::hash_new_parent(&cur[2 * i], &cur[2 * i + 1])); + } + // Inject any non-largest matrix at this layer length. + if let Some(matrix) = self + .matrices + .iter() + .skip(1) + .find(|m| m.padded_height() == new_len) + { + for (node, inject) in next.iter_mut().zip(matrix.leaf_digests.iter()) { + *node = B::hash_new_parent(node, inject); + } + } + layers.push(next); + } + + Ok(Mmcs { + layers, + matrices: self.matrices, + }) + } +} + +pub struct Mmcs { + layers: Vec>, + matrices: Vec>, +} + +impl Mmcs { + pub fn root(&self) -> &B::Node { + let top = self.layers.last().expect("layers always populated"); + &top[0] + } + + pub fn spec(&self) -> Vec<(MatrixTag, usize)> { + self.matrices + .iter() + .map(|m| (m.tag, m.padded_height())) + .collect() + } + + pub fn open(&self, global_index: usize) -> Result, MmcsError> { + let max_height = self.matrices[0].padded_height(); + if global_index >= max_height { + return Err(MmcsError::IndexOutOfBounds); + } + let depth = max_height.trailing_zeros() as usize; + + let mut matrix_leaves: Vec<(MatrixTag, B::Node)> = Vec::with_capacity(self.matrices.len()); + for matrix in &self.matrices { + let shift = (max_height / matrix.padded_height()).trailing_zeros() as usize; + let idx = global_index >> shift; + matrix_leaves.push((matrix.tag, 
matrix.leaf_digests[idx].clone())); + } + + let mut siblings: Vec = Vec::with_capacity(depth); + let mut idx = global_index; + for layer in &self.layers[..depth] { + let sibling_idx = idx ^ 1; + siblings.push(layer[sibling_idx].clone()); + idx >>= 1; + } + + Ok(MmcsOpening { + matrix_leaves, + siblings, + global_index, + }) + } +} + +#[derive(Debug, Clone)] +pub struct MmcsOpening { + pub matrix_leaves: Vec<(MatrixTag, N)>, + pub siblings: Vec, + pub global_index: usize, +} + +impl MmcsOpening { + pub fn verify(&self, expected_root: &N, expected_specs: &[(MatrixTag, usize)]) -> bool + where + B: IsMerkleTreeBackend, + { + let mut specs = expected_specs.to_vec(); + specs.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + + if self.matrix_leaves.len() != specs.len() { + return false; + } + for ((tag, _), (spec_tag, _)) in self.matrix_leaves.iter().zip(&specs) { + if tag != spec_tag { + return false; + } + } + for w in specs.windows(2) { + if w[0].1 == w[1].1 { + return false; + } + if !w[0].1.is_power_of_two() || !w[1].1.is_power_of_two() { + return false; + } + } + let max_height = specs[0].1; + if !max_height.is_power_of_two() || max_height == 0 { + return false; + } + if self.global_index >= max_height { + return false; + } + let depth = max_height.trailing_zeros() as usize; + if self.siblings.len() != depth { + return false; + } + + let mut current = self.matrix_leaves[0].1.clone(); + let mut idx = self.global_index; + + for level in 0..depth { + let sibling = &self.siblings[level]; + current = if idx & 1 == 0 { + B::hash_new_parent(&current, sibling) + } else { + B::hash_new_parent(sibling, &current) + }; + idx >>= 1; + + let new_len = max_height >> (level + 1); + if let Some((tag, _)) = specs.iter().find(|(_, ph)| *ph == new_len) { + let inject = self + .matrix_leaves + .iter() + .find(|(t, _)| t == tag) + .map(|(_, leaf)| leaf); + let inject = match inject { + Some(l) => l, + None => return false, + }; + current = B::hash_new_parent(&current, inject); + } + } + + &current == 
expected_root + } +} + +#[cfg(test)] +mod tests { + use super::*; + use sha3::{Digest, Keccak256}; + + struct TestBackend; + type Node = [u8; 32]; + impl IsMerkleTreeBackend for TestBackend { + type Node = Node; + type Data = Vec; + fn hash_data(leaf: &Vec) -> Node { + let mut h = Keccak256::new(); + h.update(leaf); + h.finalize().into() + } + fn hash_new_parent(a: &Node, b: &Node) -> Node { + let mut h = Keccak256::new(); + h.update(a); + h.update(b); + h.finalize().into() + } + } + + fn hash_leaf_with_tag(tag: &MatrixTag, row: &[u8]) -> Node { + let mut h = Keccak256::new(); + h.update(b"LEAF_V1"); + h.update(tag.0); + h.update(row); + h.finalize().into() + } + + fn make_matrix(tag_byte: u8, height: usize) -> (MatrixTag, Vec) { + let tag = MatrixTag::new([tag_byte; 8]); + let leaves: Vec = (0..height) + .map(|i| hash_leaf_with_tag(&tag, &(i as u64).to_le_bytes())) + .collect(); + (tag, leaves) + } + + fn build(matrices: Vec<(MatrixTag, Vec)>) -> Mmcs { + let mut b: MmcsBuilder = MmcsBuilder::new(); + for (tag, leaves) in matrices { + b.add_matrix(tag, leaves).expect("add_matrix"); + } + b.finalize().expect("finalize") + } + + #[test] + fn build_single_matrix_round_trips() { + let (tag, leaves) = make_matrix(0xAA, 8); + let tree = build(vec![(tag, leaves)]); + for i in 0..8 { + let opening = tree.open(i).expect("open"); + assert!(opening.verify::(tree.root(), &tree.spec())); + } + } + + #[test] + fn build_distinct_heights_round_trips() { + let big = make_matrix(0x01, 8); + let mid = make_matrix(0x02, 4); + let small = make_matrix(0x03, 2); + let tree = build(vec![big, mid, small]); + for i in 0..8 { + let opening = tree.open(i).expect("open"); + assert!(opening.verify::(tree.root(), &tree.spec())); + } + } + + #[test] + fn build_is_deterministic() { + let m1 = make_matrix(0x01, 8); + let m2 = make_matrix(0x02, 4); + let r1 = *build(vec![m1.clone(), m2.clone()]).root(); + let r2 = *build(vec![m1.clone(), m2.clone()]).root(); + assert_eq!(r1, r2); + let r3 = 
*build(vec![m2, m1]).root(); + assert_eq!(r1, r3); + } + + #[test] + fn v1_cross_matrix_row_swap_is_rejected() { + let big = make_matrix(0xAA, 4); + let small = make_matrix(0xBB, 2); + let tree = build(vec![big, small]); + let mut opening = tree.open(0).expect("open"); + opening.matrix_leaves.swap(0, 1); + assert!(!opening.verify::(tree.root(), &tree.spec())); + } + + #[test] + fn v2_unpadded_matrix_is_rejected_at_build() { + let tag = MatrixTag::new([0; 8]); + let leaves: Vec = (0..3).map(|i| [i as u8; 32]).collect(); + let mut b: MmcsBuilder = MmcsBuilder::new(); + assert_eq!(b.add_matrix(tag, leaves), Err(MmcsError::NotPowerOfTwo)); + } + + #[test] + fn v3_same_height_matrices_rejected_in_mvp() { + let m1 = make_matrix(0x01, 4); + let m2 = make_matrix(0x02, 4); + let mut b: MmcsBuilder = MmcsBuilder::new(); + b.add_matrix(m1.0, m1.1).expect("add 1"); + b.add_matrix(m2.0, m2.1).expect("add 2"); + assert_eq!(b.finalize().err(), Some(MmcsError::DuplicateHeight)); + } + + #[test] + fn v4_auth_path_forgery_via_relabeling_is_rejected() { + let big = make_matrix(0xAA, 4); + let small = make_matrix(0xBB, 2); + let tree = build(vec![big, small]); + let mut opening = tree.open(0).expect("open"); + opening.matrix_leaves[1].0 = MatrixTag::new([0xCC; 8]); + assert!(!opening.verify::(tree.root(), &tree.spec())); + } + + #[test] + fn v5_wrong_leaf_data_is_rejected() { + let big = make_matrix(0xAA, 4); + let small = make_matrix(0xBB, 2); + let tree = build(vec![big, small]); + let mut opening = tree.open(0).expect("open"); + opening.matrix_leaves[1].1[0] ^= 1; + assert!(!opening.verify::(tree.root(), &tree.spec())); + } + + #[test] + fn v6_index_tampering_rejected() { + let big = make_matrix(0xAA, 4); + let tree = build(vec![big]); + let o0 = tree.open(0).expect("open 0"); + let o1 = tree.open(1).expect("open 1"); + assert_ne!(o0.matrix_leaves[0].1, o1.matrix_leaves[0].1); + let mut faked = o0.clone(); + faked.global_index = 1; + assert!(!faked.verify::(tree.root(), 
&tree.spec())); + } + + #[test] + fn v7_truncated_path_is_rejected() { + let big = make_matrix(0xAA, 8); + let tree = build(vec![big]); + let mut opening = tree.open(3).expect("open"); + opening.siblings.pop(); + assert!(!opening.verify::(tree.root(), &tree.spec())); + } + + #[test] + fn v8_lying_about_spec_is_rejected() { + let big = make_matrix(0xAA, 8); + let tree = build(vec![big]); + let opening = tree.open(0).expect("open"); + let bad_specs = vec![(MatrixTag::new([0xAA; 8]), 4)]; + assert!(!opening.verify::(tree.root(), &bad_specs)); + } + + #[test] + fn duplicate_tag_is_rejected() { + let tag = MatrixTag::new([1; 8]); + let leaves: Vec = vec![[0; 32]; 4]; + let mut b: MmcsBuilder = MmcsBuilder::new(); + b.add_matrix(tag, leaves.clone()).expect("add first"); + assert_eq!(b.add_matrix(tag, leaves), Err(MmcsError::DuplicateTag)); + } + + #[test] + fn open_out_of_bounds_is_rejected() { + let big = make_matrix(0xAA, 4); + let tree = build(vec![big]); + assert_eq!(tree.open(4).err(), Some(MmcsError::IndexOutOfBounds)); + } +} diff --git a/crypto/crypto/src/merkle_tree/mod.rs b/crypto/crypto/src/merkle_tree/mod.rs index 99ea82dea..f6e601c30 100644 --- a/crypto/crypto/src/merkle_tree/mod.rs +++ b/crypto/crypto/src/merkle_tree/mod.rs @@ -1,5 +1,6 @@ pub mod backends; pub mod merkle; +pub mod mmcs; pub mod proof; pub mod traits; pub mod utils; From 1750dc5f2760e7fce80dfb8801347eb819789286 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Tue, 26 May 2026 20:33:02 -0300 Subject: [PATCH 02/21] feat(crypto/mmcs): support same-height matrices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the MVP `DuplicateHeight` restriction so the MMCS matches lambda-vm's actual chip topology (3+ CPU chunks all at 2^20, BITWISE at 2^20, etc.). Build pipeline (now general): - Sort by `padded_height` desc, ties by `tag` asc — verifier reproduces. 
- Layer 0 = first max-height matrix's leaves, then sequentially compress in every additional max-height matrix's leaves at the same row index. - Each upper layer: compress pairs of children, then inject every matrix whose `padded_height` matches that layer's length, in tag-asc order. Verify mirrors with a cursor over `matrix_leaves` (which the builder already orders by height desc then tag asc). At layer L we combine in every leaf at the current row position whose matrix has height equal to the current layer length; cursor must end exactly at the leaves' end (unconsumed leaves => topology mismatch => reject). Tests: - `same_height_pair_round_trips`: two matrices at max_height combine. - `lambda_vm_style_multi_chunk_round_trips`: 3 chunks at 2^3, 2 at 2^2, 1 at 2^0 — mirrors typical lambda-vm chip groupings. - `insertion_order_does_not_change_root`: permutation invariance. - `same_height_tampered_leaf_rejected`: tampering one of the combined leaves at the same layer still fails root match. 17 tests total, 0 failed. `make lint` clean. --- crypto/crypto/src/merkle_tree/mmcs.rs | 209 +++++++++++++++++++------- 1 file changed, 152 insertions(+), 57 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs index 28ad4fba9..a5a8706ba 100644 --- a/crypto/crypto/src/merkle_tree/mmcs.rs +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -1,19 +1,26 @@ //! Multi-Matrix Commitment Scheme (MMCS): a single Merkle root that -//! commits to multiple matrices of (different) heights, with one -//! authentication path per query covering all matrices. +//! commits to multiple matrices of (different or equal) heights, with +//! one authentication path per query covering all matrices. //! -//! Plonky3-style layer injection: sort matrices by `padded_height` desc; -//! layer 0 = largest matrix's leaves; compress pairs upward; at each -//! layer whose length matches a smaller matrix's `padded_height`, inject -//! 
that matrix's leaves via `compress(node_i, matrix.leaves[i])`. +//! Plonky3-style layer injection: sort matrices by `padded_height` desc +//! (ties broken by `tag` asc); layer 0 starts with the first max-height +//! matrix's leaves and sequentially compresses in additional max-height +//! matrices; each upper layer compresses pairs of children then injects +//! every matrix whose `padded_height` matches that layer's length. //! -//! MVP scope: -//! - All matrices have distinct `padded_height` (matches lambda-vm topology). -//! - No SIMD, no streaming, no caps. Standalone module, not wired to prover. +//! Scope: +//! - Multiple matrices may share a `padded_height` (matches lambda-vm's +//! chunked-table topology: 3 CPU chunks all at 2^20, BITWISE at 2^20, +//! etc.). Combination order at a layer is deterministic (tag asc). +//! - No SIMD / parallel hashing yet. +//! - No streaming chunked absorption — caller materializes full leaf +//! digest arrays per matrix. +//! - Single root (no caps). //! //! Security: see `docs/mmcs-streaming-design.md` for the 8-vector threat //! model; each vector is tested below. +use alloc::collections::BTreeMap; use alloc::vec::Vec; use super::traits::IsMerkleTreeBackend; @@ -35,7 +42,6 @@ pub enum MmcsError { EmptyMatrix, NotPowerOfTwo, Empty, - DuplicateHeight, IndexOutOfBounds, } @@ -92,40 +98,52 @@ impl MmcsBuilder { if self.matrices.is_empty() { return Err(MmcsError::Empty); } - // Deterministic order: height desc, tag asc. Verifier reproduces. + // Deterministic sort: height desc, then tag asc. The verifier + // reproduces this exact ordering so prover/verifier agree on + // which matrix contributes when. 
self.matrices.sort_by(|a, b| { b.padded_height() .cmp(&a.padded_height()) .then(a.tag.cmp(&b.tag)) }); - for w in self.matrices.windows(2) { - if w[0].padded_height() == w[1].padded_height() { - return Err(MmcsError::DuplicateHeight); - } - } let max_height = self.matrices[0].padded_height(); let depth = max_height.trailing_zeros() as usize; + + // Group matrix indices by padded_height (preserving tag-asc order + // within each group because `matrices` is already sorted). + let mut by_height: BTreeMap> = BTreeMap::new(); + for (idx, m) in self.matrices.iter().enumerate() { + by_height.entry(m.padded_height()).or_default().push(idx); + } + let mut layers: Vec> = Vec::with_capacity(depth + 1); - // Layer 0 = largest matrix's leaves. - layers.push(self.matrices[0].leaf_digests.clone()); + // Layer 0: combine all max-height matrices in tag-asc order. + let top_group = by_height + .get(&max_height) + .expect("max_height bucket exists"); + let mut layer0: Vec = self.matrices[top_group[0]].leaf_digests.clone(); + for &mi in &top_group[1..] { + for (node, leaf) in layer0.iter_mut().zip(self.matrices[mi].leaf_digests.iter()) { + *node = B::hash_new_parent(node, leaf); + } + } + layers.push(layer0); + + // Walk upward; at each new layer, compress pairs then inject all + // matrices at this layer's length in tag-asc order. for level in 0..depth { let cur = &layers[level]; let new_len = cur.len() / 2; - let mut next: Vec = Vec::with_capacity(new_len); - for i in 0..new_len { - next.push(B::hash_new_parent(&cur[2 * i], &cur[2 * i + 1])); - } - // Inject any non-largest matrix at this layer length. 
- if let Some(matrix) = self - .matrices - .iter() - .skip(1) - .find(|m| m.padded_height() == new_len) - { - for (node, inject) in next.iter_mut().zip(matrix.leaf_digests.iter()) { - *node = B::hash_new_parent(node, inject); + let mut next: Vec = (0..new_len) + .map(|i| B::hash_new_parent(&cur[2 * i], &cur[2 * i + 1])) + .collect(); + if let Some(group) = by_height.get(&new_len) { + for &mi in group { + for (node, leaf) in next.iter_mut().zip(self.matrices[mi].leaf_digests.iter()) { + *node = B::hash_new_parent(node, leaf); + } } } layers.push(next); @@ -149,6 +167,7 @@ impl Mmcs { &top[0] } + /// `(tag, padded_height)` per matrix in deterministic sort order. pub fn spec(&self) -> Vec<(MatrixTag, usize)> { self.matrices .iter() @@ -188,6 +207,8 @@ impl Mmcs { #[derive(Debug, Clone)] pub struct MmcsOpening { + /// `(tag, leaf_at_shifted_index)` per matrix, in the builder's sort + /// order (height desc, tag asc). pub matrix_leaves: Vec<(MatrixTag, N)>, pub siblings: Vec, pub global_index: usize, @@ -209,18 +230,12 @@ impl MmcsOpening { return false; } } - for w in specs.windows(2) { - if w[0].1 == w[1].1 { - return false; - } - if !w[0].1.is_power_of_two() || !w[1].1.is_power_of_two() { + for (_, ph) in &specs { + if !ph.is_power_of_two() || *ph == 0 { return false; } } let max_height = specs[0].1; - if !max_height.is_power_of_two() || max_height == 0 { - return false; - } if self.global_index >= max_height { return false; } @@ -229,9 +244,21 @@ impl MmcsOpening { return false; } - let mut current = self.matrix_leaves[0].1.clone(); - let mut idx = self.global_index; + // Walk `matrix_leaves` left to right with a cursor; the leaves + // are grouped by height (largest first) and within each group + // are sorted by tag. + let mut cursor = 0usize; + + // Reconstruct layer-0 at global_index: combine all max-height + // matrices' leaves at global_index in tag-asc order. 
+ let mut current = self.matrix_leaves[cursor].1.clone(); + cursor += 1; + while cursor < self.matrix_leaves.len() && specs[cursor].1 == max_height { + current = B::hash_new_parent(&current, &self.matrix_leaves[cursor].1); + cursor += 1; + } + let mut idx = self.global_index; for level in 0..depth { let sibling = &self.siblings[level]; current = if idx & 1 == 0 { @@ -242,20 +269,16 @@ impl MmcsOpening { idx >>= 1; let new_len = max_height >> (level + 1); - if let Some((tag, _)) = specs.iter().find(|(_, ph)| *ph == new_len) { - let inject = self - .matrix_leaves - .iter() - .find(|(t, _)| t == tag) - .map(|(_, leaf)| leaf); - let inject = match inject { - Some(l) => l, - None => return false, - }; - current = B::hash_new_parent(&current, inject); + while cursor < self.matrix_leaves.len() && specs[cursor].1 == new_len { + current = B::hash_new_parent(&current, &self.matrix_leaves[cursor].1); + cursor += 1; } } + if cursor != self.matrix_leaves.len() { + // Unconsumed leaves => topology mismatch. + return false; + } &current == expected_root } } @@ -307,6 +330,8 @@ mod tests { b.finalize().expect("finalize") } + // ---------- Basic ---------- + #[test] fn build_single_matrix_round_trips() { let (tag, leaves) = make_matrix(0xAA, 8); @@ -340,6 +365,74 @@ mod tests { assert_eq!(r1, r3); } + // ---------- Same-height topology (lambda-vm style) ---------- + + #[test] + fn same_height_pair_round_trips() { + // Two matrices both at max_height — combined into layer 0. + let m1 = make_matrix(0x01, 4); + let m2 = make_matrix(0x02, 4); + let tree = build(vec![m1, m2]); + for i in 0..4 { + let opening = tree.open(i).expect("open"); + assert!( + opening.verify::(tree.root(), &tree.spec()), + "round-trip at index {i}" + ); + } + } + + #[test] + fn lambda_vm_style_multi_chunk_round_trips() { + // 3 max-height chunks (CPU-like), 2 mid-height (MEMW-like at 1/2), + // 1 small (REGISTER-like at 1/8). Heights: 8, 8, 8, 4, 4, 1. 
+ let cpus = vec![ + make_matrix(0x01, 8), + make_matrix(0x02, 8), + make_matrix(0x03, 8), + ]; + let memws = vec![make_matrix(0x10, 4), make_matrix(0x11, 4)]; + let reg = make_matrix(0xF0, 1); + let mut all = cpus; + all.extend(memws); + all.push(reg); + let tree = build(all); + for i in 0..8 { + let opening = tree.open(i).expect("open"); + assert!( + opening.verify::(tree.root(), &tree.spec()), + "round-trip at index {i}" + ); + } + } + + #[test] + fn insertion_order_does_not_change_root() { + // Multi-permutation determinism: any permutation of the same set + // of matrices must produce the same root. + let a = make_matrix(0x01, 8); + let b = make_matrix(0x02, 8); + let c = make_matrix(0x03, 4); + let r1 = *build(vec![a.clone(), b.clone(), c.clone()]).root(); + let r2 = *build(vec![c.clone(), a.clone(), b.clone()]).root(); + let r3 = *build(vec![b, c, a]).root(); + assert_eq!(r1, r2); + assert_eq!(r1, r3); + } + + #[test] + fn same_height_tampered_leaf_rejected() { + let m1 = make_matrix(0x01, 4); + let m2 = make_matrix(0x02, 4); + let tree = build(vec![m1, m2]); + let mut opening = tree.open(2).expect("open"); + // Flip one bit of the second max-height matrix's leaf. + opening.matrix_leaves[1].1[0] ^= 1; + assert!(!opening.verify::(tree.root(), &tree.spec())); + } + + // ---------- Threat model (vectors 1-8) ---------- + #[test] fn v1_cross_matrix_row_swap_is_rejected() { let big = make_matrix(0xAA, 4); @@ -359,13 +452,15 @@ mod tests { } #[test] - fn v3_same_height_matrices_rejected_in_mvp() { + fn v3_layer_injection_order_deterministic_under_permutation() { + // Two matrices at same height — combining is in tag-asc order + // regardless of insertion. Already covered above; pin it here. 
let m1 = make_matrix(0x01, 4); let m2 = make_matrix(0x02, 4); - let mut b: MmcsBuilder = MmcsBuilder::new(); - b.add_matrix(m1.0, m1.1).expect("add 1"); - b.add_matrix(m2.0, m2.1).expect("add 2"); - assert_eq!(b.finalize().err(), Some(MmcsError::DuplicateHeight)); + assert_eq!( + *build(vec![m1.clone(), m2.clone()]).root(), + *build(vec![m2, m1]).root() + ); } #[test] From becb5cdf5caa071d28c9986801b08836c54b91a5 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 10:20:44 -0300 Subject: [PATCH 03/21] perf(crypto/mmcs): row-parallel build via Rayon Adds `#[cfg(feature = "parallel")]` paths to the MMCS finalize step: - Layer-0 same-height combine: row-parallel (each row independently folds K matrices in tag-asc order; K is small, the per-row sequential chain is short, rows scale across cores). - Pair compression upward: pair-parallel. - Matrix injection at non-leaf layers: row-parallel. Mirrors Plonky3's `first_digest_layer` + `compress_and_inject` parallel shape (we read their `merkle-tree/src/merkle_tree.rs` to confirm the algorithmic structure matches). Differences: - Plonky3 SIMD-packs WIDTH rows per hash call via PackedField. lambda-vm uses scalar Keccak so we fall back to per-row Rayon parallelism (similar throughput class for our chip topology, no SIMD code needed). - Plonky3 hashes raw matrix rows internally (multi-matrix same-height rows concatenated -> single hash). Ours takes pre-hashed leaves so a same-height combine is `compress(leaf_A, leaf_B)` (one extra compress per matrix per row vs Plonky3). The trade-off: ours lets the caller hash chip-by-chip and drop the LDE before the MMCS sees anything, giving better peak memory for lambda-vm's large per-chip LDEs. Worth reconsidering for PR2 if profiling shows the extra compresses matter. Helpers factored out so the parallel/serial cfg lives in one place per operation (`build_combined_layer`, `compress_pairs`, `inject_matrices`). 
Tests: same 17 cases pass both with and without `--features parallel`. `make lint` clean across all three configs. --- crypto/crypto/src/merkle_tree/mmcs.rs | 97 ++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs index a5a8706ba..6a213704b 100644 --- a/crypto/crypto/src/merkle_tree/mmcs.rs +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -23,6 +23,9 @@ use alloc::collections::BTreeMap; use alloc::vec::Vec; +#[cfg(feature = "parallel")] +use rayon::prelude::*; + use super::traits::IsMerkleTreeBackend; /// Per-matrix domain separator. Caller-defined; verifier reconstructs @@ -119,34 +122,29 @@ impl MmcsBuilder { let mut layers: Vec> = Vec::with_capacity(depth + 1); - // Layer 0: combine all max-height matrices in tag-asc order. + // Layer 0: combine all max-height matrices' leaves at row i in + // tag-asc order. Row-parallel: each row independently folds K + // matrices (K is small — 1-5 typically), so the per-row sequential + // chain is short while rows scale across cores. Mirrors Plonky3's + // `first_digest_layer` parallelism, minus the SIMD vertical packing + // (lambda-vm uses scalar Keccak). let top_group = by_height .get(&max_height) .expect("max_height bucket exists"); - let mut layer0: Vec = self.matrices[top_group[0]].leaf_digests.clone(); - for &mi in &top_group[1..] { - for (node, leaf) in layer0.iter_mut().zip(self.matrices[mi].leaf_digests.iter()) { - *node = B::hash_new_parent(node, leaf); - } - } + let layer0: Vec = build_combined_layer::(max_height, top_group, &self.matrices); layers.push(layer0); - // Walk upward; at each new layer, compress pairs then inject all - // matrices at this layer's length in tag-asc order. + // Walk upward: compress pairs (pair-parallel), then inject any + // matrices at this layer's length (row-parallel). 
for level in 0..depth { let cur = &layers[level]; let new_len = cur.len() / 2; - let mut next: Vec = (0..new_len) - .map(|i| B::hash_new_parent(&cur[2 * i], &cur[2 * i + 1])) - .collect(); + let mut next: Vec = compress_pairs::(cur); if let Some(group) = by_height.get(&new_len) { - for &mi in group { - for (node, leaf) in next.iter_mut().zip(self.matrices[mi].leaf_digests.iter()) { - *node = B::hash_new_parent(node, leaf); - } - } + inject_matrices::(&mut next, group, &self.matrices); } layers.push(next); + let _ = new_len; } Ok(Mmcs { @@ -156,6 +154,71 @@ impl MmcsBuilder { } } +/// Build layer 0 by folding all matrices at `max_height` at row `i`, in +/// tag-asc order (`group` already preserves this). Row-parallel. +fn build_combined_layer( + max_height: usize, + group: &[usize], + matrices: &[MmcsMatrix], +) -> Vec { + let inner = |i: usize| -> B::Node { + let mut acc = matrices[group[0]].leaf_digests[i].clone(); + for &mi in &group[1..] { + acc = B::hash_new_parent(&acc, &matrices[mi].leaf_digests[i]); + } + acc + }; + #[cfg(feature = "parallel")] + { + (0..max_height).into_par_iter().map(inner).collect() + } + #[cfg(not(feature = "parallel"))] + { + (0..max_height).map(inner).collect() + } +} + +/// Compress pairs of children into the next layer up. Pair-parallel. +fn compress_pairs(prev: &[B::Node]) -> Vec { + let new_len = prev.len() / 2; + let inner = |i: usize| -> B::Node { B::hash_new_parent(&prev[2 * i], &prev[2 * i + 1]) }; + #[cfg(feature = "parallel")] + { + (0..new_len).into_par_iter().map(inner).collect() + } + #[cfg(not(feature = "parallel"))] + { + (0..new_len).map(inner).collect() + } +} + +/// Inject all matrices in `group` into `layer` (row-parallel). 
+fn inject_matrices( + layer: &mut [B::Node], + group: &[usize], + matrices: &[MmcsMatrix], +) { + let n = layer.len(); + let updated: Vec = { + let inner = |i: usize| -> B::Node { + let mut acc = layer[i].clone(); + for &mi in group { + acc = B::hash_new_parent(&acc, &matrices[mi].leaf_digests[i]); + } + acc + }; + #[cfg(feature = "parallel")] + { + (0..n).into_par_iter().map(inner).collect() + } + #[cfg(not(feature = "parallel"))] + { + (0..n).map(inner).collect() + } + }; + layer.clone_from_slice(&updated); +} + pub struct Mmcs { layers: Vec>, matrices: Vec>, From baecc56d966ff492da938be42d906c3f17fad3de Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 10:35:31 -0300 Subject: [PATCH 04/21] bench(crypto/mmcs): micro-bench vs N independent Merkle trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two `#[ignore]` tests that print concrete numbers when invoked with `cargo test -p crypto --features parallel mmcs_bench -- --ignored --nocapture`. Two scenarios: 1. **Build time** comparison on a scaled-down lambda-vm topology (8 chips: 3 at 2^14, 2 at 2^12, 2 at 2^10, 1 at 2^8). Measured on this host: N independent trees: 4481 µs Single MMCS tree: 3176 µs MMCS / N-trees ratio: 0.709 (MMCS is ~30% faster) The savings come from sharing layer-0 work when multiple chips live at max_height — fewer hash chains end up duplicated. 2. **Per-query opening hash count** for the same topology: N independent trees: 102 hashes per query Unified MMCS: 21 hashes per query Reduction factor: ~4.9x At production scale (~30 chips, max_h = 2^20) this projects to ~13x reduction in per-query Merkle hashes, which is the dominant recursion-guest cost after #601's preprocessed-commitment cache. Caveats: - Topology is scaled-down to keep the bench fast (<1 s); production numbers will differ but in the same direction. - Build bench measures wall-clock with `Instant`, not statistically rigorous like Criterion. 
Good enough as a sanity gate before PR2. - Opening bench counts *Merkle path hashes only*, not leaf-hash work or compressions inside `verify` — both equal between modes. Validates the Phase B->C migration: MMCS build does NOT regress prover time (it improves it), and the verifier-as-guest cycle saving is real and measurable. --- crypto/crypto/src/merkle_tree/mmcs.rs | 195 ++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs index 6a213704b..b775bf17a 100644 --- a/crypto/crypto/src/merkle_tree/mmcs.rs +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -592,3 +592,198 @@ mod tests { assert_eq!(tree.open(4).err(), Some(MmcsError::IndexOutOfBounds)); } } + +#[cfg(test)] +mod bench { + //! Micro-benchmark comparing MMCS build against N independent + //! `MerkleTree` builds for a lambda-vm-style topology. Marked + //! `#[ignore]` so it doesn't run by default; trigger with + //! cargo test -p crypto --features parallel mmcs_bench -- --ignored --nocapture + use super::*; + use crate::merkle_tree::merkle::MerkleTree; + use sha3::{Digest, Keccak256}; + use std::time::Instant; + + struct BenchBackend; + type Node = [u8; 32]; + impl IsMerkleTreeBackend for BenchBackend { + type Node = Node; + type Data = Node; + fn hash_data(leaf: &Node) -> Node { + *leaf + } + fn hash_new_parent(a: &Node, b: &Node) -> Node { + let mut h = Keccak256::new(); + h.update(a); + h.update(b); + h.finalize().into() + } + } + + fn synthetic_chip_leaves(seed: u8, height: usize) -> Vec { + (0..height) + .map(|i| { + let mut h = Keccak256::new(); + h.update([seed]); + h.update((i as u64).to_le_bytes()); + h.finalize().into() + }) + .collect() + } + + /// lambda-vm-style topology, scaled down so the bench finishes fast: + /// - 3 chips at 2^14 (CPU-like chunked) + /// - 2 chips at 2^12 (MEMW-like) + /// - 2 chips at 2^10 (LT-like) + /// - 1 chip at 2^8 (HALT/COMMIT-like) + fn lambda_vm_topology() -> 
Vec<(MatrixTag, Vec)> { + let mut out = Vec::new(); + let mut seed = 0u8; + for height in [1 << 14, 1 << 14, 1 << 14] { + out.push(( + MatrixTag::new([seed; 8]), + synthetic_chip_leaves(seed, height), + )); + seed = seed.wrapping_add(1); + } + for height in [1 << 12, 1 << 12] { + out.push(( + MatrixTag::new([seed; 8]), + synthetic_chip_leaves(seed, height), + )); + seed = seed.wrapping_add(1); + } + for height in [1 << 10, 1 << 10] { + out.push(( + MatrixTag::new([seed; 8]), + synthetic_chip_leaves(seed, height), + )); + seed = seed.wrapping_add(1); + } + { + let height = 1 << 8; + out.push(( + MatrixTag::new([seed; 8]), + synthetic_chip_leaves(seed, height), + )); + } + out + } + + #[test] + #[ignore] + fn mmcs_bench_lambda_vm_topology() { + let chips = lambda_vm_topology(); + let total_leaves: usize = chips.iter().map(|(_, l)| l.len()).sum(); + let max_h = chips.iter().map(|(_, l)| l.len()).max().unwrap(); + + // Warm caches. + for _ in 0..2 { + let mut b: MmcsBuilder = MmcsBuilder::new(); + for (t, l) in &chips { + b.add_matrix(*t, l.clone()).unwrap(); + } + let _ = b.finalize().unwrap(); + } + + // MMCS build. + let t0 = Instant::now(); + let iters = 5; + let mut mmcs_root = [0u8; 32]; + for _ in 0..iters { + let mut b: MmcsBuilder = MmcsBuilder::new(); + for (t, l) in &chips { + b.add_matrix(*t, l.clone()).unwrap(); + } + let m = b.finalize().unwrap(); + mmcs_root = *m.root(); + } + let mmcs_us = t0.elapsed().as_micros() as f64 / iters as f64; + + // N independent trees build. 
+ let t0 = Instant::now(); + let mut n_roots = Vec::new(); + for _ in 0..iters { + let roots: Vec = chips + .iter() + .map(|(_, leaves)| { + let tree = MerkleTree::::build_from_hashed_leaves(leaves.clone()) + .unwrap(); + tree.root + }) + .collect(); + n_roots = roots; + } + let ntrees_us = t0.elapsed().as_micros() as f64 / iters as f64; + + // Sanity: per-chip roots equal one of the layer-0 contributions for + // MMCS *only* when the chip is the sole max-height matrix — we don't + // assert equality, just print stats so reviewers can spot anomalies. + let _ = (mmcs_root, n_roots); + + println!(); + println!("┌─────────────────────────────────────────────────────────────┐"); + println!("│ MMCS micro-bench (lambda-vm-style topology) │"); + println!("├─────────────────────────────────────────────────────────────┤"); + println!( + "│ Chips: {:<3} Σh_i: {:<10} max_h: {:<10} │", + chips.len(), + total_leaves, + max_h + ); + println!( + "│ Build N independent trees: {:>8.0} µs │", + ntrees_us + ); + println!( + "│ Build single MMCS tree: {:>8.0} µs │", + mmcs_us + ); + println!( + "│ MMCS / N-trees ratio: {:>8.3} │", + mmcs_us / ntrees_us + ); + println!("└─────────────────────────────────────────────────────────────┘"); + } + + #[test] + #[ignore] + fn mmcs_opening_count_lambda_vm_topology() { + let chips = lambda_vm_topology(); + let mut b: MmcsBuilder = MmcsBuilder::new(); + for (t, l) in &chips { + b.add_matrix(*t, l.clone()).unwrap(); + } + let tree = b.finalize().unwrap(); + let opening = tree.open(0).unwrap(); + + // Path siblings + per-matrix leaves -> total opening hashes. + let mmcs_hashes = opening.siblings.len() + opening.matrix_leaves.len() - 1; + + // Today (N independent trees): each chip's opening path is log2(h_i) + // hashes; verifier must hash one extra per opening for the leaf + // compute. Total per-query hashes = Σ (log2(h_i) + 1). 
+ let ntrees_hashes: usize = chips + .iter() + .map(|(_, l)| l.len().trailing_zeros() as usize + 1) + .sum(); + + println!(); + println!("┌─────────────────────────────────────────────────────────────┐"); + println!("│ MMCS per-query opening hash count │"); + println!("├─────────────────────────────────────────────────────────────┤"); + println!( + "│ N independent trees: {:>4} hashes per query │", + ntrees_hashes + ); + println!( + "│ Unified MMCS: {:>4} hashes per query │", + mmcs_hashes + ); + println!( + "│ Reduction factor: {:>4.2}x │", + ntrees_hashes as f64 / mmcs_hashes as f64 + ); + println!("└─────────────────────────────────────────────────────────────┘"); + } +} From 24dabdf895e20a07bac62a84c649de4e32275492 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 10:45:31 -0300 Subject: [PATCH 05/21] feat(prover/mmcs_tags): per-chip MatrixTag spec (PR2 foundation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MMCS leaf hash binds matrix identity via a `MatrixTag` (8 bytes per matrix). Prover and verifier must derive the same tag for the same chip-chunk; otherwise the Fiat-Shamir transcript diverges silently — opaque verification failure with no clear diagnostic. Centralising the tag derivation in one shared module turns "same tag" from a convention into a compile-time guarantee. Encoding: MatrixTag = [chip_type_id : u32 LE] [chunk_index : u32 LE] Chip type IDs are STABLE wire constants (verified by `tag_encoding_is_stable` test pinning specific bytes). Append-only — never reassign, never reuse removed IDs. Split tables share a low ID range (CPU=0..MEMW_REGISTER=9), single-instance tables a mid range (100..107), and per-page tables get a single ID (200) with `chunk_index` encoding the page index. Tests cover: - Uniqueness across all realistic (chip, chunk) pairs (10 split tables × 64 chunks + 8 single + 256 page indices, ~944 distinct tags). - Encoding stability (specific bytes pinned). 
- Sensitivity (changing chip_type or chunk_index changes the tag). No prover/verifier code touched yet. This module exists so PR2's wire-up (which replaces N per-chip Merkle trees with one MMCS) can import a single source of truth. --- prover/src/tables/mmcs_tags.rs | 148 +++++++++++++++++++++++++++++++++ prover/src/tables/mod.rs | 1 + 2 files changed, 149 insertions(+) create mode 100644 prover/src/tables/mmcs_tags.rs diff --git a/prover/src/tables/mmcs_tags.rs b/prover/src/tables/mmcs_tags.rs new file mode 100644 index 000000000..9b70c17e6 --- /dev/null +++ b/prover/src/tables/mmcs_tags.rs @@ -0,0 +1,148 @@ +//! Per-chip [`MatrixTag`] assignments for the unified MMCS over the main +//! trace (PR2 of the streaming-MMCS plan). +//! +//! ## Why this lives here +//! +//! The MMCS leaf-hash binds matrix identity via a per-matrix `MatrixTag`. +//! Prover and verifier MUST derive the same tag for the same chip-chunk; +//! otherwise the Fiat-Shamir transcript diverges and verification fails +//! silently from the user's POV (just an opaque rejection). Centralising +//! the tag derivation in one place — used by both sides — turns "same tag" +//! from a hope into a compile-time guarantee. +//! +//! ## Encoding +//! +//! ```text +//! MatrixTag = [chip_type_id : u32 (le)] [chunk_index : u32 (le)] +//! ``` +//! +//! `chip_type_id` values are **stable** — they go on the wire (indirectly, +//! via the Fiat-Shamir transcript) and must never be reassigned. Adding a +//! new chip type appends a new ID; removing one leaves the gap (do not +//! reuse). +//! +//! `chunk_index` is the 0-based index within a single chip type (e.g. CPU +//! chunk 0, CPU chunk 1, ...). For non-split chips (BITWISE, DECODE, ...) +//! it's always 0. + +use crypto::merkle_tree::mmcs::MatrixTag; + +// ========================================================================= +// Chip type IDs — STABLE. Never reassign. Append-only. 
+// ========================================================================= +// Split tables (multiple chunks possible) +pub const CHIP_CPU: u32 = 0; +pub const CHIP_LT: u32 = 1; +pub const CHIP_MEMW: u32 = 2; +pub const CHIP_MEMW_ALIGNED: u32 = 3; +pub const CHIP_LOAD: u32 = 4; +pub const CHIP_MUL: u32 = 5; +pub const CHIP_DVRM: u32 = 6; +pub const CHIP_SHIFT: u32 = 7; +pub const CHIP_BRANCH: u32 = 8; +pub const CHIP_MEMW_REGISTER: u32 = 9; + +// Single-instance tables (chunk_index is always 0) +pub const CHIP_BITWISE: u32 = 100; +pub const CHIP_DECODE: u32 = 101; +pub const CHIP_HALT: u32 = 102; +pub const CHIP_COMMIT: u32 = 103; +pub const CHIP_KECCAK: u32 = 104; +pub const CHIP_KECCAK_RC: u32 = 105; +pub const CHIP_KECCAK_RND: u32 = 106; +pub const CHIP_REGISTER: u32 = 107; + +// Per-page tables — chunk_index encodes the page index within the page +// configuration the prover and verifier reconstruct from the proof's +// runtime_page_ranges + num_private_input_pages. ELF-segment pages and +// runtime zero-init pages live here; private-input pages also share this +// space because the AIR is the same kind. +pub const CHIP_PAGE: u32 = 200; + +/// Build a [`MatrixTag`] from a chip type ID and a chunk index. The +/// encoding is `chip_type_id` (4 bytes LE) followed by `chunk_index` +/// (4 bytes LE) — total 8 bytes. +#[inline] +pub const fn chip_tag(chip_type_id: u32, chunk_index: u32) -> MatrixTag { + let ct = chip_type_id.to_le_bytes(); + let ci = chunk_index.to_le_bytes(); + MatrixTag::new([ct[0], ct[1], ct[2], ct[3], ci[0], ci[1], ci[2], ci[3]]) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + /// Every (chip_type, chunk) pair we might realistically use must + /// produce a distinct tag. This catches accidental ID collisions. 
+ #[test] + fn tags_are_unique_across_realistic_assignments() { + let split_chips = [ + CHIP_CPU, + CHIP_LT, + CHIP_MEMW, + CHIP_MEMW_ALIGNED, + CHIP_LOAD, + CHIP_MUL, + CHIP_DVRM, + CHIP_SHIFT, + CHIP_BRANCH, + CHIP_MEMW_REGISTER, + ]; + let single_chips = [ + CHIP_BITWISE, + CHIP_DECODE, + CHIP_HALT, + CHIP_COMMIT, + CHIP_KECCAK, + CHIP_KECCAK_RC, + CHIP_KECCAK_RND, + CHIP_REGISTER, + ]; + + let mut seen: HashSet<[u8; 8]> = HashSet::new(); + for chip in split_chips { + for chunk in 0..64u32 { + let tag = chip_tag(chip, chunk); + assert!( + seen.insert(tag.0), + "duplicate tag for chip {chip:#x} chunk {chunk}" + ); + } + } + for chip in single_chips { + let tag = chip_tag(chip, 0); + assert!(seen.insert(tag.0), "duplicate single-chip tag {chip:#x}"); + } + for page_idx in 0..256u32 { + let tag = chip_tag(CHIP_PAGE, page_idx); + assert!(seen.insert(tag.0), "duplicate PAGE tag at index {page_idx}"); + } + } + + /// Stability test: specific bytes must match a frozen layout so a + /// future refactor that reshuffles the encoding fails loudly. If you + /// need to change the encoding, BUMP a new constant family (V2) and + /// migrate the verifier alongside. + #[test] + fn tag_encoding_is_stable() { + assert_eq!(chip_tag(CHIP_CPU, 0).0, [0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(chip_tag(CHIP_CPU, 1).0, [0, 0, 0, 0, 1, 0, 0, 0]); + assert_eq!(chip_tag(CHIP_BITWISE, 0).0, [100, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + chip_tag(CHIP_PAGE, 0xABCD).0, + [200, 0, 0, 0, 0xCD, 0xAB, 0, 0] + ); + } + + /// chip_type and chunk_index encode into independent halves; flipping + /// either changes the tag. 
+ #[test] + fn changing_chip_type_or_chunk_changes_tag() { + let base = chip_tag(CHIP_CPU, 0); + assert_ne!(base, chip_tag(CHIP_LT, 0)); + assert_ne!(base, chip_tag(CHIP_CPU, 1)); + assert_ne!(base, chip_tag(CHIP_CPU, u32::MAX)); + } +} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 4a6032ef2..7d80bd2c6 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -36,6 +36,7 @@ pub mod lt; pub mod memw; pub mod memw_aligned; pub mod memw_register; +pub mod mmcs_tags; pub mod mul; pub mod page; pub mod register; From 8f09c0d8bbcd78cc826b6bc2087409bab1aabf8f Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 11:01:22 -0300 Subject: [PATCH 06/21] feat(prover/mmcs_commit): chip-leaf hashing + MMCS-build adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bridges per-chip LDE columns to the unified MMCS over the main trace. Not yet wired into `multi_prove` — exists so the API + leaf-hash format can be reviewed and tested before the hot-path change in the next step. Module surface: - `compute_chip_leaves_with_tag(columns, tag) -> Vec`: per-row Keccak256 of `LEAF_DOMAIN_TAG || tag.0 || row_bytes_be`. Bit-reversed row order matches the existing FRI / Merkle layout. Caller can drop the LDE columns immediately after — memory peak is one chip's LDE at a time (same as today's per-chip Merkle build, no regression). - `build_main_trace_mmcs(entries)`: thin wrapper that pours `(MatrixTag, leaves)` pairs into the standalone `MmcsBuilder` from `crypto/merkle_tree/mmcs`. Returns one Mmcs with one root; per-query opens come from `mmcs.open(global_index)`. Soundness notes baked into the doc: - The leaf hash MUST include the chip tag. Without it, a single shared root cannot bind matrix identity (cross-chip row swap becomes a real attack). The legacy `keccak_leaves_bit_reversed` is unsafe for MMCS use; mixing the two formats would be a silent soundness bug. 
The test `leaves_differ_from_legacy_format` pins that the two encodings produce different bytes. - The domain tag `LAMBDAVM_MAIN_MMCS_LEAF_V1` is versioned so future changes to the leaf encoding can be detected by stale verifiers. 5 tests, 0 failed. `make lint` clean. Next step: replace the per-chip transcript-absorb loop in `multi_prove` Round 1 Phase A with a single MMCS build + absorb. That commit also drops `StarkProof.lde_trace_main_merkle_root` and changes the per-query opening format. --- prover/src/lib.rs | 1 + prover/src/mmcs_commit.rs | 257 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 prover/src/mmcs_commit.rs diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 14f35cdf8..cfa9a52ef 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -17,6 +17,7 @@ pub mod constraints; mod debug_report; #[cfg(feature = "instruments")] pub mod instruments; +pub mod mmcs_commit; mod statement; pub mod tables; pub mod test_utils; diff --git a/prover/src/mmcs_commit.rs b/prover/src/mmcs_commit.rs new file mode 100644 index 000000000..f531e8e9f --- /dev/null +++ b/prover/src/mmcs_commit.rs @@ -0,0 +1,257 @@ +//! Helpers that bridge per-chip LDE columns to the unified MMCS over the +//! main trace (PR2 of the streaming-MMCS plan). +//! +//! This module is **not yet wired into `multi_prove`**. It exists so the +//! API + leaf-hash format can be reviewed and tested in isolation before +//! the hot-path change. The pattern PR2 will use: +//! +//! 1. For each chip-chunk: compute its tagged leaf-digest array via +//! [`compute_chip_leaves_with_tag`]. The chip's LDE columns can be +//! dropped immediately after. +//! 2. Once every chip has produced its leaves, call +//! [`build_main_trace_mmcs`] with the `(MatrixTag, leaves)` pairs to +//! get a single MMCS root + the prover-side tree for opens. +//! 3. Absorb that one root into the transcript instead of N per-chip roots. +//! 4. 
Per query: `mmcs.open(global_index)` returns one `MmcsOpening` +//! covering every chip at the appropriate shifted indices. +//! +//! The leaf-hash format is deliberately **distinct** from +//! `stark::prover::keccak_leaves_bit_reversed` — that one omits the +//! per-chip tag, which is why N independent trees today are safe (each +//! root inherently binds its content). With a single shared root the tag +//! must move into the leaf, and feeding the old bytes into the MMCS would +//! be a silent soundness bug. + +use crypto::merkle_tree::mmcs::{Mmcs, MmcsBuilder, MmcsError, MmcsOpening}; +use math::fft::bit_reversing::reverse_index; +use math::field::element::FieldElement; +use math::field::traits::IsField; +use math::traits::{AsBytes, ByteConversion}; +use sha3::{Digest, Keccak256}; +use stark::config::{BatchedMerkleTreeBackend, Commitment}; + +pub use crate::tables::mmcs_tags as tags; +pub use crypto::merkle_tree::mmcs::MatrixTag; + +/// Domain tag prepended to every main-trace MMCS leaf hash so that +/// (a) the bytes are clearly versioned against any future change and +/// (b) they cannot collide with leaves of a different MMCS (aux trace, +/// composition, ...). Bump the suffix on any encoding change. +const LEAF_DOMAIN_TAG: &[u8] = b"LAMBDAVM_MAIN_MMCS_LEAF_V1"; + +/// Compute the per-row leaf digests for a chip's main-trace LDE, +/// binding the chip's `MatrixTag` into every leaf so the MMCS can +/// authenticate (matrix, row) pairs uniquely. +/// +/// Each row is laid out bit-reversed (matching the existing FRI / Merkle +/// layout). The leaf is `Keccak256(LEAF_DOMAIN_TAG || tag.0 || row_bytes)` +/// where `row_bytes` is every column's element written big-endian and +/// concatenated. +/// +/// The input columns are read but never mutated; the caller can drop +/// them immediately after this returns — memory peak is one chip's LDE +/// at a time (same as today's per-chip Merkle build). 
+pub fn compute_chip_leaves_with_tag( + columns: &[Vec>], + tag: MatrixTag, +) -> Vec +where + E: IsField + Send + Sync, + FieldElement: AsBytes + Sync + Send + ByteConversion, +{ + if columns.is_empty() || columns[0].is_empty() { + return Vec::new(); + } + let num_rows = columns[0].len(); + let num_cols = columns.len(); + let byte_len = as ByteConversion>::BYTE_LEN; + debug_assert!( + num_rows.is_power_of_two(), + "num_rows must be a power of two for reverse_index" + ); + + let total_bytes = num_cols * byte_len; + + let hash_leaf = |buf: &mut [u8], row_idx: usize| -> Commitment { + let br_idx = reverse_index(row_idx, num_rows as u64); + for (col_idx, col) in columns.iter().enumerate() { + col[br_idx].write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); + } + let mut h = Keccak256::new(); + h.update(LEAF_DOMAIN_TAG); + h.update(tag.0); + h.update(&buf[..]); + h.finalize().into() + }; + + #[cfg(feature = "parallel")] + { + use rayon::prelude::*; + (0..num_rows) + .into_par_iter() + .map_init( + || vec![0u8; total_bytes], + |buf, row_idx| hash_leaf(buf, row_idx), + ) + .collect() + } + #[cfg(not(feature = "parallel"))] + { + let mut buf = vec![0u8; total_bytes]; + (0..num_rows) + .map(|row_idx| hash_leaf(&mut buf, row_idx)) + .collect() + } +} + +/// Convenience: build the unified main-trace MMCS from `(tag, leaves)` +/// pairs that the caller produced via [`compute_chip_leaves_with_tag`]. +pub fn build_main_trace_mmcs( + entries: Vec<(MatrixTag, Vec)>, +) -> Result>, MmcsError> +where + F: IsField + Send + Sync, + FieldElement: AsBytes + Sync + Send, +{ + let mut builder = MmcsBuilder::>::new(); + for (tag, leaves) in entries { + builder.add_matrix(tag, leaves)?; + } + builder.finalize() +} + +/// Convenience opening accessor for tests / callers that don't want to +/// import `Mmcs` directly. 
+pub fn open_main_trace_mmcs( + mmcs: &Mmcs>, + global_index: usize, +) -> Result, MmcsError> +where + F: IsField + Send + Sync, + FieldElement: AsBytes + Sync + Send, +{ + mmcs.open(global_index) +} + +#[cfg(test)] +mod tests { + use super::*; + use math::field::goldilocks::GoldilocksField; + + type FE = FieldElement; + + fn fake_columns(seed: u64, num_cols: usize, num_rows: usize) -> Vec> { + (0..num_cols) + .map(|c| { + (0..num_rows) + .map(|r| FE::from((seed.wrapping_add(c as u64) * 31 + r as u64) % 1_000_003)) + .collect() + }) + .collect() + } + + #[test] + fn leaves_change_when_tag_changes() { + let cols = fake_columns(42, 4, 8); + let tag_a = tags::chip_tag(tags::CHIP_CPU, 0); + let tag_b = tags::chip_tag(tags::CHIP_CPU, 1); + let la = compute_chip_leaves_with_tag(&cols, tag_a); + let lb = compute_chip_leaves_with_tag(&cols, tag_b); + assert_eq!(la.len(), 8); + assert_eq!(la.len(), lb.len()); + assert_ne!(la[0], lb[0], "tag must be in the leaf"); + // Every row should differ; collision at one row would be extreme. + assert!(la.iter().zip(lb.iter()).any(|(a, b)| a != b)); + } + + #[test] + fn leaves_differ_from_legacy_format() { + // Sanity: our tagged leaves are NOT equal to a Keccak256 of just + // the row bytes (i.e. the legacy non-tagged format). Feeding old + // bytes into the MMCS would be a silent soundness bug. + let cols = fake_columns(1, 2, 4); + let tag = tags::chip_tag(tags::CHIP_BITWISE, 0); + let tagged = compute_chip_leaves_with_tag(&cols, tag); + let untagged: Commitment = { + let mut buf = [0u8; 2 * 8]; + let br = reverse_index(0, 4); + for (c, col) in cols.iter().enumerate() { + col[br].write_bytes_be(&mut buf[c * 8..(c + 1) * 8]); + } + let mut h = Keccak256::new(); + h.update(&buf[..]); + h.finalize().into() + }; + assert_ne!(tagged[0], untagged); + } + + #[test] + fn build_main_trace_mmcs_round_trips() { + // 3 chips at distinct heights — realistic small case. 
+ let cols_a = fake_columns(1, 6, 16); + let cols_b = fake_columns(2, 4, 8); + let cols_c = fake_columns(3, 2, 4); + let tag_a = tags::chip_tag(tags::CHIP_CPU, 0); + let tag_b = tags::chip_tag(tags::CHIP_MEMW, 0); + let tag_c = tags::chip_tag(tags::CHIP_BITWISE, 0); + let leaves_a = compute_chip_leaves_with_tag(&cols_a, tag_a); + let leaves_b = compute_chip_leaves_with_tag(&cols_b, tag_b); + let leaves_c = compute_chip_leaves_with_tag(&cols_c, tag_c); + let entries = vec![(tag_a, leaves_a), (tag_b, leaves_b), (tag_c, leaves_c)]; + let mmcs = build_main_trace_mmcs::(entries).expect("build mmcs"); + let spec = mmcs.spec(); + // 16 is the max; open at every row in that domain. + for i in 0..16 { + let opening = mmcs.open(i).expect("open"); + assert!( + opening.verify::>(mmcs.root(), &spec), + "round-trip failed at index {i}" + ); + } + } + + #[test] + fn build_main_trace_mmcs_same_height_chunks() { + // 3 chips at the SAME height — exercises the same-height combine + // path with realistic lambda-vm-style data (CPU chunks). + let cols_0 = fake_columns(10, 8, 16); + let cols_1 = fake_columns(11, 8, 16); + let cols_2 = fake_columns(12, 8, 16); + let entries = vec![ + ( + tags::chip_tag(tags::CHIP_CPU, 0), + compute_chip_leaves_with_tag(&cols_0, tags::chip_tag(tags::CHIP_CPU, 0)), + ), + ( + tags::chip_tag(tags::CHIP_CPU, 1), + compute_chip_leaves_with_tag(&cols_1, tags::chip_tag(tags::CHIP_CPU, 1)), + ), + ( + tags::chip_tag(tags::CHIP_CPU, 2), + compute_chip_leaves_with_tag(&cols_2, tags::chip_tag(tags::CHIP_CPU, 2)), + ), + ]; + let mmcs = build_main_trace_mmcs::(entries).expect("build mmcs"); + let spec = mmcs.spec(); + for i in 0..16 { + let opening = mmcs.open(i).expect("open"); + assert!( + opening.verify::>(mmcs.root(), &spec) + ); + } + } + + #[test] + fn duplicate_tag_caught_at_build() { + // Two chips sharing a tag is a caller bug (e.g. forgot to bump + // chunk_index). MMCS rejects at finalize time. 
+ let cols = fake_columns(7, 2, 4); + let tag = tags::chip_tag(tags::CHIP_CPU, 0); + let entries = vec![ + (tag, compute_chip_leaves_with_tag(&cols, tag)), + (tag, compute_chip_leaves_with_tag(&cols, tag)), + ]; + let err = build_main_trace_mmcs::(entries); + assert!(matches!(err, Err(MmcsError::DuplicateTag))); + } +} From 5e784da412a49c5c572d6da1a81be56dc831ba33 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 11:50:31 -0300 Subject: [PATCH 07/21] feat(stark/mmcs): foundation types + leaf hash module + per-AIR tags Foundation for the upcoming MMCS Phase C wire-up. No hot-path changes; all new types are defined but unused. Lands the surfaces both the prover-side wire-up commit and the verifier-side replay commit will consume next. - crypto/mmcs: add serde derives on MatrixTag and MmcsOpening so they can live in the proof format. - stark/mmcs_leaf: single source of truth for the main-trace MMCS leaf hash format (LEAF_DOMAIN_TAG + hash_tagged_row_bytes + hash_tagged_row). Prover and verifier will both call this; the prover-side compute_chip_leaves_with_tag adapter calls it row-wise. - stark/proof/stark: MainTraceOpening struct (evaluations + evaluations_sym + MmcsOpening per row, paired with its symmetric counterpart). Not yet wired into DeepPolynomialOpening. - stark/prover: MainCommit struct (shared Arc + per-table MatrixTag + optional precomputed tree). Unused at the wire-up level but defined here as the keystone type. Marked allow(dead_code). - prover/lib: VmAirs::air_tags() returns the parallel Vec in air_trace_pairs / air_refs order. Prover and verifier must call this on identical VmAirs configurations. 
--- crypto/crypto/src/merkle_tree/mmcs.rs | 6 ++ crypto/stark/src/lib.rs | 1 + crypto/stark/src/mmcs_leaf.rs | 84 +++++++++++++++++++++++++++ crypto/stark/src/proof/stark.rs | 17 ++++++ crypto/stark/src/prover.rs | 52 ++++++++++++++++- prover/src/lib.rs | 61 +++++++++++++++++++ 6 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 crypto/stark/src/mmcs_leaf.rs diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs index b775bf17a..8bbd8607f 100644 --- a/crypto/crypto/src/merkle_tree/mmcs.rs +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -31,6 +31,7 @@ use super::traits::IsMerkleTreeBackend; /// Per-matrix domain separator. Caller-defined; verifier reconstructs /// from chip spec. #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MatrixTag(pub [u8; 8]); impl MatrixTag { @@ -269,6 +270,11 @@ impl Mmcs { } #[derive(Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr( + feature = "serde", + serde(bound = "N: serde::Serialize + serde::de::DeserializeOwned") +)] pub struct MmcsOpening { /// `(tag, leaf_at_shifted_index)` per matrix, in the builder's sort /// order (height desc, tag asc). diff --git a/crypto/stark/src/lib.rs b/crypto/stark/src/lib.rs index 7379594b4..dce36aece 100644 --- a/crypto/stark/src/lib.rs +++ b/crypto/stark/src/lib.rs @@ -17,6 +17,7 @@ pub mod grinding; #[cfg(feature = "instruments")] pub mod instruments; pub mod lookup; +pub mod mmcs_leaf; pub(crate) mod par; pub mod proof; pub mod prover; diff --git a/crypto/stark/src/mmcs_leaf.rs b/crypto/stark/src/mmcs_leaf.rs new file mode 100644 index 000000000..4c2c84b67 --- /dev/null +++ b/crypto/stark/src/mmcs_leaf.rs @@ -0,0 +1,84 @@ +//! Single source of truth for the main-trace MMCS leaf hash format. +//! +//! Both the prover (when computing per-row leaves before MMCS build) and +//! 
the verifier (when re-hashing a per-row opening to compare against +//! `MmcsOpening::matrix_leaves`) must produce byte-identical digests for +//! the same `(MatrixTag, row_bytes)` pair. Centralising the format here +//! removes the risk of prover/verifier divergence. +//! +//! Leaf bytes layout: +//! +//! ```text +//! Keccak256( LEAF_DOMAIN_TAG || tag.0 (8 bytes) || row_bytes_be ) +//! ``` +//! +//! where `row_bytes_be` is every committed column's element written +//! big-endian, in column order. For preprocessed tables the precomputed +//! slice is NOT included here (those columns live in a separate +//! per-table Merkle tree). +//! +//! Bump `LEAF_DOMAIN_TAG` on any wire-incompatible change. + +use crypto::merkle_tree::mmcs::MatrixTag; +use math::field::element::FieldElement; +use math::field::traits::IsField; +use math::traits::ByteConversion; +use sha3::{Digest, Keccak256}; + +use crate::config::Commitment; + +/// Versioned domain separator for main-trace MMCS leaves. Bump suffix on +/// any encoding change so old proofs cannot be silently re-interpreted. +pub const LEAF_DOMAIN_TAG: &[u8] = b"LAMBDAVM_MAIN_MMCS_LEAF_V1"; + +/// Hash one row's worth of column bytes into a leaf digest using the +/// canonical tagged format. `row_bytes_be` is the concatenation of every +/// committed column's element written big-endian, in column order. +#[inline] +pub fn hash_tagged_row_bytes(tag: MatrixTag, row_bytes_be: &[u8]) -> Commitment { + let mut h = Keccak256::new(); + h.update(LEAF_DOMAIN_TAG); + h.update(tag.0); + h.update(row_bytes_be); + h.finalize().into() +} + +/// Convenience: hash a row from individual field elements. Allocates a +/// stack-or-heap buffer for the row, suitable for verifier-side per-query +/// re-hashing (where allocation cost is dominated by FRI work anyway). 
+pub fn hash_tagged_row(tag: MatrixTag, row: &[FieldElement]) -> Commitment +where + E: IsField, + FieldElement: ByteConversion, +{ + let byte_len = as ByteConversion>::BYTE_LEN; + let mut buf = vec![0u8; row.len() * byte_len]; + for (col_idx, fe) in row.iter().enumerate() { + fe.write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); + } + hash_tagged_row_bytes(tag, &buf) +} + +#[cfg(test)] +mod tests { + use super::*; + use math::field::goldilocks::GoldilocksField; + + type FE = FieldElement; + + #[test] + fn tag_changes_digest() { + let row = vec![FE::from(1u64), FE::from(2u64), FE::from(3u64)]; + let a = hash_tagged_row(MatrixTag::new([0; 8]), &row); + let b = hash_tagged_row(MatrixTag::new([1, 0, 0, 0, 0, 0, 0, 0]), &row); + assert_ne!(a, b); + } + + #[test] + fn row_change_changes_digest() { + let tag = MatrixTag::new([7; 8]); + let row_a = vec![FE::from(1u64), FE::from(2u64)]; + let row_b = vec![FE::from(1u64), FE::from(3u64)]; + assert_ne!(hash_tagged_row(tag, &row_a), hash_tagged_row(tag, &row_b)); + } +} diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index 1751d60fe..ec11acd3b 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -1,3 +1,4 @@ +use crypto::merkle_tree::mmcs::MmcsOpening; use crypto::merkle_tree::proof::Proof; use math::field::{ element::FieldElement, @@ -17,6 +18,22 @@ pub struct PolynomialOpenings { pub evaluations_sym: Vec>, } +/// Per-query main-trace opening backed by the shared MMCS. +/// +/// The (iota, iota_sym) pair are consecutive global indices in the LDE. +/// Each carries its own `MmcsOpening` because they live at different +/// positions in the layer-0 array — there is no shared sibling sub-path +/// between them at the leaf level (only at higher tree levels, which the +/// MMCS opening encodes). 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(bound = "")] +pub struct MainTraceOpening { + pub evaluations: Vec>, + pub evaluations_sym: Vec>, + pub mmcs_opening: MmcsOpening, + pub mmcs_opening_sym: MmcsOpening, +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] pub struct DeepPolynomialOpening, E: IsField> { diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 53af372ec..5c35281e5 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -27,11 +27,15 @@ use rayon::prelude::{ use crate::debug::validate_trace; use crate::fri; use crate::lookup::LOGUP_NUM_CHALLENGES; +#[allow(unused_imports)] +use crate::mmcs_leaf::hash_tagged_row_bytes; use crate::proof::stark::{DeepPolynomialOpenings, PolynomialOpenings}; #[cfg(feature = "disk-spill")] use crate::storage_mode::StorageMode; use crate::table::Table; use crate::trace::LDETraceTable; +#[allow(unused_imports)] +use crypto::merkle_tree::mmcs::{MatrixTag, Mmcs, MmcsBuilder, MmcsError}; use super::config::{BatchedMerkleTree, BatchedMerkleTreeBackend, Commitment}; use super::constraints::evaluator::ConstraintEvaluator; @@ -149,6 +153,52 @@ where } } +/// Per-table commitment artifacts for the main trace under the shared +/// MMCS protocol. The `mmcs` Arc is the SAME instance for every table in +/// the multi-proof — Phase A builds it once. +/// +/// Currently unused at the wire-up level; defined here as the keystone +/// type for the upcoming MMCS Phase C wire-up (see +/// `docs/mmcs-streaming-c1-spec.md`). Marked `allow(dead_code)` until the +/// follow-up commit consumes it. +#[allow(dead_code)] +pub(crate) struct MainCommit +where + FieldElement: AsBytes, +{ + /// Shared MMCS across all tables in the multi-proof. + pub(crate) mmcs: Arc>>, + /// This table's MatrixTag within the MMCS. + pub(crate) tag: MatrixTag, + /// Preprocessed tables only: separate Merkle tree over precomputed columns. 
+ pub(crate) precomputed_tree: Option>>, + /// Preprocessed tables only: root of `precomputed_tree`. + pub(crate) precomputed_root: Option, + /// Preprocessed tables only: number of precomputed columns. Zero otherwise. + pub(crate) num_precomputed_cols: usize, +} + +#[allow(dead_code)] +impl MainCommit +where + FieldElement: AsBytes, +{ + fn is_preprocessed(&self) -> bool { + self.precomputed_tree.is_some() + } + + /// Cheap clone. Only bumps Arc refcounts. + fn share(&self) -> Self { + Self { + mmcs: Arc::clone(&self.mmcs), + tag: self.tag, + precomputed_tree: self.precomputed_tree.as_ref().map(Arc::clone), + precomputed_root: self.precomputed_root, + num_precomputed_cols: self.num_precomputed_cols, + } + } +} + /// A container for the results of the first round of the STARK Prove protocol. pub(crate) struct Round1 where @@ -201,7 +251,7 @@ where FieldElement: AsBytes, { /// Build a `Round1` by consuming a `Lde` and borrowing commitment data. - /// The `TableCommit::share` calls are cheap — only bump Arc refcounts. + /// The `share` calls are cheap — only bump Arc refcounts. fn build_round1( &self, lde: Lde, diff --git a/prover/src/lib.rs b/prover/src/lib.rs index cfa9a52ef..1632ac642 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -273,6 +273,67 @@ impl VmAirs { pairs } + /// Build the parallel `Vec` for the main-trace MMCS, in the + /// exact same order as [`Self::air_trace_pairs`] and [`Self::air_refs`]. + /// Prover and verifier MUST call this on identical `VmAirs` configurations. + /// + /// Currently unused at the call sites; defined here as the foundation + /// for the upcoming MMCS Phase C wire-up (see + /// `docs/mmcs-streaming-c1-spec.md`). 
+ #[allow(dead_code)] + pub fn air_tags(&self) -> Vec { + use crate::tables::mmcs_tags::{ + CHIP_BITWISE, CHIP_BRANCH, CHIP_COMMIT, CHIP_CPU, CHIP_DECODE, CHIP_DVRM, CHIP_HALT, + CHIP_KECCAK, CHIP_KECCAK_RC, CHIP_KECCAK_RND, CHIP_LOAD, CHIP_LT, CHIP_MEMW, + CHIP_MEMW_ALIGNED, CHIP_MEMW_REGISTER, CHIP_MUL, CHIP_PAGE, CHIP_REGISTER, CHIP_SHIFT, + chip_tag, + }; + let mut tags = vec![ + chip_tag(CHIP_BITWISE, 0), + chip_tag(CHIP_DECODE, 0), + chip_tag(CHIP_HALT, 0), + chip_tag(CHIP_COMMIT, 0), + chip_tag(CHIP_KECCAK, 0), + chip_tag(CHIP_KECCAK_RND, 0), + chip_tag(CHIP_KECCAK_RC, 0), + chip_tag(CHIP_REGISTER, 0), + ]; + for i in 0..self.cpus.len() { + tags.push(chip_tag(CHIP_CPU, i as u32)); + } + for i in 0..self.lts.len() { + tags.push(chip_tag(CHIP_LT, i as u32)); + } + for i in 0..self.shifts.len() { + tags.push(chip_tag(CHIP_SHIFT, i as u32)); + } + for i in 0..self.memws.len() { + tags.push(chip_tag(CHIP_MEMW, i as u32)); + } + for i in 0..self.memw_aligneds.len() { + tags.push(chip_tag(CHIP_MEMW_ALIGNED, i as u32)); + } + for i in 0..self.loads.len() { + tags.push(chip_tag(CHIP_LOAD, i as u32)); + } + for i in 0..self.muls.len() { + tags.push(chip_tag(CHIP_MUL, i as u32)); + } + for i in 0..self.dvrms.len() { + tags.push(chip_tag(CHIP_DVRM, i as u32)); + } + for i in 0..self.branches.len() { + tags.push(chip_tag(CHIP_BRANCH, i as u32)); + } + for i in 0..self.pages.len() { + tags.push(chip_tag(CHIP_PAGE, i as u32)); + } + for i in 0..self.memw_registers.len() { + tags.push(chip_tag(CHIP_MEMW_REGISTER, i as u32)); + } + tags + } + /// Collect AIR references for [`Verifier::multi_verify`]. 
pub fn air_refs(&self) -> Vec<&dyn AIR> { let mut refs: Vec<&dyn AIR> = vec![ From 00cc9d0ed1ec9a041981d41fdb3c03cb7be956a7 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 12:20:51 -0300 Subject: [PATCH 08/21] feat(stark): thread main_tags through multi_prove / multi_verify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the parallel Vec through both prover and verifier signatures. The value is not consumed yet (let _ = main_tags), only validated against AIR count. Lays the surface for the upcoming Phase A MMCS wire-up without changing any cryptographic behaviour. - multi_prove(air_trace_pairs, main_tags, transcript, ...) - multi_verify(airs, multi_proof, main_tags, transcript, balance) - Both reject a mismatch between `main_tags.len()` and AIR count. - Single-AIR `prove` / `verify` synthesize a default tag. - stark::test_utils::multi_verify_ram helper hides the synthetic tag generation from stark internal tests; bulk-migrated all 37 call sites to use it. - New `mmcs_leaf::synth_main_tags(n)` + `synth_main_tags_for(slice)` helpers for lambda-vm tests that build ad-hoc AIR slices. - prover/src/lib.rs `prove` / `verify` wire `airs.air_tags()` to the shared MMCS-bound transcript. - lambda-vm tests bulk-updated to pass synthesized tags (values are ignored under the current `let _ = main_tags` no-op). Tests: stark 130/130 green; lambda-vm-prover non-ELF tests green (prove_elfs failures pre-date this change — they need ELF binaries not present in the working tree). 
--- crypto/stark/src/mmcs_leaf.rs | 16 +++++++ crypto/stark/src/prover.rs | 16 +++++++ crypto/stark/src/test_utils.rs | 30 +++++++++++++ crypto/stark/src/tests/air_tests.rs | 6 +-- .../src/tests/bus_tests/completeness_tests.rs | 13 +++--- .../src/tests/bus_tests/multiplicity_tests.rs | 7 ++- .../src/tests/bus_tests/soundness_tests.rs | 44 +++++++++---------- .../src/tests/prove_verify_roundtrip_tests.rs | 3 +- crypto/stark/src/tests/prover_tests.rs | 5 +-- crypto/stark/src/verifier.rs | 20 ++++++++- prover/src/lib.rs | 4 ++ prover/src/test_utils.rs | 2 + prover/src/tests/bitwise_bus_tests.rs | 2 + prover/src/tests/bitwise_tests.rs | 3 ++ prover/src/tests/branch_bus_tests.rs | 2 + prover/src/tests/decode_tests.rs | 2 + prover/src/tests/lt_bus_tests.rs | 2 + prover/src/tests/prove_elfs_tests.rs | 7 +++ 18 files changed, 142 insertions(+), 42 deletions(-) diff --git a/crypto/stark/src/mmcs_leaf.rs b/crypto/stark/src/mmcs_leaf.rs index 4c2c84b67..488a937af 100644 --- a/crypto/stark/src/mmcs_leaf.rs +++ b/crypto/stark/src/mmcs_leaf.rs @@ -31,6 +31,22 @@ use crate::config::Commitment; /// any encoding change so old proofs cannot be silently re-interpreted. pub const LEAF_DOMAIN_TAG: &[u8] = b"LAMBDAVM_MAIN_MMCS_LEAF_V1"; +/// Synthesize `n` distinct [`MatrixTag`]s derived from positional index. +/// Useful for generic stark tests where the caller does not own a stable +/// chip-type assignment. Production code in lambda-vm uses +/// `VmAirs::air_tags()` instead, which encodes chip type + chunk index. +pub fn synth_main_tags(n: usize) -> Vec { + (0..n) + .map(|i| MatrixTag::new((i as u64).to_le_bytes())) + .collect() +} + +/// Convenience: synthesize `MatrixTag`s sized to a slice. Equivalent to +/// `synth_main_tags(slice.len())`. +pub fn synth_main_tags_for(slice: &[T]) -> Vec { + synth_main_tags(slice.len()) +} + /// Hash one row's worth of column bytes into a leaf digest using the /// canonical tagged format. 
`row_bytes_be` is the concatenation of every /// committed column's element written big-endian, in column order. diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 5c35281e5..f71fc4343 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -1495,6 +1495,7 @@ pub trait IsStarkProver< /// The transcript must be safely initialized before passing it to this method. fn multi_prove( mut air_trace_pairs: Vec>, + main_tags: &[MatrixTag], transcript: &mut (impl IsStarkTranscript + Clone + Send), #[cfg(feature = "disk-spill")] storage_mode: StorageMode, ) -> Result, ProvingError> @@ -1516,6 +1517,16 @@ pub trait IsStarkProver< let num_airs = air_trace_pairs.len(); + if main_tags.len() != num_airs { + return Err(ProvingError::WrongParameter(format!( + "main_tags len ({}) does not match number of AIRs ({})", + main_tags.len(), + num_airs + ))); + } + // `main_tags` is reserved for the upcoming MMCS wire-up; not consumed yet. + let _ = main_tags; + // Check if any AIR has an auxiliary trace let needs_lookup_challenges = air_trace_pairs .iter() @@ -1972,8 +1983,13 @@ pub trait IsStarkProver< ::BaseType: SpillSafe, { let air_trace_pairs = vec![(air, trace, pub_inputs)]; + // Single-AIR path: synthesize a default tag. Callers that want + // multi-table soundness should call `multi_prove` directly with + // distinct tags. + let main_tags = [MatrixTag::new([0; 8])]; Self::multi_prove( air_trace_pairs, + &main_tags, transcript, #[cfg(feature = "disk-spill")] StorageMode::Ram, diff --git a/crypto/stark/src/test_utils.rs b/crypto/stark/src/test_utils.rs index f5cd19f80..383d0c3d3 100644 --- a/crypto/stark/src/test_utils.rs +++ b/crypto/stark/src/test_utils.rs @@ -5,6 +5,7 @@ use crate::prover::{IsStarkProver, Prover, ProvingError}; use crate::trace::TraceTable; use crate::traits::AIR; use crypto::fiat_shamir::is_transcript::IsStarkTranscript; +// MatrixTag is re-exported via `synth_main_tags`; no direct use here. 
use math::field::element::FieldElement; use math::field::traits::{IsFFTField, IsField, IsSubFieldOf}; use math::spill_safe::SpillSafe; @@ -16,6 +17,33 @@ type AirTracePair<'a, Field, FieldExtension, PI> = ( &'a PI, ); +pub use crate::mmcs_leaf::synth_main_tags; + +pub fn multi_verify_ram( + airs: &[&dyn AIR], + multi_proof: &MultiProof, + transcript: &mut (impl IsStarkTranscript + Clone), + expected_bus_balance: &FieldElement, +) -> bool +where + Field: IsSubFieldOf + IsFFTField + Send + Sync + Copy + 'static, + FieldExtension: IsField + Send + Sync + Copy + 'static, + FieldElement: AsBytes + ByteConversion + Sync + Send, + FieldElement: AsBytes + ByteConversion + Sync + Send, + ::BaseType: SpillSafe, + ::BaseType: SpillSafe, +{ + use crate::verifier::{IsStarkVerifier, Verifier}; + let main_tags = synth_main_tags(airs.len()); + Verifier::::multi_verify( + airs, + multi_proof, + &main_tags, + transcript, + expected_bus_balance, + ) +} + pub fn multi_prove_ram( air_trace_pairs: Vec>, transcript: &mut (impl IsStarkTranscript + Clone + Send), @@ -29,8 +57,10 @@ where ::BaseType: SpillSafe, ::BaseType: SpillSafe, { + let main_tags = synth_main_tags(air_trace_pairs.len()); Prover::::multi_prove( air_trace_pairs, + &main_tags, transcript, #[cfg(feature = "disk-spill")] crate::storage_mode::StorageMode::Ram, diff --git a/crypto/stark/src/tests/air_tests.rs b/crypto/stark/src/tests/air_tests.rs index 8e20f303e..5084c7462 100644 --- a/crypto/stark/src/tests/air_tests.rs +++ b/crypto/stark/src/tests/air_tests.rs @@ -411,7 +411,7 @@ fn test_multi_prove_fib_3_tables() { >, > = vec![&air_1, &air_2, &air_3]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -515,7 +515,7 @@ fn test_multi_prove_2_tables_small_field() { >, > = vec![&air_1, &air_2]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ 
-545,7 +545,7 @@ fn test_multi_prove_different_airs() { &dyn AIR, > = vec![&air_1, &air_2]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), diff --git a/crypto/stark/src/tests/bus_tests/completeness_tests.rs b/crypto/stark/src/tests/bus_tests/completeness_tests.rs index 83f8ac391..cdef1d1e3 100644 --- a/crypto/stark/src/tests/bus_tests/completeness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/completeness_tests.rs @@ -19,7 +19,6 @@ use crate::proof::options::ProofOptions; use crate::test_utils::multi_prove_ram; use crate::trace::TraceTable; use crate::traits::AIR; -use crate::verifier::{IsStarkVerifier, Verifier}; type F = GoldilocksField; type E = Degree3GoldilocksExtensionField; @@ -127,7 +126,7 @@ fn test_multi_table_proof() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -190,7 +189,7 @@ fn test_all_padding() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -253,7 +252,7 @@ fn test_single_operation() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -316,7 +315,7 @@ fn test_duplicate_operations() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -384,7 +383,7 @@ fn test_serialization_roundtrip() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &deserialized, 
&mut DefaultTranscript::::new(&[]), @@ -524,7 +523,7 @@ fn test_bus_value_features() { let airs: Vec<&dyn AIR> = vec![&sender_air, &receiver_air]; - assert!(Verifier::multi_verify( + assert!(crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), diff --git a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs index 7e4d632dd..7bbcbf239 100644 --- a/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs +++ b/crypto/stark/src/tests/bus_tests/multiplicity_tests.rs @@ -18,7 +18,6 @@ use crate::proof::options::ProofOptions; use crate::test_utils::multi_prove_ram; use crate::trace::TraceTable; use crate::traits::AIR; -use crate::verifier::{IsStarkVerifier, Verifier}; type F = GoldilocksField; type E = Degree3GoldilocksExtensionField; @@ -119,7 +118,7 @@ fn test_multiplicity_one() { vec![&sender, &receiver]; assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -229,7 +228,7 @@ fn test_multiplicity_sum() { vec![&sender, &receiver]; assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -337,7 +336,7 @@ fn test_multiplicity_negated() { vec![&sender, &receiver]; assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), diff --git a/crypto/stark/src/tests/bus_tests/soundness_tests.rs b/crypto/stark/src/tests/bus_tests/soundness_tests.rs index fc718bf7c..d2af70678 100644 --- a/crypto/stark/src/tests/bus_tests/soundness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/soundness_tests.rs @@ -85,7 +85,7 @@ fn test_wrong_result_value() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -148,7 +148,7 @@ 
fn test_off_by_one() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -211,7 +211,7 @@ fn test_swapped_operands() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -274,7 +274,7 @@ fn test_single_column_wrong() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -341,7 +341,7 @@ fn test_over_report_multiplicity() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -404,7 +404,7 @@ fn test_under_report_multiplicity() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -467,7 +467,7 @@ fn test_zero_multiplicity_skip() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -534,7 +534,7 @@ fn test_phantom_receive() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -597,7 +597,7 @@ fn test_missing_receiver() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut 
DefaultTranscript::::new(&[]), @@ -680,7 +680,7 @@ fn test_tampered_table_contribution() { vec![&cpu_air, &add_air, &mul_air]; assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -761,7 +761,7 @@ fn test_tampered_acc_ood_evaluation() { vec![&cpu_air, &add_air, &mul_air]; assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -837,7 +837,7 @@ fn test_missing_bus_public_inputs_rejected() { vec![&cpu_air, &add_air, &mul_air]; assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -963,7 +963,7 @@ fn test_zeroed_table_contribution_rejected() { vec![&cpu_air, &add_air, &mul_air]; assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1032,7 +1032,7 @@ fn test_one_of_many_wrong() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1140,7 +1140,7 @@ fn test_full_scenario_wrong_add() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1217,7 +1217,7 @@ fn test_wrong_table_consumes_value_rejected() { // Verification MUST fail: MUL table cannot consume values sent to ADD bus // because bus_id is included in the fingerprint assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1334,7 +1334,7 @@ fn test_packing_mismatch_direct_vs_word2l() { // Sender: z - (100 + 200*α) // Receiver: z - (100 + 200*2^16) = z - (100 + 13107200) assert!( - !Verifier::multi_verify( + 
!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1439,7 +1439,7 @@ fn test_packing_mismatch_element_count() { // Receiver: z - ((10 + 20*65536) + 30*α) = z - (1310730 + 30*α) [2 bus elements] // Different fingerprints! assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1538,7 +1538,7 @@ fn test_packing_mismatch_shift_constant() { vec![&sender, &receiver]; assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1641,7 +1641,7 @@ fn test_compound_mismatch_dwordhhw_vs_dwordwhh() { vec![&sender, &receiver]; assert!( - !Verifier::multi_verify( + !crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1735,7 +1735,7 @@ fn test_compound_equals_primitive_expansion() { // This should PASS - compound and primitive expansion are equivalent assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -1849,7 +1849,7 @@ fn test_full_scenario_wrong_mul() { let airs: Vec<&dyn AIR> = vec![&cpu_air, &add_air, &mul_air]; - assert!(!Verifier::multi_verify( + assert!(!crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), diff --git a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs index 4059ed481..b58df3975 100644 --- a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs +++ b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs @@ -18,7 +18,6 @@ use crate::proof::options::ProofOptions; use crate::proof::stark::MultiProof; use crate::test_utils::multi_prove_ram; use crate::traits::AIR; -use crate::verifier::{IsStarkVerifier, Verifier}; type F = GoldilocksField; type E = Degree3GoldilocksExtensionField; @@ -168,7 +167,7 @@ fn 
test_verify_serialized_multi_table_proofs() { vec![&cpu_air, &add_air, &mul_air]; assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &received_proofs, &mut DefaultTranscript::::new(&[]), diff --git a/crypto/stark/src/tests/prover_tests.rs b/crypto/stark/src/tests/prover_tests.rs index c645eebb2..640608ae8 100644 --- a/crypto/stark/src/tests/prover_tests.rs +++ b/crypto/stark/src/tests/prover_tests.rs @@ -12,7 +12,6 @@ use crate::{ tests::domain_cache_stats, trace::{LDETraceTable, get_trace_evaluations, get_trace_evaluations_from_lde}, traits::AIR, - verifier::{IsStarkVerifier, Verifier}, }; use math::{ field::{element::FieldElement, goldilocks::GoldilocksField, traits::IsFFTField}, @@ -304,7 +303,7 @@ fn test_multi_prove_mixed_coset_offsets() { > = vec![&air_1, &air_2]; assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), @@ -380,7 +379,7 @@ fn test_multi_prove_dedups_shared_domain_params() { > = vec![&air_1, &air_2, &air_3]; assert!( - Verifier::multi_verify( + crate::test_utils::multi_verify_ram( &airs, &multi_proof, &mut DefaultTranscript::::new(&[]), diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index 8091c8b32..7b86cd4b4 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -716,6 +716,7 @@ pub trait IsStarkVerifier< fn multi_verify( airs: &[&dyn AIR], multi_proof: &MultiProof, + main_tags: &[crypto::merkle_tree::mmcs::MatrixTag], transcript: &mut (impl IsStarkTranscript + Clone), expected_bus_balance: &FieldElement, ) -> bool @@ -731,6 +732,16 @@ pub trait IsStarkVerifier< ); return false; } + if main_tags.len() != airs.len() { + error!( + "main_tags count ({}) does not match AIR count ({})", + main_tags.len(), + airs.len() + ); + return false; + } + // `main_tags` is reserved for the upcoming MMCS verifier replay. 
+ let _ = main_tags; // Check if any AIR has an auxiliary trace let needs_lookup_challenges = airs.iter().any(|air| air.has_aux_trace()); @@ -903,7 +914,14 @@ pub trait IsStarkVerifier< let multi_proof = MultiProof { proofs: vec![proof.clone()], }; - Self::multi_verify(&[air], &multi_proof, transcript, &FieldElement::zero()) + let main_tags = [crypto::merkle_tree::mmcs::MatrixTag::new([0; 8])]; + Self::multi_verify( + &[air], + &multi_proof, + &main_tags, + transcript, + &FieldElement::zero(), + ) } /// Replays rounds 2, 3 and 4 of the protocol for a given proof, assuming round 1 has diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 1632ac642..209d45853 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -736,8 +736,10 @@ pub fn prove_with_options_and_inputs( ); // Phase 4: Prove (multi_prove) + let main_tags = airs.air_tags(); let proof = Prover::multi_prove( airs.air_trace_pairs(&mut traces), + &main_tags, &mut transcript, #[cfg(feature = "disk-spill")] storage_mode, @@ -870,9 +872,11 @@ pub fn verify_with_options( None => return Ok(false), }; + let main_tags = airs.air_tags(); Ok(Verifier::multi_verify( &air_refs, &vm_proof.proof, + &main_tags, &mut transcript, &expected_bus_balance, )) diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 1b608034c..db62577dc 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -100,8 +100,10 @@ pub fn multi_prove_ram( where PI: Send + Sync + Clone, { + let main_tags = stark::mmcs_leaf::synth_main_tags(air_trace_pairs.len()); Prover::::multi_prove( air_trace_pairs, + &main_tags, transcript, #[cfg(feature = "disk-spill")] StorageMode::Ram, diff --git a/prover/src/tests/bitwise_bus_tests.rs b/prover/src/tests/bitwise_bus_tests.rs index 2a5fd31dd..2dca2d670 100644 --- a/prover/src/tests/bitwise_bus_tests.rs +++ b/prover/src/tests/bitwise_bus_tests.rs @@ -205,6 +205,7 @@ fn prove_and_verify(sender_lookups: &[(u8, u8, u8)]) -> bool { Verifier::multi_verify( &airs, &multi_proof, + 
&stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ) @@ -315,6 +316,7 @@ fn prove_and_verify_custom( Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ) diff --git a/prover/src/tests/bitwise_tests.rs b/prover/src/tests/bitwise_tests.rs index 8337f8bf7..fc9907ed4 100644 --- a/prover/src/tests/bitwise_tests.rs +++ b/prover/src/tests/bitwise_tests.rs @@ -599,6 +599,7 @@ mod soundness_tests { let result = Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ); @@ -647,6 +648,7 @@ mod soundness_tests { let result = Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ); @@ -718,6 +720,7 @@ mod soundness_tests { let result = Verifier::multi_verify( &verifier_airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ); diff --git a/prover/src/tests/branch_bus_tests.rs b/prover/src/tests/branch_bus_tests.rs index c19a580ad..d234a585d 100644 --- a/prover/src/tests/branch_bus_tests.rs +++ b/prover/src/tests/branch_bus_tests.rs @@ -348,6 +348,7 @@ fn prove_and_verify(ops: &[BranchOperation]) -> bool { Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ) @@ -438,6 +439,7 @@ fn prove_and_verify_custom(ops: &[BranchOperation], receiver_rows: &[CustomBranc Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ) diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index c6a436c95..fd11d6392 100644 --- 
a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -981,6 +981,7 @@ fn test_decode_soundness_different_elf_rejected() { let result = Verifier::multi_verify( &verifier_airs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ); @@ -1076,6 +1077,7 @@ fn test_decode_soundness_same_elf_accepted() { let result = Verifier::multi_verify( &verifier_air_refs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_air_refs), &mut DefaultTranscript::::new(&[]), &expected_bus_balance, ); diff --git a/prover/src/tests/lt_bus_tests.rs b/prover/src/tests/lt_bus_tests.rs index dcc555780..36011c831 100644 --- a/prover/src/tests/lt_bus_tests.rs +++ b/prover/src/tests/lt_bus_tests.rs @@ -301,6 +301,7 @@ fn prove_and_verify(ops: &[LtOperation]) -> bool { Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ) @@ -385,6 +386,7 @@ fn prove_and_verify_custom(ops: &[LtOperation], receiver_rows: &[CustomLtRow]) - Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ) diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index fe97911b9..1e0c1236f 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -80,6 +80,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool { Verifier::multi_verify( &airs.air_refs(), &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs.air_refs()), &mut DefaultTranscript::::new(&[]), &expected_bus_balance, ) @@ -135,6 +136,7 @@ fn test_cpu_only_no_bus() { Verifier::multi_verify( &airs, &multi_proof, + &stark::mmcs_leaf::synth_main_tags_for(&airs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ), @@ -895,6 +897,7 @@ fn 
test_prove_elfs_test_commit_4_wrong_pages_rejected() { let verified = Verifier::multi_verify( &verifier_air_refs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_air_refs), &mut DefaultTranscript::::new(&[]), &expected_bus_balance, ); @@ -1633,6 +1636,7 @@ fn test_deep_stack_runtime_pages_roundtrip() { let verified = Verifier::multi_verify( &verifier_air_refs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_air_refs), &mut DefaultTranscript::::new(&[]), &expected_bus_balance, ); @@ -1690,6 +1694,7 @@ fn test_deep_stack_missing_pages_rejected() { let verified = Verifier::multi_verify( &verifier_air_refs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_air_refs), &mut DefaultTranscript::::new(&[]), &expected_bus_balance, ); @@ -1782,6 +1787,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() { let verified = Verifier::multi_verify( &verifier_air_refs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_air_refs), &mut DefaultTranscript::::new(&[]), &expected_bus_balance, ); @@ -1942,6 +1948,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { let verified = Verifier::multi_verify( &verifier_air_refs, &proof, + &stark::mmcs_leaf::synth_main_tags_for(&verifier_air_refs), &mut DefaultTranscript::::new(&[]), &FieldElement::zero(), ); From 284fe640aa4338b866de1e9f89b641c36b9a1a2d Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 14:41:14 -0300 Subject: [PATCH 09/21] feat(stark/mmcs): wire MMCS into Phase A + verifier; preprocessed kept on per-table trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase C wire-up of the streaming-MMCS plan. Non-preprocessed tables are now committed under a single shared main-trace MMCS; preprocessed tables stay OUT of the MMCS and keep their own per-table multiplicities Merkle tree (in addition to the AIR-pinned precomputed tree). 
Architecture: - `MainCommit` is now an enum: * `Shared { mmcs (Arc), tag, padded_height }` for non-preprocessed * `Preprocessed { multiplicities_tree, multiplicities_root, precomputed_tree, precomputed_root, num_precomputed_cols }` - `MainTraceOpening` is now an enum: * `Mmcs { evaluations, evaluations_sym, mmcs_opening, mmcs_opening_sym }` * `Tree(PolynomialOpenings)` for the preprocessed per-table tree - `MultiProof` gains `main_mmcs_root: Commitment` and `main_mmcs_spec: Vec<(MatrixTag, usize)>` (the MMCS is at multi-proof level, hence single-AIR `prove` / `verify` now return / take MultiProof). - `StarkProof.lde_trace_main_merkle_root: Option` is repurposed: `Some` for preprocessed tables (per-table multiplicities root), `None` for non-preprocessed tables (covered by `main_mmcs_root`). Phase A absorb order (prover + verifier + `replay_transcript_phase_a` match): for each table in spec-fixed order: if preprocessed: absorb precomputed_root (AIR-pinned, verifier rejects mismatch) absorb multiplicities_root (per-table tree) absorb main_mmcs_root (once, over Shared tables only) Per-query main-trace opening: - Shared tables: `mmcs.open(iota*2 << shift)` and `mmcs.open((iota*2+1) << shift)`, where `shift = log2(max_height / this_padded_height)`. The verifier rehashes `evaluations` with `mmcs_leaf::hash_tagged_row`, compares against `matrix_leaves[table_idx]`, and authenticates against root+spec. - Preprocessed tables: `verify_opening_pair` against `StarkProof.lde_trace_main_merkle_root` (unchanged single-tree path). Test-suite updates (single-AIR `prove` now returns MultiProof): - crypto/stark/src/tests/small_trace_tests.rs: field access via `proof.proofs[0]`, `MainTraceOpening` matched as enum. - crypto/stark/src/tests/bus_tests/soundness_tests.rs: same. Tests: stark 130/130 green. lambda-vm-prover non-ELF tests unchanged (the 77 prove_elfs failures predate this change — same UnknownSyscall(5) executor bug on `git stash` baseline). 
--- crypto/stark/src/proof/stark.rs | 67 ++- crypto/stark/src/prover.rs | 478 +++++++++++++----- .../src/tests/bus_tests/soundness_tests.rs | 2 +- crypto/stark/src/tests/small_trace_tests.rs | 31 +- crypto/stark/src/verifier.rs | 230 +++++++-- prover/src/lib.rs | 12 +- 6 files changed, 627 insertions(+), 193 deletions(-) diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index ec11acd3b..667f9f170 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -1,4 +1,4 @@ -use crypto::merkle_tree::mmcs::MmcsOpening; +use crypto::merkle_tree::mmcs::{MatrixTag, MmcsOpening}; use crypto::merkle_tree::proof::Proof; use math::field::{ element::FieldElement, @@ -18,27 +18,50 @@ pub struct PolynomialOpenings { pub evaluations_sym: Vec>, } -/// Per-query main-trace opening backed by the shared MMCS. +/// Per-query main-trace opening. /// -/// The (iota, iota_sym) pair are consecutive global indices in the LDE. -/// Each carries its own `MmcsOpening` because they live at different -/// positions in the layer-0 array — there is no shared sibling sub-path -/// between them at the leaf level (only at higher tree levels, which the -/// MMCS opening encodes). +/// Non-preprocessed tables are committed under the shared main-trace MMCS, +/// so a query carries an `MmcsOpening` pair (one per iota / iota_sym). +/// Preprocessed tables keep their multiplicities slice in their OWN +/// per-table Merkle tree (distinct from the shared MMCS) and use the +/// legacy `PolynomialOpenings` layout. The per-table root for the latter +/// lives in `StarkProof::lde_trace_main_merkle_root`. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] -pub struct MainTraceOpening { - pub evaluations: Vec>, - pub evaluations_sym: Vec>, - pub mmcs_opening: MmcsOpening, - pub mmcs_opening_sym: MmcsOpening, +pub enum MainTraceOpening { + /// Opening into the shared main-trace MMCS (non-preprocessed tables). 
+ Mmcs { + evaluations: Vec>, + evaluations_sym: Vec>, + mmcs_opening: MmcsOpening, + mmcs_opening_sym: MmcsOpening, + }, + /// Opening into this table's own multiplicities Merkle tree + /// (preprocessed tables). + Tree(PolynomialOpenings), +} + +impl MainTraceOpening { + pub fn evaluations(&self) -> &[FieldElement] { + match self { + Self::Mmcs { evaluations, .. } => evaluations, + Self::Tree(p) => &p.evaluations, + } + } + + pub fn evaluations_sym(&self) -> &[FieldElement] { + match self { + Self::Mmcs { evaluations_sym, .. } => evaluations_sym, + Self::Tree(p) => &p.evaluations_sym, + } + } } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] pub struct DeepPolynomialOpening, E: IsField> { pub composition_poly: PolynomialOpenings, - pub main_trace_polys: PolynomialOpenings, + pub main_trace_polys: MainTraceOpening, /// For preprocessed tables: openings for precomputed columns. /// These are verified against the hardcoded precomputed commitment. pub precomputed_trace_polys: Option>, @@ -52,9 +75,11 @@ pub type DeepPolynomialOpenings = Vec>; pub struct StarkProof, E: IsField, PI> { // Length of the execution trace pub trace_length: usize, - // Commitments of the trace columns - // [tⱼ] - pub lde_trace_main_merkle_root: Commitment, + /// For PREPROCESSED tables only: per-table Merkle root over the + /// multiplicities columns (the non-precomputed slice). Preprocessed + /// tables stay out of the shared main-trace MMCS, so their main slice + /// keeps its own per-table tree. `None` for non-preprocessed tables. + pub lde_trace_main_merkle_root: Option, // Commitments of auxiliary trace columns // [tⱼ] pub lde_trace_aux_merkle_root: Option, @@ -90,8 +115,18 @@ pub struct StarkProof, E: IsField, PI> { /// A collection of STARK proofs for multiple AIRs. /// Used for multi-table proving where tables are linked via bus (LogUp). /// Returned by `Prover::multi_prove` and verified by `Verifier::multi_verify`. 
+/// +/// Non-preprocessed tables share a single main-trace MMCS authenticated by +/// `main_mmcs_root`; `main_mmcs_spec` lists `(MatrixTag, padded_height)` +/// per committed table in the MMCS sort order. Preprocessed tables stay +/// out of this MMCS — each carries its own per-table Merkle root in +/// `StarkProof::lde_trace_main_merkle_root` plus the AIR-pinned +/// precomputed root. Both groups' roots are absorbed in spec-fixed order +/// during Phase A. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound = "PI: serde::Serialize + serde::de::DeserializeOwned")] pub struct MultiProof, E: IsField, PI> { pub proofs: Vec>, + pub main_mmcs_root: Commitment, + pub main_mmcs_spec: Vec<(MatrixTag, usize)>, } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index f71fc4343..6c58d5ac0 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -27,14 +27,11 @@ use rayon::prelude::{ use crate::debug::validate_trace; use crate::fri; use crate::lookup::LOGUP_NUM_CHALLENGES; -#[allow(unused_imports)] -use crate::mmcs_leaf::hash_tagged_row_bytes; -use crate::proof::stark::{DeepPolynomialOpenings, PolynomialOpenings}; +use crate::proof::stark::{DeepPolynomialOpenings, MainTraceOpening, PolynomialOpenings}; #[cfg(feature = "disk-spill")] use crate::storage_mode::StorageMode; use crate::table::Table; use crate::trace::LDETraceTable; -#[allow(unused_imports)] use crypto::merkle_tree::mmcs::{MatrixTag, Mmcs, MmcsBuilder, MmcsError}; use super::config::{BatchedMerkleTree, BatchedMerkleTreeBackend, Commitment}; @@ -120,23 +117,6 @@ where } } - /// Build a `TableCommit` for a preprocessed table. 
- fn preprocessed( - tree: BatchedMerkleTree, - root: Commitment, - precomputed_tree: BatchedMerkleTree, - precomputed_root: Commitment, - num_precomputed_cols: usize, - ) -> Self { - Self { - tree: Arc::new(tree), - root, - precomputed_tree: Some(Arc::new(precomputed_tree)), - precomputed_root: Some(precomputed_root), - num_precomputed_cols, - } - } - /// Cheap clone. Only bumps Arc refcounts, no tree data is copied. fn share(&self) -> Self { Self { @@ -148,53 +128,130 @@ where } } - fn is_preprocessed(&self) -> bool { - self.precomputed_tree.is_some() - } } /// Per-table commitment artifacts for the main trace under the shared /// MMCS protocol. The `mmcs` Arc is the SAME instance for every table in /// the multi-proof — Phase A builds it once. /// -/// Currently unused at the wire-up level; defined here as the keystone -/// type for the upcoming MMCS Phase C wire-up (see -/// `docs/mmcs-streaming-c1-spec.md`). Marked `allow(dead_code)` until the -/// follow-up commit consumes it. -#[allow(dead_code)] -pub(crate) struct MainCommit +/// `padded_height` is this table's LDE height (a power of two), needed to +/// translate the table's local FRI iota into a global MMCS index when +/// opening (see `open_deep_composition_poly`). +pub(crate) enum MainCommit where FieldElement: AsBytes, { - /// Shared MMCS across all tables in the multi-proof. - pub(crate) mmcs: Arc>>, - /// This table's MatrixTag within the MMCS. - pub(crate) tag: MatrixTag, - /// Preprocessed tables only: separate Merkle tree over precomputed columns. - pub(crate) precomputed_tree: Option>>, - /// Preprocessed tables only: root of `precomputed_tree`. - pub(crate) precomputed_root: Option, - /// Preprocessed tables only: number of precomputed columns. Zero otherwise. - pub(crate) num_precomputed_cols: usize, + /// Non-preprocessed table: committed under the shared MMCS. 
+ Shared { + mmcs: Arc>>, + tag: MatrixTag, + /// Padded height (== LDE row count); needed to translate a local + /// FRI iota into a global MMCS index. + padded_height: usize, + }, + /// Preprocessed table: two per-table Merkle trees, NOT in the MMCS. + Preprocessed { + multiplicities_tree: Arc>, + multiplicities_root: Commitment, + precomputed_tree: Arc>, + precomputed_root: Commitment, + num_precomputed_cols: usize, + }, } -#[allow(dead_code)] impl MainCommit where FieldElement: AsBytes, { - fn is_preprocessed(&self) -> bool { - self.precomputed_tree.is_some() + fn precomputed_root(&self) -> Option { + match self { + Self::Shared { .. } => None, + Self::Preprocessed { + precomputed_root, .. + } => Some(*precomputed_root), + } + } + + fn main_tree_root(&self) -> Option { + match self { + Self::Shared { .. } => None, + Self::Preprocessed { + multiplicities_root, + .. + } => Some(*multiplicities_root), + } } /// Cheap clone. Only bumps Arc refcounts. fn share(&self) -> Self { - Self { - mmcs: Arc::clone(&self.mmcs), - tag: self.tag, - precomputed_tree: self.precomputed_tree.as_ref().map(Arc::clone), - precomputed_root: self.precomputed_root, - num_precomputed_cols: self.num_precomputed_cols, + match self { + Self::Shared { + mmcs, + tag, + padded_height, + } => Self::Shared { + mmcs: Arc::clone(mmcs), + tag: *tag, + padded_height: *padded_height, + }, + Self::Preprocessed { + multiplicities_tree, + multiplicities_root, + precomputed_tree, + precomputed_root, + num_precomputed_cols, + } => Self::Preprocessed { + multiplicities_tree: Arc::clone(multiplicities_tree), + multiplicities_root: *multiplicities_root, + precomputed_tree: Arc::clone(precomputed_tree), + precomputed_root: *precomputed_root, + num_precomputed_cols: *num_precomputed_cols, + }, + } + } +} + +/// Per-table Phase-A output. 
Non-preprocessed tables contribute their +/// tagged leaf vector to the shared MMCS; preprocessed tables ship two +/// independent per-table Merkle trees that stay out of the MMCS. +enum MainPhaseAOutput +where + FieldElement: AsBytes, +{ + Shared { + tag: MatrixTag, + leaves: Vec, + padded_height: usize, + }, + Preprocessed { + multiplicities_tree: Arc>, + multiplicities_root: Commitment, + precomputed_tree: Arc>, + precomputed_root: Commitment, + num_precomputed_cols: usize, + }, +} + +impl MainPhaseAOutput +where + FieldElement: AsBytes, +{ + fn precomputed_root(&self) -> Option { + match self { + Self::Shared { .. } => None, + Self::Preprocessed { + precomputed_root, .. + } => Some(*precomputed_root), + } + } + + fn main_tree_root(&self) -> Option { + match self { + Self::Shared { .. } => None, + Self::Preprocessed { + multiplicities_root, + .. + } => Some(*multiplicities_root), } } } @@ -209,8 +266,8 @@ where { /// The table of evaluations over the LDE of the main and auxiliary trace tables. pub(crate) lde_trace: LDETraceTable, - /// Commitment to the main trace. - pub(crate) main: TableCommit, + /// Commitment to the main trace (shared MMCS handle + per-table tag). + pub(crate) main: MainCommit, /// Commitment to the auxiliary (RAP) trace, if any. pub(crate) aux: Option>, /// The challenges of the RAP round. @@ -228,7 +285,7 @@ where FieldElement: AsBytes, FieldElement: AsBytes, { - main: TableCommit, + main: MainCommit, aux: Option>, rap_challenges: Vec>, bus_public_inputs: Option>, @@ -452,6 +509,87 @@ where result } +fn map_mmcs_err(e: MmcsError) -> ProvingError { + ProvingError::WrongParameter(format!("MMCS: {e:?}")) +} + +/// Build the unified main-trace MMCS from the per-table Phase A outputs. +/// Returns the root, the (tag, padded_height) spec, and the shared Arc that +/// every table's `MainCommit` borrows. 
+#[allow(clippy::type_complexity)] +fn build_main_mmcs( + outputs: &[MainPhaseAOutput], +) -> Result< + ( + Commitment, + Vec<(MatrixTag, usize)>, + Arc>>, + ), + ProvingError, +> +where + F: IsField + Send + Sync, + FieldElement: AsBytes + Send + Sync, +{ + let mut builder: MmcsBuilder> = MmcsBuilder::new(); + for output in outputs { + if let MainPhaseAOutput::Shared { + tag, + leaves, + padded_height: _, + } = output + { + builder + .add_matrix(*tag, leaves.clone()) + .map_err(map_mmcs_err)?; + } + } + let mmcs = builder.finalize().map_err(map_mmcs_err)?; + let root = *mmcs.root(); + let spec = mmcs.spec(); + Ok((root, spec, Arc::new(mmcs))) +} + +/// Tagged per-row leaf digest for the main-trace MMCS. +pub fn compute_tagged_leaves_bit_reversed( + columns: &[Vec>], + tag: MatrixTag, +) -> Vec +where + E: IsField, + FieldElement: AsBytes + Sync + Send + ByteConversion, +{ + if columns.is_empty() || columns[0].is_empty() { + return Vec::new(); + } + let num_rows = columns[0].len(); + let num_cols = columns.len(); + let byte_len = as ByteConversion>::BYTE_LEN; + debug_assert!(num_rows.is_power_of_two()); + let total_bytes = num_cols * byte_len; + let hash_leaf = + |buf: &mut [u8], row_idx: usize| -> Commitment { + let br_idx = reverse_index(row_idx, num_rows as u64); + for (col_idx, col) in columns.iter().enumerate() { + col[br_idx] + .write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); + } + crate::mmcs_leaf::hash_tagged_row_bytes(tag, buf) + }; + #[cfg(feature = "parallel")] + { + (0..num_rows) + .into_par_iter() + .map_init(|| vec![0u8; total_bytes], |buf, i| hash_leaf(buf, i)) + .collect() + } + #[cfg(not(feature = "parallel"))] + { + let mut buf = vec![0u8; total_bytes]; + (0..num_rows).map(|i| hash_leaf(&mut buf, i)).collect() + } +} + /// Compute Keccak-256 leaf hashes for `commit_composition_polynomial`: one /// leaf per row-pair, where leaf `i` hashes the BE concatenation of /// `parts[..][br_0] ++ parts[..][br_1]` with @@ -653,20 
+791,26 @@ pub trait IsStarkProver< }); } - /// Compute the main-trace LDE and commit. Returns a `TableCommit` along - /// with the owned LDE columns (consumed later in Phase D). + /// Compute the main-trace LDE and the per-table inputs needed by the + /// shared MMCS build. Returns a `MainPhaseAOutput` (tagged leaves + the + /// optional precomputed-columns Merkle tree) together with the owned + /// LDE columns consumed later in Phase D. /// - /// `precomputed`: if present, the leading `num_cols` columns are committed - /// as a separate Merkle tree (the precomputed split for preprocessed - /// tables) and the root is checked against the AIR-hardcoded commitment. + /// `tag`: the table's MatrixTag, fed into every leaf hash so the MMCS + /// can authenticate (matrix, row) pairs uniquely. + /// `precomputed`: if present, the leading `num_cols` columns are + /// committed as a separate Merkle tree (the precomputed split) and the + /// root is checked against the AIR-hardcoded commitment. The remaining + /// columns feed the MMCS leaves. If absent, every column feeds the MMCS. 
#[allow(clippy::type_complexity)] fn commit_main_trace( trace: &TraceTable, domain: &Domain, twiddles: &LdeTwiddles, + tag: MatrixTag, precomputed: Option<(Commitment, usize)>, #[cfg(feature = "disk-spill")] storage_mode: StorageMode, - ) -> Result<(TableCommit, Vec>>), ProvingError> + ) -> Result<(MainPhaseAOutput, Vec>>), ProvingError> where FieldElement: AsBytes, FieldElement: AsBytes, @@ -686,54 +830,58 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let commit = match precomputed { + let output = match precomputed { None => { - #[allow(unused_mut)] - let (mut tree, root) = Self::commit_columns_bit_reversed(&columns) - .ok_or(ProvingError::EmptyCommitment)?; - #[cfg(feature = "disk-spill")] - if storage_mode == StorageMode::Disk { - tree.spill_nodes_to_disk() - .map_err(|e| ProvingError::DiskSpill(format!("main Merkle tree: {e}")))?; + let leaves = compute_tagged_leaves_bit_reversed::(&columns, tag); + if leaves.is_empty() { + return Err(ProvingError::EmptyCommitment); + } + let padded_height = leaves.len(); + MainPhaseAOutput::Shared { + tag, + leaves, + padded_height, } - TableCommit::plain(tree, root) } Some((expected_precomputed_root, num_cols)) => { #[allow(unused_mut)] let (mut precomputed_tree, precomputed_root) = Self::commit_columns_bit_reversed(&columns[..num_cols]) .ok_or(ProvingError::EmptyCommitment)?; - #[allow(unused_mut)] - let (mut mult_tree, mult_root) = - Self::commit_columns_bit_reversed(&columns[num_cols..]) - .ok_or(ProvingError::EmptyCommitment)?; debug_assert_eq!( precomputed_root, expected_precomputed_root, - "Prover's precomputed commitment doesn't match hardcoded AIR commitment" + "Prover precomputed commitment must match the AIR-pinned value" ); #[cfg(feature = "disk-spill")] if storage_mode == StorageMode::Disk { precomputed_tree.spill_nodes_to_disk().map_err(|e| { ProvingError::DiskSpill(format!("precomputed Merkle tree: {e}")) })?; - mult_tree - .spill_nodes_to_disk() - .map_err(|e| 
ProvingError::DiskSpill(format!("mult Merkle tree: {e}")))?; } - TableCommit::preprocessed( - mult_tree, - mult_root, - precomputed_tree, + #[allow(unused_mut)] + let (mut multiplicities_tree, multiplicities_root) = + Self::commit_columns_bit_reversed(&columns[num_cols..]) + .ok_or(ProvingError::EmptyCommitment)?; + #[cfg(feature = "disk-spill")] + if storage_mode == StorageMode::Disk { + multiplicities_tree.spill_nodes_to_disk().map_err(|e| { + ProvingError::DiskSpill(format!("multiplicities Merkle tree: {e}")) + })?; + } + MainPhaseAOutput::Preprocessed { + multiplicities_tree: Arc::new(multiplicities_tree), + multiplicities_root, + precomputed_tree: Arc::new(precomputed_tree), precomputed_root, - num_cols, - ) + num_precomputed_cols: num_cols, + } } }; #[cfg(feature = "instruments")] crate::instruments::accum_r1_main(main_lde_dur, t_sub.elapsed()); - Ok((commit, columns)) + Ok((output, columns)) } /// Recompute Round1 from the trace, reusing the Merkle trees stored in commitments. @@ -1426,30 +1574,9 @@ pub trait IsStarkProver< let lde_trace = &round_1_result.lde_trace; let main_commit = &round_1_result.main; - let is_preprocessed = main_commit.is_preprocessed(); - let num_precomputed_cols = main_commit.num_precomputed_cols; let total_cols = lde_trace.num_main_cols(); for index in indexes_to_open.iter() { - // For preprocessed tables, open the main split (multiplicities only); - // for normal tables, open all main columns. - let main_trace_opening = if is_preprocessed { - Self::open_polys_with(domain, &main_commit.tree, *index, |row| { - lde_trace.gather_main_row_range(row, num_precomputed_cols, total_cols) - }) - } else { - Self::open_polys_with(domain, &main_commit.tree, *index, |row| { - lde_trace.gather_main_row(row) - }) - }; - - // For preprocessed tables, also open the precomputed-columns tree. 
- let precomputed_trace_opening = main_commit.precomputed_tree.as_ref().map(|tree| { - Self::open_polys_with(domain, tree, *index, |row| { - lde_trace.gather_main_row_range(row, 0, num_precomputed_cols) - }) - }); - let composition_openings = Self::open_composition_poly( &round_2_result.composition_poly_merkle_tree, &round_2_result.lde_composition_poly_evaluations, @@ -1462,6 +1589,69 @@ pub trait IsStarkProver< }) }); + let (main_trace_opening, precomputed_trace_opening) = match main_commit { + MainCommit::Shared { + mmcs, + padded_height, + .. + } => { + let max_height = mmcs + .spec() + .first() + .map(|(_, h)| *h) + .expect("MMCS spec is non-empty"); + debug_assert!( + padded_height.is_power_of_two() && max_height >= *padded_height + ); + let shift = (max_height / *padded_height).trailing_zeros() as usize; + let domain_size = domain.lde_roots_of_unity_coset.len() as u64; + let primary = *index * 2; + let sym = *index * 2 + 1; + let evaluations = lde_trace.gather_main_row(reverse_index(primary, domain_size)); + let evaluations_sym = lde_trace.gather_main_row(reverse_index(sym, domain_size)); + let mmcs_opening = mmcs + .open(primary << shift) + .expect("MMCS open: prover-side primary index in range"); + let mmcs_opening_sym = mmcs + .open(sym << shift) + .expect("MMCS open: prover-side sym index in range"); + let opening = MainTraceOpening::Mmcs { + evaluations, + evaluations_sym, + mmcs_opening, + mmcs_opening_sym, + }; + (opening, None) + } + MainCommit::Preprocessed { + multiplicities_tree, + precomputed_tree, + num_precomputed_cols, + .. 
+ } => { + let num_precomputed_cols = *num_precomputed_cols; + let mult = Self::open_polys_with( + domain, + multiplicities_tree, + *index, + |row| { + lde_trace.gather_main_row_range( + row, + num_precomputed_cols, + total_cols, + ) + }, + ); + let pre = Self::open_polys_with( + domain, + precomputed_tree, + *index, + |row| lde_trace.gather_main_row_range(row, 0, num_precomputed_cols), + ); + (MainTraceOpening::Tree(mult), Some(pre)) + } + }; + openings.push(DeepPolynomialOpening { composition_poly: composition_openings, main_trace_polys: main_trace_opening, @@ -1524,8 +1714,6 @@ pub trait IsStarkProver< num_airs ))); } - // `main_tags` is reserved for the upcoming MMCS wire-up; not consumed yet. - let _ = main_tags; // Check if any AIR has an auxiliary trace let needs_lookup_challenges = air_trace_pairs @@ -1611,7 +1799,7 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let phase_start = Instant::now(); - let mut main_commits: Vec> = Vec::with_capacity(num_airs); + let mut phase_a_outputs: Vec> = Vec::with_capacity(num_airs); let mut main_ldes: Vec>>> = Vec::with_capacity(num_airs); for chunk_start in (0..num_airs).step_by(k) { @@ -1628,6 +1816,7 @@ pub trait IsStarkProver< let (air, trace, _) = &air_trace_pairs[idx]; let domain = &domains[idx]; let twiddles = &twiddle_caches[idx]; + let tag = main_tags[idx]; let precomputed = air .is_preprocessed() @@ -1636,6 +1825,7 @@ pub trait IsStarkProver< *trace, domain, twiddles, + tag, precomputed, #[cfg(feature = "disk-spill")] storage_mode, @@ -1643,18 +1833,58 @@ pub trait IsStarkProver< }) .collect(); - // Sequential: append roots to shared transcript (Fiat-Shamir ordering) + // Sequential: per table, absorb its preprocessed root and then + // its own per-table multiplicities root (preprocessed only). The + // shared MMCS root is absorbed once after the loop. Order must + // match the verifier replay. 
for result in chunk_results { - let (commit, cached_main) = result?; - if let Some(ref pre_root) = commit.precomputed_root { + let (output, cached_main) = result?; + if let Some(ref pre_root) = output.precomputed_root() { transcript.append_bytes(pre_root); } - transcript.append_bytes(&commit.root); - main_commits.push(commit); + if let Some(ref main_root) = output.main_tree_root() { + transcript.append_bytes(main_root); + } + phase_a_outputs.push(output); main_ldes.push(cached_main); } } + // Build the unified main-trace MMCS once over Shared (non-preprocessed) + // entries. Preprocessed tables stay out of the MMCS and keep their + // own per-table Merkle trees (already absorbed above). + let (main_mmcs_root, main_mmcs_spec, mmcs_arc) = + build_main_mmcs::(&phase_a_outputs)?; + transcript.append_bytes(&main_mmcs_root); + + let main_commits: Vec> = phase_a_outputs + .into_iter() + .map(|o| match o { + MainPhaseAOutput::Shared { + tag, + padded_height, + leaves: _, + } => MainCommit::Shared { + mmcs: Arc::clone(&mmcs_arc), + tag, + padded_height, + }, + MainPhaseAOutput::Preprocessed { + multiplicities_tree, + multiplicities_root, + precomputed_tree, + precomputed_root, + num_precomputed_cols, + } => MainCommit::Preprocessed { + multiplicities_tree, + multiplicities_root, + precomputed_tree, + precomputed_root, + num_precomputed_cols, + }, + }) + .collect(); + #[cfg(feature = "instruments")] let main_commits_elapsed = phase_start.elapsed(); #[cfg(feature = "instruments")] @@ -1962,17 +2192,22 @@ pub trait IsStarkProver< }); } - Ok(MultiProof { proofs }) + Ok(MultiProof { + proofs, + main_mmcs_root, + main_mmcs_spec, + }) } - /// Generate a STARK proof for a single AIR/trace. - /// This is equivalent to calling `multi_prove` with a single-element slice. + /// Generate a single-AIR STARK proof, returned as a one-element + /// `MultiProof`. The MMCS root + spec live at the multi-proof level (see + /// `MultiProof`), so single-table callers consume the wrapper directly. 
fn prove( air: &dyn AIR, trace: &mut TraceTable, pub_inputs: &PI, transcript: &mut (impl IsStarkTranscript + Clone + Send), - ) -> Result, ProvingError> + ) -> Result, ProvingError> where FieldElement: AsBytes, FieldElement: AsBytes, @@ -1983,9 +2218,8 @@ pub trait IsStarkProver< ::BaseType: SpillSafe, { let air_trace_pairs = vec![(air, trace, pub_inputs)]; - // Single-AIR path: synthesize a default tag. Callers that want - // multi-table soundness should call `multi_prove` directly with - // distinct tags. + // Single-AIR path: synthesize a default tag. Callers that need + // distinct chip identities call `multi_prove` directly. let main_tags = [MatrixTag::new([0; 8])]; Self::multi_prove( air_trace_pairs, @@ -1994,7 +2228,6 @@ pub trait IsStarkProver< #[cfg(feature = "disk-spill")] StorageMode::Ram, ) - .map(|mut multi_proof| multi_proof.proofs.remove(0)) } // TODO: propagate errors instead of unwrap() in open_deep_composition_poly and FRI operations @@ -2128,12 +2361,13 @@ pub trait IsStarkProver< info!("End proof generation"); Ok(StarkProof { - // [t] - lde_trace_main_merkle_root: round_1_result.main.root, + // For preprocessed tables: per-table Merkle root over multiplicities + // (preprocessed tables stay out of the shared main-trace MMCS). 
+ lde_trace_main_merkle_root: round_1_result.main.main_tree_root(), // [t] lde_trace_aux_merkle_root: round_1_result.aux.as_ref().map(|x| x.root), // For preprocessed tables: commitment to precomputed columns only - lde_trace_precomputed_merkle_root: round_1_result.main.precomputed_root, + lde_trace_precomputed_merkle_root: round_1_result.main.precomputed_root(), // tⱼ(zgᵏ) trace_ood_evaluations: round_3_result.trace_ood_evaluations, // [H₁] and [H₂] diff --git a/crypto/stark/src/tests/bus_tests/soundness_tests.rs b/crypto/stark/src/tests/bus_tests/soundness_tests.rs index d2af70678..922049e0e 100644 --- a/crypto/stark/src/tests/bus_tests/soundness_tests.rs +++ b/crypto/stark/src/tests/bus_tests/soundness_tests.rs @@ -875,7 +875,7 @@ fn test_injected_bus_public_inputs_on_non_logup_air_rejected() { // Inject fake bus_public_inputs into a non-LogUp proof. // DummyAIR has has_trace_interaction() = false, so this must be rejected. - proof.bus_public_inputs = Some(BusPublicInputs { + proof.proofs[0].bus_public_inputs = Some(BusPublicInputs { table_contribution: FieldElement::::from(42u64), #[cfg(feature = "debug-checks")] per_bus_sums: Default::default(), diff --git a/crypto/stark/src/tests/small_trace_tests.rs b/crypto/stark/src/tests/small_trace_tests.rs index 8373ae9d6..0a006d6a6 100644 --- a/crypto/stark/src/tests/small_trace_tests.rs +++ b/crypto/stark/src/tests/small_trace_tests.rs @@ -19,7 +19,7 @@ type Felt = FieldElement; fn make_valid_simple_proof() -> ( SimpleAdditionAIR, - crate::proof::stark::StarkProof< + crate::proof::stark::MultiProof< GoldilocksField, GoldilocksField, SimpleAdditionPublicInputs, @@ -99,7 +99,7 @@ fn test_verify_fails_with_wrong_inputs() { let (air, mut proof) = make_valid_simple_proof(); // Tamper with the proof's public inputs - proof.public_inputs = SimpleAdditionPublicInputs { + proof.proofs[0].public_inputs = SimpleAdditionPublicInputs { a: Felt::from(99u64), // Wrong value - doesn't match trace b: Felt::from(2u64), }; @@ -124,11 
+124,13 @@ fn test_verify_rejects_truncated_composition_poly_parts_ood() { let (air, mut proof) = make_valid_simple_proof(); assert!( - !proof.composition_poly_parts_ood_evaluation.is_empty(), + !proof.proofs[0] + .composition_poly_parts_ood_evaluation + .is_empty(), "test precondition: a valid proof has at least one composition poly part", ); // Drop one entry so the per-query opening has more parts than the header. - proof.composition_poly_parts_ood_evaluation.pop(); + proof.proofs[0].composition_poly_parts_ood_evaluation.pop(); assert!( !Verifier::verify( @@ -150,15 +152,28 @@ fn test_verify_rejects_opening_column_count_mismatch() { // Append a phantom extra evaluation column to the first query's // main-trace opening so the (base + aux) count exceeds `ood_evaluations_table_width`. - if let Some(opening) = proof.deep_poly_openings.first_mut() { + use crate::proof::stark::MainTraceOpening; + if let Some(opening) = proof.proofs[0].deep_poly_openings.first_mut() { let extra = opening .main_trace_polys - .evaluations + .evaluations() .last() .cloned() .unwrap_or_else(Felt::zero); - opening.main_trace_polys.evaluations.push(extra); - opening.main_trace_polys.evaluations_sym.push(extra); + match &mut opening.main_trace_polys { + MainTraceOpening::Mmcs { + evaluations, + evaluations_sym, + .. + } => { + evaluations.push(extra); + evaluations_sym.push(extra); + } + MainTraceOpening::Tree(p) => { + p.evaluations.push(extra); + p.evaluations_sym.push(extra); + } + } } else { panic!("test precondition: a valid proof has at least one deep poly opening"); } diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index 7b86cd4b4..31ccbb3cb 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -340,23 +340,36 @@ pub trait IsStarkVerifier< ) } - /// Verify opening Open(tⱼ(D_LDE), 𝜐) and Open(tⱼ(D_LDE), -𝜐) for all trace polynomials tⱼ, - /// where 𝜐 and -𝜐 are the elements corresponding to the index challenge `iota`. 
+ /// Verify the main MMCS opening + precomputed/aux Merkle openings at FRI + /// challenge `iota`. `main_tag`, `main_mmcs_root`, `main_mmcs_spec` come + /// from the surrounding multi-proof. fn verify_trace_openings( proof: &StarkProof, deep_poly_openings: &DeepPolynomialOpening, iota: usize, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + main_mmcs_root: &Commitment, + main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, FieldElement: AsBytes + Sync + Send, { - // Main trace (multiplicities for preprocessed, full trace for normal). - let mut ok = Self::verify_opening_pair::( - &deep_poly_openings.main_trace_polys, - &proof.lde_trace_main_merkle_root, - iota, - ); + use crate::proof::stark::MainTraceOpening; + let main_ok = match &deep_poly_openings.main_trace_polys { + MainTraceOpening::Mmcs { .. } => Self::verify_main_mmcs_pair( + &deep_poly_openings.main_trace_polys, + iota, + main_tag, + main_mmcs_root, + main_mmcs_spec, + ), + MainTraceOpening::Tree(opening) => match &proof.lde_trace_main_merkle_root { + Some(root) => Self::verify_opening_pair::(opening, root, iota), + None => false, + }, + }; + let mut ok = main_ok; // Precomputed trace (preprocessed tables only). Mismatched presence is // unreachable in practice (multi_verify rejects such proofs upstream), @@ -385,6 +398,26 @@ pub trait IsStarkVerifier< ok } + /// Authenticate the main-trace MMCS pair for one query. 
+ fn verify_main_mmcs_pair( + main_opening: &crate::proof::stark::MainTraceOpening, + iota: usize, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + main_mmcs_root: &Commitment, + main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + ) -> bool + where + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, + { + verify_main_mmcs_pair_inner::( + main_opening, + iota, + main_tag, + main_mmcs_root, + main_mmcs_spec, + ) + } + /// Verify opening Open(Hᵢ(D_LDE), 𝜐) and Open(Hᵢ(D_LDE), -𝜐) for all parts Hᵢof the composition /// polynomial, where 𝜐 and -𝜐 are the elements corresponding to the index challenge `iota`. fn verify_composition_poly_opening( @@ -415,9 +448,12 @@ pub trait IsStarkVerifier< fn step_4_verify_trace_and_composition_openings( proof: &StarkProof, challenges: &Challenges, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + main_mmcs_root: &Commitment, + main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, FieldElement: AsBytes + Sync + Send, { challenges @@ -429,7 +465,14 @@ pub trait IsStarkVerifier< deep_poly_opening, &proof.composition_poly_root, iota_n, - ) && Self::verify_trace_openings(proof, deep_poly_opening, *iota_n) + ) && Self::verify_trace_openings( + proof, + deep_poly_opening, + *iota_n, + main_tag, + main_mmcs_root, + main_mmcs_spec, + ) }) } @@ -570,7 +613,7 @@ pub trait IsStarkVerifier< if let Some(p) = &opening.precomputed_trace_polys { lde_base.extend_from_slice(&p.evaluations); } - lde_base.extend_from_slice(&opening.main_trace_polys.evaluations); + lde_base.extend_from_slice(opening.main_trace_polys.evaluations()); let lde_aux: &[FieldElement] = opening .aux_trace_polys @@ -594,7 +637,7 @@ pub trait IsStarkVerifier< if let Some(p) = &opening.precomputed_trace_polys { lde_base_sym.extend_from_slice(&p.evaluations_sym); } - 
lde_base_sym.extend_from_slice(&opening.main_trace_polys.evaluations_sym); + lde_base_sym.extend_from_slice(opening.main_trace_polys.evaluations_sym()); let lde_aux_sym: &[FieldElement] = opening .aux_trace_polys @@ -721,7 +764,7 @@ pub trait IsStarkVerifier< expected_bus_balance: &FieldElement, ) -> bool where - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, FieldElement: AsBytes + Sync + Send, { if airs.len() != multi_proof.proofs.len() { @@ -740,8 +783,6 @@ pub trait IsStarkVerifier< ); return false; } - // `main_tags` is reserved for the upcoming MMCS verifier replay. - let _ = main_tags; // Check if any AIR has an auxiliary trace let needs_lookup_challenges = airs.iter().any(|air| air.has_aux_trace()); @@ -749,18 +790,24 @@ pub trait IsStarkVerifier< // ===================================================================== // Round 1, Phase A: Replay main trace commitments // ===================================================================== - // For preprocessed tables, use the hardcoded commitment (verifier cannot - // trust the prover). For normal tables, use the commitment from the proof. - + // Per table: validate the optional precomputed commitment against + // the hardcoded AIR value (the only one the verifier trusts), and + // absorb it into the transcript. After every table, absorb the + // single shared MMCS root that commits to every main trace. Also + // cross-check `main_mmcs_spec` against the (tag, padded_height_lde) + // pairs reproduced from the AIRs. + + let mut expected_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = + Vec::with_capacity(airs.len()); for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() { + let lde_size = proof.trace_length * (air.options().blowup_factor as usize); if air.is_preprocessed() { - // Preprocessed table: VERIFY precomputed commitment matches hardcoded. 
- // This is the critical soundness check - ensures prover used correct precomputed values. + // Preprocessed table: validate + absorb both its AIR-pinned + // precomputed root and its own per-table multiplicities root. + // Stays OUT of the shared MMCS spec. let expected_precomputed = air.precomputed_commitment(); match &proof.lde_trace_precomputed_merkle_root { - Some(actual) if *actual == expected_precomputed => { - // OK - commitment matches hardcoded - } + Some(actual) if *actual == expected_precomputed => {} Some(actual) => { error!( "Preprocessed commitment MISMATCH for table {idx}: expected {:?}, got {:?}", @@ -773,18 +820,42 @@ pub trait IsStarkVerifier< return false; } } - - // Add BOTH commitments to transcript (Fiat-Shamir binding). - // Precomputed commitment binds challenges to correct precomputed values. - // Multiplicities commitment binds challenges to actual lookups made. transcript.append_bytes(&expected_precomputed); - transcript.append_bytes(&proof.lde_trace_main_merkle_root); + + match &proof.lde_trace_main_merkle_root { + Some(root) => transcript.append_bytes(root), + None => { + error!( + "Preprocessed table {idx} proof missing multiplicities Merkle root" + ); + return false; + } + } } else { - // Normal table: use commitment from proof - transcript.append_bytes(&proof.lde_trace_main_merkle_root); + // Non-preprocessed table: nothing per-table; the shared MMCS + // root absorbed below covers its main columns. + if proof.lde_trace_main_merkle_root.is_some() { + error!( + "Non-preprocessed table {idx} unexpectedly supplied a per-table main root" + ); + return false; + } + expected_spec.push((main_tags[idx], lde_size)); } } + // Deterministic sort matches `MmcsBuilder::finalize` (height desc, tag asc). 
+ expected_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + if expected_spec != multi_proof.main_mmcs_spec { + error!( + "main_mmcs_spec mismatch: expected {:?}, got {:?}", + expected_spec, multi_proof.main_mmcs_spec, + ); + return false; + } + + transcript.append_bytes(&multi_proof.main_mmcs_root); + // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges // ===================================================================== @@ -847,12 +918,15 @@ pub trait IsStarkVerifier< table_transcript.append_field_element(&bpi.table_contribution); } - // Rounds 2-4: verify + // Rounds 2-4: verify (per-table MMCS context threaded through). if !Self::verify_rounds_2_to_4( *air, proof, &mut table_transcript, lookup_challenges.clone(), + main_tags[idx], + &multi_proof.main_mmcs_root, + &multi_proof.main_mmcs_spec, ) { error!( "Table {} failed verify_rounds_2_to_4 (num_constraints={}, trace_cols={})", @@ -899,25 +973,22 @@ pub trait IsStarkVerifier< true } - /// Verify a single STARK proof. - /// This is equivalent to calling `multi_verify` with a single-element slice. + /// Verify a single-AIR STARK proof packaged as a one-element `MultiProof`. + /// Equivalent to `multi_verify(&[air], proof, &[default_tag], ...)`. fn verify( - proof: &StarkProof, + proof: &MultiProof, air: &dyn AIR, transcript: &mut (impl IsStarkTranscript + Clone), ) -> bool where - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, FieldElement: AsBytes + Sync + Send, PI: Clone, { - let multi_proof = MultiProof { - proofs: vec![proof.clone()], - }; let main_tags = [crypto::merkle_tree::mmcs::MatrixTag::new([0; 8])]; Self::multi_verify( &[air], - &multi_proof, + proof, &main_tags, transcript, &FieldElement::zero(), @@ -1061,14 +1132,22 @@ pub trait IsStarkVerifier< } /// Verifies a single table after round 1 has been replayed. 
+ /// + /// `main_tag`, `main_mmcs_root`, `main_mmcs_spec` come from the shared + /// multi-proof and are needed to authenticate the per-table main-trace + /// openings in step 4. + #[allow(clippy::too_many_arguments)] fn verify_rounds_2_to_4( air: &dyn AIR, proof: &StarkProof, transcript: &mut impl IsStarkTranscript, rap_challenges: Vec>, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + main_mmcs_root: &Commitment, + main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, FieldElement: AsBytes + Sync + Send, { let domain = new_verifier_domain(air, proof.trace_length); @@ -1142,7 +1221,13 @@ pub trait IsStarkVerifier< let timer4 = Instant::now(); #[allow(clippy::let_and_return)] - if !Self::step_4_verify_trace_and_composition_openings(proof, &challenges) { + if !Self::step_4_verify_trace_and_composition_openings( + proof, + &challenges, + main_tag, + main_mmcs_root, + main_mmcs_spec, + ) { #[cfg(not(feature = "test_fiat_shamir"))] error!("DEEP Composition Polynomial verification failed"); return false; @@ -1168,3 +1253,64 @@ pub trait IsStarkVerifier< true } } + +fn verify_main_mmcs_pair_inner( + main_opening: &crate::proof::stark::MainTraceOpening, + iota: usize, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + main_mmcs_root: &Commitment, + main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], +) -> bool +where + F: IsField, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, +{ + use crate::mmcs_leaf::hash_tagged_row; + use crate::proof::stark::MainTraceOpening; + + let (evaluations, evaluations_sym, mmcs_opening, mmcs_opening_sym) = match main_opening { + MainTraceOpening::Mmcs { + evaluations, + evaluations_sym, + mmcs_opening, + mmcs_opening_sym, + } => (evaluations, evaluations_sym, mmcs_opening, mmcs_opening_sym), + MainTraceOpening::Tree(_) => return false, + }; + + let table_idx = match 
main_mmcs_spec.iter().position(|(t, _)| *t == main_tag) { + Some(i) => i, + None => return false, + }; + let table_height = main_mmcs_spec[table_idx].1; + let max_height = match main_mmcs_spec.first().map(|(_, h)| *h) { + Some(h) => h, + None => return false, + }; + if !table_height.is_power_of_two() || max_height < table_height { + return false; + } + let shift = (max_height / table_height).trailing_zeros() as usize; + let g_primary = (iota * 2) << shift; + let g_sym = (iota * 2 + 1) << shift; + let leaf_primary = hash_tagged_row::(main_tag, evaluations); + let leaf_sym = hash_tagged_row::(main_tag, evaluations_sym); + if mmcs_opening.global_index != g_primary || mmcs_opening_sym.global_index != g_sym { + return false; + } + let leaves = &mmcs_opening.matrix_leaves; + let leaves_sym = &mmcs_opening_sym.matrix_leaves; + if table_idx >= leaves.len() || table_idx >= leaves_sym.len() { + return false; + } + if leaves[table_idx].0 != main_tag || leaves[table_idx].1 != leaf_primary { + return false; + } + if leaves_sym[table_idx].0 != main_tag || leaves_sym[table_idx].1 != leaf_sym { + return false; + } + let ok = mmcs_opening.verify::>(main_mmcs_root, main_mmcs_spec); + let ok_sym = + mmcs_opening_sym.verify::>(main_mmcs_root, main_mmcs_spec); + ok && ok_sym +} diff --git a/prover/src/lib.rs b/prover/src/lib.rs index 209d45853..dc5073ac9 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -503,9 +503,10 @@ impl VmAirs { // ============================================================================= /// Replay the prover's Phase A (main trace commitments) to recover the shared -/// LogUp challenges (z, alpha). Creates a fresh transcript, appends all main -/// trace commitments in the same order as the prover, then samples two -/// challenge elements. +/// LogUp challenges (z, alpha). 
Mirrors `multi_verify` Phase A absorb order: +/// for each table, absorb its precomputed root and (preprocessed only) its +/// per-table multiplicities Merkle root; then absorb the shared main-trace +/// MMCS root once at the end. pub(crate) fn replay_transcript_phase_a( airs: &[&dyn AIR], multi_proof: &MultiProof, @@ -514,9 +515,12 @@ pub(crate) fn replay_transcript_phase_a( for (air, proof) in airs.iter().zip(&multi_proof.proofs) { if air.is_preprocessed() { transcript.append_bytes(&air.precomputed_commitment()); + if let Some(root) = &proof.lde_trace_main_merkle_root { + transcript.append_bytes(root); + } } - transcript.append_bytes(&proof.lde_trace_main_merkle_root); } + transcript.append_bytes(&multi_proof.main_mmcs_root); let z: FieldElement = transcript.sample_field_element(); let alpha: FieldElement = transcript.sample_field_element(); (z, alpha) From 34ff7658c674a19cc1abc1de5aa88b7c0dde6666 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 14:46:40 -0300 Subject: [PATCH 10/21] test(stark/mmcs): per-vector soundness tests for the shared MMCS path 10 tests covering the attack surface of the shared main-trace MMCS introduced in the previous commit. All operate on a baseline-valid two-table multi-proof over non-preprocessed AIRs (DummyAIR + BitFlagsAIR) so every main opening is `MainTraceOpening::Mmcs`. The preprocessed `MainTraceOpening::Tree` branch is exercised end-to-end by lambda-vm-prover `bitwise_tests`. 
Each test tampers with a single field and asserts verifier rejection: Field tampered Detection mechanism ----------------------------------------------------------------------- multi_proof.main_mmcs_root[0] transcript divergence multi_proof.main_mmcs_spec[0].1 (height) reproduced-spec check multi_proof.main_mmcs_spec[0].0 (tag) reproduced-spec check mmcs_opening.matrix_leaves[idx].1 (digest) rehash-vs-leaf check mmcs_opening.matrix_leaves[idx].0 (tag) leaf-tag check mmcs_opening.global_index g_primary/g_sym match mmcs_opening.siblings[0][0] MmcsOpening::verify evaluations[0] rehash mismatch main_tags slice swapped at verifier spec sort mismatch (baseline test) verifies cleanly Together these pin the soundness of every byte the verifier consults on the new path, locking in behaviour before the C2 streaming-builder work. --- .../stark/src/tests/mmcs_soundness_tests.rs | 239 ++++++++++++++++++ crypto/stark/src/tests/mod.rs | 1 + 2 files changed, 240 insertions(+) create mode 100644 crypto/stark/src/tests/mmcs_soundness_tests.rs diff --git a/crypto/stark/src/tests/mmcs_soundness_tests.rs b/crypto/stark/src/tests/mmcs_soundness_tests.rs new file mode 100644 index 000000000..0a690e085 --- /dev/null +++ b/crypto/stark/src/tests/mmcs_soundness_tests.rs @@ -0,0 +1,239 @@ +//! Soundness tests for the shared main-trace MMCS path. +//! +//! All tests use a multi-table proof over non-preprocessed AIRs (so every +//! table's main slice lives in `MainTraceOpening::Mmcs`). The preprocessed +//! per-table-tree path is exercised end-to-end by lambda-vm-prover's +//! `bitwise_tests` (the bitwise AIR is preprocessed). +//! +//! Each test starts from a baseline-valid multi-proof, tampers with a +//! single field on the MMCS path, and asserts the verifier rejects. 
+ +use crypto::fiat_shamir::default_transcript::DefaultTranscript; +use crypto::merkle_tree::mmcs::MatrixTag; +use math::field::{element::FieldElement, goldilocks::GoldilocksField}; + +use crate::examples::{ + bit_flags::{self, BitFlagsAIR}, + dummy_air::{self, DummyAIR}, +}; +use crate::proof::options::ProofOptions; +use crate::proof::stark::{MainTraceOpening, MultiProof}; +use crate::test_utils::{multi_prove_ram, multi_verify_ram, synth_main_tags}; +use crate::traits::AIR; + +type F = GoldilocksField; + +/// Build a baseline multi-proof over (DummyAIR, BitFlagsAIR). Both are +/// non-preprocessed → every main opening is `MainTraceOpening::Mmcs`. +#[allow(clippy::type_complexity)] +fn baseline_proof() -> ( + DummyAIR, + BitFlagsAIR, + MultiProof, +) { + let proof_options = ProofOptions::default_test_options(); + let air_1 = DummyAIR::new(&proof_options); + let air_2 = BitFlagsAIR::new(&proof_options); + let mut trace_1 = dummy_air::dummy_trace::(16); + let mut trace_2 = bit_flags::bit_prefix_flag_trace(32); + let air_trace_pairs: Vec<( + &dyn AIR, + &mut _, + &_, + )> = vec![ + (&air_1, &mut trace_1, &()), + (&air_2, &mut trace_2, &()), + ]; + let proof = + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + (air_1, air_2, proof) +} + +fn verify(airs: &[&dyn AIR], proof: &MultiProof) -> bool { + multi_verify_ram( + airs, + proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +/// First-iota opening for the first table in the multi-proof, in the Mmcs +/// variant. Helper for tests that need a mutable handle into the per-query +/// MMCS opening fields. 
+fn first_mmcs_opening_mut( + proof: &mut MultiProof, +) -> &mut MainTraceOpening { + &mut proof.proofs[0].deep_poly_openings[0].main_trace_polys +} + +#[test_log::test] +fn baseline_two_table_proof_verifies() { + let (air_1, air_2, proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(verify(&airs, &proof), "baseline proof must verify"); +} + +#[test_log::test] +fn tampered_main_mmcs_root_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + proof.main_mmcs_root[0] ^= 1; + assert!( + !verify(&airs, &proof), + "tampered main MMCS root must be rejected" + ); +} + +#[test_log::test] +fn tampered_main_mmcs_spec_height_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + let height = &mut proof.main_mmcs_spec[0].1; + *height /= 2; + assert!( + !verify(&airs, &proof), + "spec height mismatch must be rejected" + ); +} + +#[test_log::test] +fn tampered_main_mmcs_spec_tag_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + proof.main_mmcs_spec[0].0 = MatrixTag::new([0xFF; 8]); + assert!( + !verify(&airs, &proof), + "spec tag mismatch must be rejected" + ); +} + +#[test_log::test] +fn tampered_mmcs_opening_leaf_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + match first_mmcs_opening_mut(&mut proof) { + MainTraceOpening::Mmcs { mmcs_opening, .. 
} => { + mmcs_opening.matrix_leaves[0].1[0] ^= 1; + } + MainTraceOpening::Tree(_) => panic!("baseline must produce Mmcs variant"), + } + assert!( + !verify(&airs, &proof), + "tampered matrix-leaf digest must be rejected" + ); +} + +#[test_log::test] +fn tampered_mmcs_opening_leaf_tag_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + match first_mmcs_opening_mut(&mut proof) { + MainTraceOpening::Mmcs { mmcs_opening, .. } => { + mmcs_opening.matrix_leaves[0].0 = MatrixTag::new([0xCC; 8]); + } + MainTraceOpening::Tree(_) => panic!("baseline must produce Mmcs variant"), + } + assert!( + !verify(&airs, &proof), + "tampered matrix-leaf tag must be rejected" + ); +} + +#[test_log::test] +fn tampered_mmcs_opening_global_index_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + match first_mmcs_opening_mut(&mut proof) { + MainTraceOpening::Mmcs { mmcs_opening, .. } => { + mmcs_opening.global_index ^= 0b10; + } + MainTraceOpening::Tree(_) => panic!("baseline must produce Mmcs variant"), + } + assert!( + !verify(&airs, &proof), + "tampered MMCS global_index must be rejected" + ); +} + +#[test_log::test] +fn tampered_mmcs_opening_sibling_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + match first_mmcs_opening_mut(&mut proof) { + MainTraceOpening::Mmcs { mmcs_opening, .. } => { + assert!(!mmcs_opening.siblings.is_empty()); + mmcs_opening.siblings[0][0] ^= 1; + } + MainTraceOpening::Tree(_) => panic!("baseline must produce Mmcs variant"), + } + assert!( + !verify(&airs, &proof), + "tampered MMCS sibling must be rejected" + ); +} + +#[test_log::test] +fn tampered_evaluations_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + match first_mmcs_opening_mut(&mut proof) { + MainTraceOpening::Mmcs { evaluations, .. 
} => { + assert!(!evaluations.is_empty()); + evaluations[0] += FieldElement::::one(); + } + MainTraceOpening::Tree(_) => panic!("baseline must produce Mmcs variant"), + } + assert!( + !verify(&airs, &proof), + "tampered row evaluations must be rejected (rehash mismatch)" + ); +} + +#[test_log::test] +fn swapped_main_tags_at_verifier_rejected() { + // The verifier reproduces `main_tags` from `synth_main_tags(num_airs)` + // inside `multi_verify_ram`. To simulate a verifier that "lies" about + // tag ordering we call `multi_verify` directly with a permuted slice. + use crate::verifier::{IsStarkVerifier, Verifier}; + let (air_1, air_2, proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + + // Sanity: with the correct (synth) tag order it passes. + let correct = synth_main_tags(airs.len()); + assert!( + Verifier::multi_verify( + &airs, + &proof, + &correct, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ), + "baseline must verify with correct tags" + ); + + // Swap the two tags — the spec sort order is now wrong relative to the + // prover's commitments, so the spec match check must reject. 
+ let mut swapped = correct.clone(); + swapped.swap(0, 1); + assert!( + !Verifier::multi_verify( + &airs, + &proof, + &swapped, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ), + "swapped main_tags must be rejected" + ); +} diff --git a/crypto/stark/src/tests/mod.rs b/crypto/stark/src/tests/mod.rs index bc80e522e..f44c65ee9 100644 --- a/crypto/stark/src/tests/mod.rs +++ b/crypto/stark/src/tests/mod.rs @@ -2,6 +2,7 @@ pub mod air_tests; pub mod bus_tests; pub mod domain_cache_stats; pub mod fri_tests; +pub mod mmcs_soundness_tests; pub mod proof_options_tests; pub mod prove_verify_roundtrip_tests; pub mod prover_tests; From 9f6abba1050c4ec03425608e8c9450eaaee9e7a9 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 14:48:51 -0300 Subject: [PATCH 11/21] feat(cli): `proof-size` subcommand with per-section byte breakdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `cli proof-size <elf>` which generates (or loads via `--proof`) a VmProof and reports the serialized byte size, broken down by component across every sub-proof. Intended for CI to track proof-size regressions and improvements (e.g. the streaming MMCS migration that just landed). Usage: cli proof-size <elf> # human-readable table cli proof-size <elf> --json # machine-readable cli proof-size <elf> --proof bundle.bin # skip the re-prove cli proof-size <elf> --private-input file # stdin if needed Sections reported (summed across all sub-proofs): - main_mmcs_root / main_mmcs_spec (multi-proof header) - per_table_main_merkle_root (preprocessed only) - per_table_precomputed_merkle_root - per_table_aux_merkle_root - deep_poly_openings.main_trace_polys (MMCS vs Tree opens) - deep_poly_openings.{precomputed,aux,composition_poly} - fri_layers_merkle_roots / fri_query_list (FRI; usually dominant) - trace_ood_evaluations / composition_poly_parts_ood_evaluation - bus_public_inputs - other (bundle delta — headers, public_inputs, nonce, ...)
Encoding is bincode v1, matching `cli prove`'s output format so saved bundles round-trip 1:1. Cargo.lock updated only for the new `serde` + `serde_json` deps in `bin/cli`. No core dep changes. --- Cargo.lock | 2 + bin/cli/Cargo.toml | 2 + bin/cli/src/main.rs | 223 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 8fff60dcf..30a3adde6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -570,6 +570,8 @@ dependencies = [ "env_logger", "executor", "lambda-vm-prover", + "serde", + "serde_json", "stark", "tikv-jemalloc-ctl", "tikv-jemallocator", diff --git a/bin/cli/Cargo.toml b/bin/cli/Cargo.toml index 87bb1c8fc..45195a28c 100644 --- a/bin/cli/Cargo.toml +++ b/bin/cli/Cargo.toml @@ -10,6 +10,8 @@ prover = { path = "../../prover", package = "lambda-vm-prover" } stark = { path = "../../crypto/stark" } clap = { version = "4.3.10", features = ["derive"] } bincode = "1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" tikv-jemallocator = "0.6" tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true } env_logger = "0.11" diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index bdcea9518..a8ba411d5 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -171,6 +171,31 @@ enum Commands { #[arg(long, value_hint = ValueHint::FilePath)] private_input: Option, }, + + /// Generate a proof and report its serialized byte size, broken down + /// by component (trace openings, FRI, OOD evals, MMCS metadata, ...). + /// Intended for CI to track proof-size regressions / improvements + /// (e.g. the streaming MMCS migration). + ProofSize { + /// Path to the ELF file + #[arg(value_parser, value_hint = ValueHint::FilePath)] + elf: PathBuf, + + /// Optional path to a pre-generated proof bundle. When supplied, + /// the ELF is not re-proven; the file is decoded and its sizes + /// reported directly. The ELF is still needed to bind the proof + /// to the program statement. 
+ #[arg(long, value_hint = ValueHint::FilePath)] + proof: Option, + + /// Path to the private input file + #[arg(long, value_hint = ValueHint::FilePath)] + private_input: Option, + + /// Emit machine-readable JSON instead of a human-readable table. + #[arg(long)] + json: bool, + }, } fn main() -> ExitCode { @@ -199,6 +224,12 @@ fn main() -> ExitCode { time, } => cmd_verify(proof, elf, blowup, time), Commands::CountElements { elf, private_input } => cmd_count_elements(elf, private_input), + Commands::ProofSize { + elf, + proof, + private_input, + json, + } => cmd_proof_size(elf, proof, private_input, json), } } @@ -554,3 +585,195 @@ fn cmd_count_elements(elf_path: PathBuf, private_input_path: Option) -> } } } + +// ============================================================================= +// proof-size: serialize a VmProof and report a per-section byte breakdown. +// ============================================================================= + +/// One row of the proof-size report. `bytes` are the serialized length of +/// the corresponding piece of the proof under the same encoder used for the +/// full bundle (bincode v1). +#[derive(Debug, Clone, serde::Serialize)] +struct ProofSizeEntry { + section: &'static str, + bytes: usize, +} + +fn ser_len(value: &T) -> usize { + // bincode v1 mirrors the encoding used by VmProof callers (bin/cli prove + // and prover tests), so per-section sums add up to the total bundle. 
+ bincode::serialize(value).map(|v| v.len()).unwrap_or(0) +} + +fn cmd_proof_size( + elf_path: PathBuf, + proof_path: Option, + private_input_path: Option, + json: bool, +) -> ExitCode { + let elf_data = match std::fs::read(&elf_path) { + Ok(data) => data, + Err(e) => { + eprintln!("Failed to read ELF file: {}", e); + return ExitCode::FAILURE; + } + }; + + let vm_proof: VmProof = if let Some(path) = proof_path { + let bytes = match std::fs::read(&path) { + Ok(b) => b, + Err(e) => { + eprintln!("Failed to read proof file {}: {}", path.display(), e); + return ExitCode::FAILURE; + } + }; + match bincode::deserialize(&bytes) { + Ok(p) => p, + Err(e) => { + eprintln!("Failed to decode proof bundle: {}", e); + return ExitCode::FAILURE; + } + } + } else { + let private_inputs = match read_private_input(private_input_path.as_ref()) { + Ok(v) => v, + Err(e) => { + eprintln!("{e}"); + return ExitCode::FAILURE; + } + }; + eprintln!("Generating proof to measure..."); + match prover::prove_with_inputs(&elf_data, &private_inputs) { + Ok(p) => p, + Err(e) => { + eprintln!("Proving failed: {:?}", e); + return ExitCode::FAILURE; + } + } + }; + + let total = ser_len(&vm_proof); + let multi_proof_bytes = ser_len(&vm_proof.proof); + let main_mmcs_root_bytes = ser_len(&vm_proof.proof.main_mmcs_root); + let main_mmcs_spec_bytes = ser_len(&vm_proof.proof.main_mmcs_spec); + + // Sum per-section across every sub-proof so a single number captures the + // contribution of, e.g., "all FRI query lists across all tables". 
+ let mut s_main_trace_openings = 0usize; + let mut s_precomputed_trace_openings = 0usize; + let mut s_aux_trace_openings = 0usize; + let mut s_composition_openings = 0usize; + let mut s_fri_query_list = 0usize; + let mut s_fri_layers_roots = 0usize; + let mut s_trace_ood = 0usize; + let mut s_composition_ood = 0usize; + let mut s_per_table_main_root = 0usize; + let mut s_aux_root = 0usize; + let mut s_precomputed_root = 0usize; + let mut s_bus_public_inputs = 0usize; + let s_other; + + for proof in &vm_proof.proof.proofs { + s_per_table_main_root += ser_len(&proof.lde_trace_main_merkle_root); + s_aux_root += ser_len(&proof.lde_trace_aux_merkle_root); + s_precomputed_root += ser_len(&proof.lde_trace_precomputed_merkle_root); + s_trace_ood += ser_len(&proof.trace_ood_evaluations); + s_composition_ood += ser_len(&proof.composition_poly_parts_ood_evaluation); + s_fri_query_list += ser_len(&proof.query_list); + s_fri_layers_roots += ser_len(&proof.fri_layers_merkle_roots); + s_bus_public_inputs += ser_len(&proof.bus_public_inputs); + + for opening in &proof.deep_poly_openings { + s_main_trace_openings += ser_len(&opening.main_trace_polys); + s_precomputed_trace_openings += ser_len(&opening.precomputed_trace_polys); + s_aux_trace_openings += ser_len(&opening.aux_trace_polys); + s_composition_openings += ser_len(&opening.composition_poly); + } + } + + // Anything not captured above (composition_poly_root, fri_last_value, + // nonce, public_inputs, trace_length, headers...). Calculate as the + // bundle delta so the breakdown still sums to ~total. 
+ let accounted = main_mmcs_root_bytes + + main_mmcs_spec_bytes + + s_main_trace_openings + + s_precomputed_trace_openings + + s_aux_trace_openings + + s_composition_openings + + s_fri_query_list + + s_fri_layers_roots + + s_trace_ood + + s_composition_ood + + s_per_table_main_root + + s_aux_root + + s_precomputed_root + + s_bus_public_inputs; + s_other = multi_proof_bytes.saturating_sub(accounted); + + let entries: Vec = vec![ + ProofSizeEntry { section: "main_mmcs_root", bytes: main_mmcs_root_bytes }, + ProofSizeEntry { section: "main_mmcs_spec", bytes: main_mmcs_spec_bytes }, + ProofSizeEntry { section: "per_table_main_merkle_root (preprocessed)", bytes: s_per_table_main_root }, + ProofSizeEntry { section: "per_table_precomputed_merkle_root", bytes: s_precomputed_root }, + ProofSizeEntry { section: "per_table_aux_merkle_root", bytes: s_aux_root }, + ProofSizeEntry { section: "deep_poly_openings.main_trace_polys", bytes: s_main_trace_openings }, + ProofSizeEntry { section: "deep_poly_openings.precomputed_trace_polys", bytes: s_precomputed_trace_openings }, + ProofSizeEntry { section: "deep_poly_openings.aux_trace_polys", bytes: s_aux_trace_openings }, + ProofSizeEntry { section: "deep_poly_openings.composition_poly", bytes: s_composition_openings }, + ProofSizeEntry { section: "fri_layers_merkle_roots", bytes: s_fri_layers_roots }, + ProofSizeEntry { section: "fri_query_list", bytes: s_fri_query_list }, + ProofSizeEntry { section: "trace_ood_evaluations", bytes: s_trace_ood }, + ProofSizeEntry { section: "composition_poly_parts_ood_evaluation", bytes: s_composition_ood }, + ProofSizeEntry { section: "bus_public_inputs", bytes: s_bus_public_inputs }, + ProofSizeEntry { section: "other (headers / public_inputs / nonce / ...)", bytes: s_other }, + ]; + + if json { + #[derive(serde::Serialize)] + struct Report<'a> { + elf: String, + total_vm_proof_bytes: usize, + multi_proof_bytes: usize, + sub_proof_count: usize, + main_mmcs_spec_entries: usize, + sections: &'a 
[ProofSizeEntry], + } + let report = Report { + elf: elf_path.display().to_string(), + total_vm_proof_bytes: total, + multi_proof_bytes, + sub_proof_count: vm_proof.proof.proofs.len(), + main_mmcs_spec_entries: vm_proof.proof.main_mmcs_spec.len(), + sections: &entries, + }; + match serde_json::to_string_pretty(&report) { + Ok(s) => println!("{s}"), + Err(e) => { + eprintln!("Failed to encode JSON: {}", e); + return ExitCode::FAILURE; + } + } + } else { + println!(); + println!("== VmProof size report =="); + println!("ELF: {}", elf_path.display()); + println!("Total VmProof: {:>10} bytes", total); + println!("MultiProof only: {:>10} bytes", multi_proof_bytes); + println!("Sub-proofs: {:>10}", vm_proof.proof.proofs.len()); + println!("MMCS spec entries: {:>10}", vm_proof.proof.main_mmcs_spec.len()); + println!(); + println!("{:<48}{:>14}{:>10}", "section", "bytes", "% of total"); + println!("{}", "-".repeat(72)); + let denom = total.max(1) as f64; + for e in &entries { + println!( + "{:<48}{:>14}{:>9.2}%", + e.section, + e.bytes, + (e.bytes as f64) * 100.0 / denom + ); + } + } + + ExitCode::SUCCESS +} From dea5289652a14ee6668e8d6d970d618b34cab476 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 14:58:48 -0300 Subject: [PATCH 12/21] feat(cli): `proof-size-diff` subcommand + stable `ProofSizeReport` JSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a pure post-processing subcommand for CI to render proof-size deltas between two `proof-size --json` reports. Mirrors the `tooling/loc` workflow: prove on the baseline and on the PR, then diff the JSONs to produce a comment-ready table. cli proof-size base.elf --json > base.json cli proof-size pr.elf --json > pr.json cli proof-size-diff base.json pr.json --format github > comment.md Formats: - `text` — plain aligned table for terminal / logs - `github` — Markdown with a collapsible
per-section block; the headline row stays visible in the PR comment so reviewers see the total delta at a glance - `slack` — Slack-flavoured Markdown (code fence for the breakdown) Per-section diff handles the typical edge cases: - Section present in both → bytes delta + percentage - Section new in current → `+N (new)` - Section dropped in current → `-N (gone)` - Section order follows `current`, then prev-only at the end (lossless) The previously-anonymous `Report` struct in `cmd_proof_size` is hoisted to a public-shape `ProofSizeReport { elf, total_vm_proof_bytes, multi_proof_bytes, sub_proof_count, main_mmcs_spec_entries, sections }` so the JSON schema is stable enough for CI to depend on. `section` is now `String` (was `&'static str`) so the same struct round-trips through deserialization. 3 unit tests in `proof_size_diff_tests` cover: - Headline + per-section deltas in text format - New-section + removed-section handling - GitHub format collapsible block + percentage rendering Build clean, 3/3 tests green, smoke-tested both formats with fake JSONs. --- bin/cli/src/main.rs | 279 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 251 insertions(+), 28 deletions(-) diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index a8ba411d5..9e4c95ad4 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -196,6 +196,25 @@ enum Commands { #[arg(long)] json: bool, }, + + /// Diff two `proof-size --json` reports and emit a comparison suitable + /// for posting to a PR / Slack channel. Pure post-processing — does not + /// run the prover. Designed to mirror the `tooling/loc` workflow: + /// cli proof-size base.elf --json > base.json + /// cli proof-size pr.elf --json > pr.json + /// cli proof-size-diff base.json pr.json --format github > comment.md + ProofSizeDiff { + /// JSON report from the baseline (e.g. main) build. + #[arg(value_hint = ValueHint::FilePath)] + previous: PathBuf, + /// JSON report from the candidate (e.g. PR) build. 
+ #[arg(value_hint = ValueHint::FilePath)] + current: PathBuf, + /// Output format: `github` (markdown table for PR comments), + /// `slack` (Slack-flavoured markdown), or `text` (plain table). + #[arg(long, default_value = "text")] + format: String, + }, } fn main() -> ExitCode { @@ -230,6 +249,11 @@ fn main() -> ExitCode { private_input, json, } => cmd_proof_size(elf, proof, private_input, json), + Commands::ProofSizeDiff { + previous, + current, + format, + } => cmd_proof_size_diff(previous, current, &format), } } @@ -593,12 +617,24 @@ fn cmd_count_elements(elf_path: PathBuf, private_input_path: Option) -> /// One row of the proof-size report. `bytes` are the serialized length of /// the corresponding piece of the proof under the same encoder used for the /// full bundle (bincode v1). -#[derive(Debug, Clone, serde::Serialize)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] struct ProofSizeEntry { - section: &'static str, + section: String, bytes: usize, } +/// Top-level JSON shape emitted by `cli proof-size --json` and consumed by +/// `cli proof-size-diff`. Stable enough for CI to depend on. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +struct ProofSizeReport { + elf: String, + total_vm_proof_bytes: usize, + multi_proof_bytes: usize, + sub_proof_count: usize, + main_mmcs_spec_entries: usize, + sections: Vec, +} + fn ser_len(value: &T) -> usize { // bincode v1 mirrors the encoding used by VmProof callers (bin/cli prove // and prover tests), so per-section sums add up to the total bundle. 
@@ -711,40 +747,31 @@ fn cmd_proof_size( s_other = multi_proof_bytes.saturating_sub(accounted); let entries: Vec = vec![ - ProofSizeEntry { section: "main_mmcs_root", bytes: main_mmcs_root_bytes }, - ProofSizeEntry { section: "main_mmcs_spec", bytes: main_mmcs_spec_bytes }, - ProofSizeEntry { section: "per_table_main_merkle_root (preprocessed)", bytes: s_per_table_main_root }, - ProofSizeEntry { section: "per_table_precomputed_merkle_root", bytes: s_precomputed_root }, - ProofSizeEntry { section: "per_table_aux_merkle_root", bytes: s_aux_root }, - ProofSizeEntry { section: "deep_poly_openings.main_trace_polys", bytes: s_main_trace_openings }, - ProofSizeEntry { section: "deep_poly_openings.precomputed_trace_polys", bytes: s_precomputed_trace_openings }, - ProofSizeEntry { section: "deep_poly_openings.aux_trace_polys", bytes: s_aux_trace_openings }, - ProofSizeEntry { section: "deep_poly_openings.composition_poly", bytes: s_composition_openings }, - ProofSizeEntry { section: "fri_layers_merkle_roots", bytes: s_fri_layers_roots }, - ProofSizeEntry { section: "fri_query_list", bytes: s_fri_query_list }, - ProofSizeEntry { section: "trace_ood_evaluations", bytes: s_trace_ood }, - ProofSizeEntry { section: "composition_poly_parts_ood_evaluation", bytes: s_composition_ood }, - ProofSizeEntry { section: "bus_public_inputs", bytes: s_bus_public_inputs }, - ProofSizeEntry { section: "other (headers / public_inputs / nonce / ...)", bytes: s_other }, + ProofSizeEntry { section: "main_mmcs_root".into(), bytes: main_mmcs_root_bytes }, + ProofSizeEntry { section: "main_mmcs_spec".into(), bytes: main_mmcs_spec_bytes }, + ProofSizeEntry { section: "per_table_main_merkle_root (preprocessed)".into(), bytes: s_per_table_main_root }, + ProofSizeEntry { section: "per_table_precomputed_merkle_root".into(), bytes: s_precomputed_root }, + ProofSizeEntry { section: "per_table_aux_merkle_root".into(), bytes: s_aux_root }, + ProofSizeEntry { section: 
"deep_poly_openings.main_trace_polys".into(), bytes: s_main_trace_openings }, + ProofSizeEntry { section: "deep_poly_openings.precomputed_trace_polys".into(), bytes: s_precomputed_trace_openings }, + ProofSizeEntry { section: "deep_poly_openings.aux_trace_polys".into(), bytes: s_aux_trace_openings }, + ProofSizeEntry { section: "deep_poly_openings.composition_poly".into(), bytes: s_composition_openings }, + ProofSizeEntry { section: "fri_layers_merkle_roots".into(), bytes: s_fri_layers_roots }, + ProofSizeEntry { section: "fri_query_list".into(), bytes: s_fri_query_list }, + ProofSizeEntry { section: "trace_ood_evaluations".into(), bytes: s_trace_ood }, + ProofSizeEntry { section: "composition_poly_parts_ood_evaluation".into(), bytes: s_composition_ood }, + ProofSizeEntry { section: "bus_public_inputs".into(), bytes: s_bus_public_inputs }, + ProofSizeEntry { section: "other (headers / public_inputs / nonce / ...)".into(), bytes: s_other }, ]; if json { - #[derive(serde::Serialize)] - struct Report<'a> { - elf: String, - total_vm_proof_bytes: usize, - multi_proof_bytes: usize, - sub_proof_count: usize, - main_mmcs_spec_entries: usize, - sections: &'a [ProofSizeEntry], - } - let report = Report { + let report = ProofSizeReport { elf: elf_path.display().to_string(), total_vm_proof_bytes: total, multi_proof_bytes, sub_proof_count: vm_proof.proof.proofs.len(), main_mmcs_spec_entries: vm_proof.proof.main_mmcs_spec.len(), - sections: &entries, + sections: entries.clone(), }; match serde_json::to_string_pretty(&report) { Ok(s) => println!("{s}"), @@ -777,3 +804,199 @@ fn cmd_proof_size( ExitCode::SUCCESS } + +// ============================================================================= +// proof-size-diff: read two ProofSizeReport JSONs and emit a comparison. 
+// ============================================================================= + +fn cmd_proof_size_diff(previous: PathBuf, current: PathBuf, format: &str) -> ExitCode { + let prev: ProofSizeReport = match load_report(&previous) { + Ok(r) => r, + Err(e) => { + eprintln!("Failed to load previous report ({}): {}", previous.display(), e); + return ExitCode::FAILURE; + } + }; + let curr: ProofSizeReport = match load_report(&current) { + Ok(r) => r, + Err(e) => { + eprintln!("Failed to load current report ({}): {}", current.display(), e); + return ExitCode::FAILURE; + } + }; + + let rendered = match format { + "github" => render_github(&prev, &curr), + "slack" => render_slack(&prev, &curr), + "text" | "txt" => render_text(&prev, &curr), + other => { + eprintln!("Unknown --format value: {other:?}. Try github | slack | text."); + return ExitCode::FAILURE; + } + }; + println!("{rendered}"); + ExitCode::SUCCESS +} + +fn load_report(path: &PathBuf) -> Result<ProofSizeReport, String> { + let s = std::fs::read_to_string(path).map_err(|e| e.to_string())?; + serde_json::from_str(&s).map_err(|e| e.to_string()) +} + +/// Pair sections from two reports by name. The order returned mirrors the +/// section order of `curr`; any section present in `prev` but missing in +/// `curr` is appended at the end so the diff is lossless. 
+fn paired_sections<'a>( + prev: &'a ProofSizeReport, + curr: &'a ProofSizeReport, +) -> Vec<(String, Option<usize>, Option<usize>)> { + let mut out: Vec<(String, Option<usize>, Option<usize>)> = Vec::new(); + for c in &curr.sections { + let p = prev.sections.iter().find(|p| p.section == c.section); + out.push((c.section.clone(), p.map(|p| p.bytes), Some(c.bytes))); + } + for p in &prev.sections { + if curr.sections.iter().all(|c| c.section != p.section) { + out.push((p.section.clone(), Some(p.bytes), None)); + } + } + out +} + +fn fmt_delta(prev: Option<usize>, curr: Option<usize>) -> String { + match (prev, curr) { + (Some(p), Some(c)) => { + let d = c as i64 - p as i64; + let pct = if p == 0 { 0.0 } else { d as f64 * 100.0 / p as f64 }; + format!("{:+} ({:+.2}%)", d, pct) + } + (None, Some(c)) => format!("+{} (new)", c), + (Some(p), None) => format!("-{} (gone)", p), + (None, None) => "—".to_string(), + } +} + +fn fmt_total_delta(prev: usize, curr: usize) -> String { + let d = curr as i64 - prev as i64; + let pct = if prev == 0 { 0.0 } else { d as f64 * 100.0 / prev as f64 }; + format!("{:+} ({:+.2}%)", d, pct) +} + +fn render_text(prev: &ProofSizeReport, curr: &ProofSizeReport) -> String { + let mut s = String::new(); + s.push_str("== Proof size diff ==\n"); + s.push_str(&format!("previous: {} ({} bytes)\n", prev.elf, prev.total_vm_proof_bytes)); + s.push_str(&format!("current: {} ({} bytes)\n", curr.elf, curr.total_vm_proof_bytes)); + s.push_str(&format!( + "total delta: {}\n\n", + fmt_total_delta(prev.total_vm_proof_bytes, curr.total_vm_proof_bytes) + )); + s.push_str(&format!("{:<48}{:>12}{:>12}{:>22}\n", "section", "previous", "current", "delta")); + s.push_str(&format!("{}\n", "-".repeat(94))); + for (section, p, c) in paired_sections(prev, curr) { + let p_str = p.map(|v| v.to_string()).unwrap_or_else(|| "—".into()); + let c_str = c.map(|v| v.to_string()).unwrap_or_else(|| "—".into()); + s.push_str(&format!("{:<48}{:>12}{:>12}{:>22}\n", section, p_str, c_str, fmt_delta(p, c))); + } + s +} + +fn 
render_github(prev: &ProofSizeReport, curr: &ProofSizeReport) -> String { + let mut s = String::new(); + s.push_str("### 📦 Proof size diff\n\n"); + s.push_str(&format!( + "| | bytes |\n|---|---:|\n| previous (`{}`) | {} |\n| current (`{}`) | {} |\n| **total delta** | **{}** |\n\n", + prev.elf, + prev.total_vm_proof_bytes, + curr.elf, + curr.total_vm_proof_bytes, + fmt_total_delta(prev.total_vm_proof_bytes, curr.total_vm_proof_bytes), + )); + s.push_str("<details><summary>Per-section breakdown</summary>\n\n"); + s.push_str("| section | previous | current | delta |\n|---|---:|---:|---:|\n"); + for (section, p, c) in paired_sections(prev, curr) { + let p_str = p.map(|v| v.to_string()).unwrap_or_else(|| "—".into()); + let c_str = c.map(|v| v.to_string()).unwrap_or_else(|| "—".into()); + s.push_str(&format!("| `{}` | {} | {} | {} |\n", section, p_str, c_str, fmt_delta(p, c))); + } + s.push_str("\n</details>\n"); + s +} + +fn render_slack(prev: &ProofSizeReport, curr: &ProofSizeReport) -> String { + let mut s = String::new(); + s.push_str("*Proof size diff*\n"); + s.push_str(&format!( + "previous (`{}`): {} bytes\n", + prev.elf, prev.total_vm_proof_bytes + )); + s.push_str(&format!( + "current (`{}`): {} bytes\n", + curr.elf, curr.total_vm_proof_bytes + )); + s.push_str(&format!( + "*total delta*: {}\n\n```\n", + fmt_total_delta(prev.total_vm_proof_bytes, curr.total_vm_proof_bytes) + )); + s.push_str(&format!("{:<48}{:>12}{:>12}{:>22}\n", "section", "previous", "current", "delta")); + for (section, p, c) in paired_sections(prev, curr) { + let p_str = p.map(|v| v.to_string()).unwrap_or_else(|| "—".into()); + let c_str = c.map(|v| v.to_string()).unwrap_or_else(|| "—".into()); + s.push_str(&format!("{:<48}{:>12}{:>12}{:>22}\n", section, p_str, c_str, fmt_delta(p, c))); + } + s.push_str("```\n"); + s +} + +#[cfg(test)] +mod proof_size_diff_tests { + use super::*; + + fn r(elf: &str, total: usize, sections: &[(&str, usize)]) -> ProofSizeReport { + ProofSizeReport { + elf: elf.into(), + total_vm_proof_bytes: total, + multi_proof_bytes: total, + sub_proof_count: 1, + main_mmcs_spec_entries: 0, + sections: sections + .iter() + .map(|(s, b)| ProofSizeEntry { section: (*s).into(), bytes: *b }) + .collect(), + } + } + + #[test] + fn text_diff_shows_total_and_per_section_delta() { + let prev = r("base.elf", 100, &[("a", 60), ("b", 40)]); + let curr = r("pr.elf", 110, &[("a", 50), ("b", 60)]); + let out = render_text(&prev, &curr); + assert!(out.contains("total delta: +10")); + assert!(out.contains("-10")); + assert!(out.contains("+20")); + } + + #[test] + fn diff_handles_new_and_removed_sections() { + let prev = r("base.elf", 50, &[("a", 30), ("gone", 20)]); + let curr = r("pr.elf", 60, &[("a", 30), ("new", 30)]); + let pairs = paired_sections(&prev, &curr); + // Order: current sections first, then prev-only. 
+ assert_eq!(pairs[0].0, "a"); + assert_eq!(pairs[1].0, "new"); + assert_eq!(pairs[2].0, "gone"); + let text = render_text(&prev, &curr); + assert!(text.contains("(new)")); + assert!(text.contains("(gone)")); + } + + #[test] + fn github_format_has_collapsible_section() { + let prev = r("base.elf", 100, &[("a", 100)]); + let curr = r("pr.elf", 90, &[("a", 90)]); + let out = render_github(&prev, &curr); + assert!(out.contains("### 📦 Proof size diff")); + assert!(out.contains("<details>")); + assert!(out.contains("-10 (-10.00%)")); + } +} From 3f6e5a33069c80ff91bc679c54fefd82ecd6922e Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 15:29:29 -0300 Subject: [PATCH 13/21] feat(stark/mmcs): wire AUX trace under a shared MMCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror of the main-trace MMCS C1 work, applied to the auxiliary trace. Pulls per-table aux Merkle roots out of the per-table forked transcripts into a single shared aux MMCS root absorbed into the SHARED transcript BEFORE per-table forking — every fork inherits the same aux binding identically, and the proof carries one aux root instead of N. Domain separation: - `LEAF_DOMAIN_TAG_AUX = "LAMBDAVM_AUX_MMCS_LEAF_V1"` (alongside the existing main tag, now also aliased as `LEAF_DOMAIN_TAG_MAIN`). - `hash_tagged_row_bytes_aux` / `hash_tagged_row_aux` helpers use it. - An aux-MMCS opening cannot authenticate a main leaf (or vice versa); a `(tag, row)` pair under the two domains produces distinct digests. Pinned by a new test in `mmcs_leaf::tests`. Architecture: - `MainTraceOpening` gains a sibling `AuxTraceOpening` enum (Mmcs variant only — there is no preprocessed-equivalent for aux). - `MainCommit` gains a sibling `AuxCommit::Shared { mmcs (Arc), tag, padded_height }`. `Round1.aux: Option<AuxCommit<E>>` (`None` when an AIR has no aux trace). - `DeepPolynomialOpening.aux_trace_polys: Option<AuxTraceOpening<E>>` (the old `PolynomialOpenings` shape is gone). - `StarkProof` drops `lde_trace_aux_merkle_root` entirely. - `MultiProof` gains: - `aux_mmcs_root: Option<Commitment>` — `None` when no AIR has aux. - `aux_mmcs_spec: Vec<(MatrixTag, usize)>` — filtered to has-aux tables. Phase C absorb order (prover + verifier match exactly): 1. Phase A: main MMCS root absorb (unchanged). 2. Phase B: sample LogUp challenges (unchanged). 3. NEW: build aux LDEs + tagged leaves for tables with aux, build shared aux MMCS, absorb its root into the SHARED transcript. 4. Fork per-table. 5. 
Per-table: bind `bus_public_inputs.table_contribution` (unchanged), run rounds 2-4 (unchanged). No per-table aux root absorb. Per-query open / verify path: - Prover: `aux_commit.mmcs.open((iota*2) << shift)` and `mmcs.open((iota*2+1) << shift)`, producing `AuxTraceOpening::Mmcs`. - Verifier: `verify_aux_mmcs_pair_inner` rehashes evaluations with the AUX domain, compares against `mmcs_opening.matrix_leaves[table_idx]`, cross-checks `global_index`, and authenticates against root + spec. - `verify_trace_openings` dispatches: `Some(_)` requires aux MMCS root to be present; `(None, _)` is fine (table has no aux); `(Some, None)` rejects (proof claims an opening but no MMCS exists). Verifier spec validation: - Reproduces `expected_aux_spec` from `airs.filter(has_aux_trace).map((tag, lde_size))` in spec-fixed order, sorts by `(height desc, tag asc)` to match `MmcsBuilder::finalize`, and rejects on any mismatch with the proof-supplied `aux_mmcs_spec`. Dead code cleanup: - Removed `TableCommit` entirely. Both main and aux now go through their own enum-typed commits; nothing else used it. Tests: stark 142/142 green (130 prior + the 10 main-MMCS soundness + 2 new aux leaf-hash tests). The 77 prove_elfs failures in lambda-vm-prover predate this work (UnknownSyscall(5)). --- crypto/stark/src/mmcs_leaf.rs | 88 ++++++++- crypto/stark/src/proof/stark.rs | 49 ++++- crypto/stark/src/prover.rs | 326 ++++++++++++++++++++++---------- crypto/stark/src/verifier.rs | 168 ++++++++++++---- 4 files changed, 491 insertions(+), 140 deletions(-) diff --git a/crypto/stark/src/mmcs_leaf.rs b/crypto/stark/src/mmcs_leaf.rs index 488a937af..447f9650f 100644 --- a/crypto/stark/src/mmcs_leaf.rs +++ b/crypto/stark/src/mmcs_leaf.rs @@ -31,6 +31,18 @@ use crate::config::Commitment; /// any encoding change so old proofs cannot be silently re-interpreted. pub const LEAF_DOMAIN_TAG: &[u8] = b"LAMBDAVM_MAIN_MMCS_LEAF_V1"; +/// Aliased name for `LEAF_DOMAIN_TAG`. 
Use this in new code to make the +/// intent explicit when an MMCS-specific tag is needed alongside the aux +/// tag below. +pub const LEAF_DOMAIN_TAG_MAIN: &[u8] = LEAF_DOMAIN_TAG; + +/// Versioned domain separator for AUX-trace MMCS leaves. Distinct from +/// `LEAF_DOMAIN_TAG_MAIN` so that an aux leaf and a main leaf with the +/// same `(MatrixTag, row_bytes)` produce different digests — i.e. neither +/// MMCS opening can authenticate a leaf that was committed against the +/// other. +pub const LEAF_DOMAIN_TAG_AUX: &[u8] = b"LAMBDAVM_AUX_MMCS_LEAF_V1"; + /// Synthesize `n` distinct [`MatrixTag`]s derived from positional index. /// Useful for generic stark tests where the caller does not own a stable /// chip-type assignment. Production code in lambda-vm uses @@ -47,22 +59,58 @@ pub fn synth_main_tags_for(slice: &[T]) -> Vec { synth_main_tags(slice.len()) } -/// Hash one row's worth of column bytes into a leaf digest using the -/// canonical tagged format. `row_bytes_be` is the concatenation of every -/// committed column's element written big-endian, in column order. +/// Hash one row's worth of column bytes into a MAIN-trace MMCS leaf digest. +/// `row_bytes_be` is the concatenation of every committed column's element +/// written big-endian, in column order. #[inline] pub fn hash_tagged_row_bytes(tag: MatrixTag, row_bytes_be: &[u8]) -> Commitment { + hash_with_domain(LEAF_DOMAIN_TAG_MAIN, tag, row_bytes_be) +} + +/// Hash one row's worth of column bytes into an AUX-trace MMCS leaf digest. +/// Uses [`LEAF_DOMAIN_TAG_AUX`] so the digest cannot collide with a +/// main-trace leaf for the same `(tag, row_bytes)`. 
+#[inline] +pub fn hash_tagged_row_bytes_aux(tag: MatrixTag, row_bytes_be: &[u8]) -> Commitment { + hash_with_domain(LEAF_DOMAIN_TAG_AUX, tag, row_bytes_be) +} + +#[inline] +fn hash_with_domain(domain: &[u8], tag: MatrixTag, row_bytes_be: &[u8]) -> Commitment { let mut h = Keccak256::new(); - h.update(LEAF_DOMAIN_TAG); + h.update(domain); h.update(tag.0); h.update(row_bytes_be); h.finalize().into() } -/// Convenience: hash a row from individual field elements. Allocates a -/// stack-or-heap buffer for the row, suitable for verifier-side per-query +/// Convenience: hash a MAIN-trace row from individual field elements. +/// Allocates a row-sized buffer; suitable for verifier-side per-query /// re-hashing (where allocation cost is dominated by FRI work anyway). pub fn hash_tagged_row(tag: MatrixTag, row: &[FieldElement]) -> Commitment +where + E: IsField, + FieldElement: ByteConversion, +{ + hash_tagged_row_inner::(LEAF_DOMAIN_TAG_MAIN, tag, row) +} + +/// Convenience: hash an AUX-trace row from individual field elements. Same +/// allocation pattern as [`hash_tagged_row`]. 
+pub fn hash_tagged_row_aux(tag: MatrixTag, row: &[FieldElement]) -> Commitment +where + E: IsField, + FieldElement: ByteConversion, +{ + hash_tagged_row_inner::(LEAF_DOMAIN_TAG_AUX, tag, row) +} + +#[inline] +fn hash_tagged_row_inner( + domain: &[u8], + tag: MatrixTag, + row: &[FieldElement], +) -> Commitment where E: IsField, FieldElement: ByteConversion, @@ -72,7 +120,7 @@ where for (col_idx, fe) in row.iter().enumerate() { fe.write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); } - hash_tagged_row_bytes(tag, &buf) + hash_with_domain(domain, tag, &buf) } #[cfg(test)] @@ -97,4 +145,30 @@ mod tests { let row_b = vec![FE::from(1u64), FE::from(3u64)]; assert_ne!(hash_tagged_row(tag, &row_a), hash_tagged_row(tag, &row_b)); } + + #[test] + fn main_and_aux_domains_separate() { + // Same (tag, row) under the two domains MUST produce distinct + // digests; otherwise an aux opening could authenticate a main leaf + // (or vice versa). + let tag = MatrixTag::new([0xAB; 8]); + let row = vec![FE::from(42u64), FE::from(7u64)]; + let main_digest = hash_tagged_row(tag, &row); + let aux_digest = hash_tagged_row_aux(tag, &row); + assert_ne!(main_digest, aux_digest); + } + + #[test] + fn aux_bytes_helper_matches_aux_element_helper() { + // The bytes-flavoured helper and the element-flavoured helper must + // agree on the same input — same domain separator, same hash. 
+ let tag = MatrixTag::new([3; 8]); + let row = vec![FE::from(11u64), FE::from(13u64), FE::from(17u64)]; + let byte_len = ::BYTE_LEN; + let mut buf = vec![0u8; row.len() * byte_len]; + for (i, fe) in row.iter().enumerate() { + fe.write_bytes_be(&mut buf[i * byte_len..(i + 1) * byte_len]); + } + assert_eq!(hash_tagged_row_bytes_aux(tag, &buf), hash_tagged_row_aux(tag, &row)); + } } diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index 667f9f170..57b28f75c 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -57,6 +57,37 @@ impl MainTraceOpening { } } +/// Per-query aux-trace opening. Symmetric to [`MainTraceOpening`], minus +/// the `Tree` variant — every aux table that exists goes through the +/// shared aux MMCS (there's no preprocessed-equivalent for aux). +/// +/// `Option` in `DeepPolynomialOpening.aux_trace_polys` +/// carries the "this AIR has no aux trace at all" case. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(bound = "")] +pub enum AuxTraceOpening { + Mmcs { + evaluations: Vec>, + evaluations_sym: Vec>, + mmcs_opening: MmcsOpening, + mmcs_opening_sym: MmcsOpening, + }, +} + +impl AuxTraceOpening { + pub fn evaluations(&self) -> &[FieldElement] { + match self { + Self::Mmcs { evaluations, .. } => evaluations, + } + } + + pub fn evaluations_sym(&self) -> &[FieldElement] { + match self { + Self::Mmcs { evaluations_sym, .. } => evaluations_sym, + } + } +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] pub struct DeepPolynomialOpening, E: IsField> { @@ -65,7 +96,9 @@ pub struct DeepPolynomialOpening, E: IsField> { /// For preprocessed tables: openings for precomputed columns. /// These are verified against the hardcoded precomputed commitment. 
pub precomputed_trace_polys: Option>, - pub aux_trace_polys: Option>, + /// `None` when the AIR has no aux trace; otherwise an MMCS opening + /// against the shared aux MMCS (root at `MultiProof::aux_mmcs_root`). + pub aux_trace_polys: Option>, } pub type DeepPolynomialOpenings = Vec>; @@ -80,9 +113,6 @@ pub struct StarkProof, E: IsField, PI> { /// tables stay out of the shared main-trace MMCS, so their main slice /// keeps its own per-table tree. `None` for non-preprocessed tables. pub lde_trace_main_merkle_root: Option, - // Commitments of auxiliary trace columns - // [tⱼ] - pub lde_trace_aux_merkle_root: Option, // For preprocessed tables: commitment to precomputed columns only. // Verifier checks this matches the hardcoded commitment from AIR. pub lde_trace_precomputed_merkle_root: Option, @@ -119,14 +149,23 @@ pub struct StarkProof, E: IsField, PI> { /// Non-preprocessed tables share a single main-trace MMCS authenticated by /// `main_mmcs_root`; `main_mmcs_spec` lists `(MatrixTag, padded_height)` /// per committed table in the MMCS sort order. Preprocessed tables stay -/// out of this MMCS — each carries its own per-table Merkle root in +/// out of the main MMCS — each carries its own per-table Merkle root in /// `StarkProof::lde_trace_main_merkle_root` plus the AIR-pinned /// precomputed root. Both groups' roots are absorbed in spec-fixed order /// during Phase A. +/// +/// Aux traces (only present for AIRs with LogUp interactions) share a +/// SECOND MMCS authenticated by `aux_mmcs_root`; `aux_mmcs_spec` lists +/// `(MatrixTag, padded_height)` for the subset of tables that contribute +/// aux. `aux_mmcs_root` is `None` when no table in the multi-proof has an +/// aux trace. Domain-separated from the main MMCS via `LEAF_DOMAIN_TAG_AUX` +/// so that no aux opening can authenticate a main leaf (or vice versa). 
#[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound = "PI: serde::Serialize + serde::de::DeserializeOwned")] pub struct MultiProof, E: IsField, PI> { pub proofs: Vec>, pub main_mmcs_root: Commitment, pub main_mmcs_spec: Vec<(MatrixTag, usize)>, + pub aux_mmcs_root: Option, + pub aux_mmcs_spec: Vec<(MatrixTag, usize)>, } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 6c58d5ac0..a5e2b8142 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -81,55 +81,6 @@ pub enum ProvingError { DiskSpill(String), } -/// Commitment artifacts for one trace table (main or auxiliary). Used for both -/// plain and preprocessed tables. Preprocessed tables additionally carry a -/// separate Merkle tree over their precomputed columns, hence the optional -/// `precomputed_tree`/`precomputed_root` pair and the `num_precomputed_cols` -/// index used when opening positions. -pub(crate) struct TableCommit -where - FieldElement: AsBytes, -{ - /// Merkle tree over the trace columns (multiplicities only for preprocessed tables). - pub(crate) tree: Arc>, - /// Root of `tree`. - pub(crate) root: Commitment, - /// Preprocessed tables only: Merkle tree over precomputed columns. - pub(crate) precomputed_tree: Option>>, - /// Preprocessed tables only: root of `precomputed_tree`. - pub(crate) precomputed_root: Option, - /// Preprocessed tables only: number of precomputed columns. Zero otherwise. - pub(crate) num_precomputed_cols: usize, -} - -impl TableCommit -where - FieldElement: AsBytes, -{ - /// Build a `TableCommit` for a plain (non-preprocessed) table. - fn plain(tree: BatchedMerkleTree, root: Commitment) -> Self { - Self { - tree: Arc::new(tree), - root, - precomputed_tree: None, - precomputed_root: None, - num_precomputed_cols: 0, - } - } - - /// Cheap clone. Only bumps Arc refcounts, no tree data is copied. 
- fn share(&self) -> Self { - Self { - tree: Arc::clone(&self.tree), - root: self.root, - precomputed_tree: self.precomputed_tree.as_ref().map(Arc::clone), - precomputed_root: self.precomputed_root, - num_precomputed_cols: self.num_precomputed_cols, - } - } - -} - /// Per-table commitment artifacts for the main trace under the shared /// MMCS protocol. The `mmcs` Arc is the SAME instance for every table in /// the multi-proof — Phase A builds it once. @@ -256,6 +207,54 @@ where } } +/// Per-table aux-trace commitment under the shared aux MMCS. +/// Mirror of [`MainCommit::Shared`]: the `mmcs` Arc is shared across every +/// table that contributes an aux trace; `tag` + `padded_height` identify +/// this table's slot inside that MMCS. +pub(crate) enum AuxCommit +where + FieldElement: AsBytes, +{ + Shared { + mmcs: Arc>>, + tag: MatrixTag, + padded_height: usize, + }, +} + +impl AuxCommit +where + FieldElement: AsBytes, +{ + fn share(&self) -> Self { + match self { + Self::Shared { + mmcs, + tag, + padded_height, + } => Self::Shared { + mmcs: Arc::clone(mmcs), + tag: *tag, + padded_height: *padded_height, + }, + } + } +} + +/// Per-table aux Phase-C output collected BEFORE the shared aux MMCS is +/// built. `leaves` are aux-tagged Keccak digests over the committed aux-trace +/// LDE rows. Consumed by the single `MmcsBuilder::finalize` call once +/// every aux-bearing table has produced them. +struct AuxPhaseCOutput +where + FieldElement: AsBytes, +{ + tag: MatrixTag, + leaves: Vec, + _marker: PhantomData, + padded_height: usize, +} + /// A container for the results of the first round of the STARK Prove protocol. pub(crate) struct Round1 where @@ -269,7 +268,7 @@ where /// Commitment to the main trace (shared MMCS handle + per-table tag). pub(crate) main: MainCommit, /// Commitment to the auxiliary (RAP) trace, if any. - pub(crate) aux: Option>, + pub(crate) aux: Option>, /// The challenges of the RAP round. 
pub(crate) rap_challenges: Vec>, /// Bus interaction public inputs (initial and final aux column values). @@ -286,7 +285,7 @@ where FieldElement: AsBytes, { main: MainCommit, - aux: Option>, + aux: Option>, rap_challenges: Vec>, bus_public_inputs: Option>, } @@ -318,7 +317,7 @@ where Round1 { lde_trace: LDETraceTable::from_columns(lde.main, lde.aux, step_size, blowup_factor), main: self.main.share(), - aux: self.aux.as_ref().map(TableCommit::share), + aux: self.aux.as_ref().map(AuxCommit::share), rap_challenges: self.rap_challenges.clone(), bus_public_inputs: self.bus_public_inputs.clone(), } @@ -550,6 +549,82 @@ where Ok((root, spec, Arc::new(mmcs))) } +/// Tagged per-row leaf digest for the AUX-trace MMCS. Mirror of +/// [`compute_tagged_leaves_bit_reversed`] but uses the aux domain +/// separator so aux/main leaves cannot collide. +pub fn compute_tagged_leaves_bit_reversed_aux( + columns: &[Vec>], + tag: MatrixTag, +) -> Vec +where + E: IsField, + FieldElement: AsBytes + Sync + Send + ByteConversion, +{ + if columns.is_empty() || columns[0].is_empty() { + return Vec::new(); + } + let num_rows = columns[0].len(); + let num_cols = columns.len(); + let byte_len = as ByteConversion>::BYTE_LEN; + debug_assert!(num_rows.is_power_of_two()); + let total_bytes = num_cols * byte_len; + let hash_leaf = + |buf: &mut [u8], row_idx: usize| -> Commitment { + let br_idx = reverse_index(row_idx, num_rows as u64); + for (col_idx, col) in columns.iter().enumerate() { + col[br_idx] + .write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); + } + crate::mmcs_leaf::hash_tagged_row_bytes_aux(tag, buf) + }; + #[cfg(feature = "parallel")] + { + (0..num_rows) + .into_par_iter() + .map_init(|| vec![0u8; total_bytes], |buf, i| hash_leaf(buf, i)) + .collect() + } + #[cfg(not(feature = "parallel"))] + { + let mut buf = vec![0u8; total_bytes]; + (0..num_rows).map(|i| hash_leaf(&mut buf, i)).collect() + } +} + +/// Build the shared AUX-trace MMCS from per-table Phase-C outputs 
(only +/// tables that have an aux trace participate). Returns `None`/`empty spec` +/// when no table contributes aux. +#[allow(clippy::type_complexity)] +fn build_aux_mmcs( + outputs: &[Option>], +) -> Result< + ( + Option, + Vec<(MatrixTag, usize)>, + Option>>>, + ), + ProvingError, +> +where + E: IsField + Send + Sync, + FieldElement: AsBytes + Send + Sync, +{ + let any = outputs.iter().any(|o| o.is_some()); + if !any { + return Ok((None, Vec::new(), None)); + } + let mut builder: MmcsBuilder> = MmcsBuilder::new(); + for out in outputs.iter().flatten() { + builder + .add_matrix(out.tag, out.leaves.clone()) + .map_err(map_mmcs_err)?; + } + let mmcs = builder.finalize().map_err(map_mmcs_err)?; + let root = *mmcs.root(); + let spec = mmcs.spec(); + Ok((Some(root), spec, Some(Arc::new(mmcs)))) +} + /// Tagged per-row leaf digest for the main-trace MMCS. pub fn compute_tagged_leaves_bit_reversed( columns: &[Vec>], @@ -1584,9 +1659,31 @@ pub trait IsStarkProver< ); let aux_trace_polys = round_1_result.aux.as_ref().map(|aux| { - Self::open_polys_with(domain, &aux.tree, *index, |row| { - lde_trace.gather_aux_row(row) - }) + let AuxCommit::Shared { mmcs, padded_height, .. 
} = aux; + let max_height = mmcs + .spec() + .first() + .map(|(_, h)| *h) + .expect("aux MMCS spec is non-empty when aux commit exists"); + debug_assert!(padded_height.is_power_of_two() && max_height >= *padded_height); + let shift = (max_height / *padded_height).trailing_zeros() as usize; + let domain_size = domain.lde_roots_of_unity_coset.len() as u64; + let primary = *index * 2; + let sym = *index * 2 + 1; + let evaluations = lde_trace.gather_aux_row(reverse_index(primary, domain_size)); + let evaluations_sym = lde_trace.gather_aux_row(reverse_index(sym, domain_size)); + let mmcs_opening = mmcs + .open(primary << shift) + .expect("aux MMCS open: prover-side primary index in range"); + let mmcs_opening_sym = mmcs + .open(sym << shift) + .expect("aux MMCS open: prover-side sym index in range"); + crate::proof::stark::AuxTraceOpening::Mmcs { + evaluations, + evaluations_sym, + mmcs_opening, + mmcs_opening_sym, + } }); let (main_trace_opening, precomputed_trace_opening) = match main_commit { @@ -1959,30 +2056,20 @@ pub trait IsStarkProver< heap_snaps.push(s); } - // Pass 2: Parallel fork transcript → extract → LDE → commit in chunks of K. - // Each table gets its own transcript fork. + // Pass 2: parallel aux-LDE + tagged-leaf computation, then a single + // shared aux MMCS build. The aux MMCS root is absorbed into the + // SHARED transcript BEFORE per-table forking, so every table's + // forked transcript sees the same aux MMCS commitment without + // dragging per-table aux roots through Fiat-Shamir. #[cfg(feature = "instruments")] let phase_start = Instant::now(); - // Pre-fork all transcripts (cheap, sequential — must match verifier ordering) - let mut table_transcripts: Vec<_> = (0..num_airs) - .map(|idx| { - let mut t = transcript.clone(); - if num_airs > 1 { - t.append_bytes(&(idx as u64).to_le_bytes()); - } - t - }) - .collect(); - - // Parallel aux commit in chunks of K. 
Each entry holds the optional aux - // `TableCommit` (`None` when the AIR has no aux trace) and the cached - // aux LDE columns consumed in Phase D. - #[allow(clippy::type_complexity)] - let mut aux_results: Vec<( - Option>, - Vec>>, - )> = Vec::with_capacity(num_airs); + // Per-table aux Phase-C outputs. `None` entries are tables with no + // aux trace and contribute neither leaves nor an MMCS slot. + let mut aux_outputs: Vec>> = + Vec::with_capacity(num_airs); + let mut aux_ldes: Vec>>> = + Vec::with_capacity(num_airs); for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); @@ -1998,6 +2085,7 @@ pub trait IsStarkProver< let (air, trace, _) = &air_trace_pairs[idx]; let domain = &domains[idx]; let twiddles = &twiddle_caches[idx]; + let tag = main_tags[idx]; if air.has_aux_trace() { let lde_size = domain.interpolation_domain_size * domain.blowup_factor; @@ -2017,35 +2105,81 @@ pub trait IsStarkProver< let aux_lde_dur = t_sub.elapsed(); #[cfg(feature = "instruments")] let t_sub = Instant::now(); - #[allow(unused_mut)] - let (mut tree, root) = Self::commit_columns_bit_reversed(&columns) - .ok_or(ProvingError::EmptyCommitment)?; + let leaves = + compute_tagged_leaves_bit_reversed_aux::(&columns, tag); + if leaves.is_empty() { + return Err(ProvingError::EmptyCommitment); + } + let padded_height = leaves.len(); #[cfg(feature = "instruments")] crate::instruments::accum_r1_aux(aux_lde_dur, t_sub.elapsed()); - - #[cfg(feature = "disk-spill")] - if storage_mode == StorageMode::Disk { - tree.spill_nodes_to_disk().map_err(|e| { - ProvingError::DiskSpill(format!("aux Merkle tree: {e}")) - })?; - } - Ok((Some(TableCommit::plain(tree, root)), columns)) + let output = AuxPhaseCOutput:: { + tag, + leaves, + padded_height, + _marker: PhantomData, + }; + Ok((Some(output), columns)) } else { Ok((None, Vec::new())) } }) .collect(); - // Sequential: append aux roots to forked transcripts - for (j, result) in chunk_aux.into_iter().enumerate() { - 
let (aux_commit, cached_aux) = result?; - if let Some(ref c) = aux_commit { - table_transcripts[chunk_start + j].append_bytes(&c.root); - } - aux_results.push((aux_commit, cached_aux)); + for result in chunk_aux { + let (output, cached_aux) = result?; + aux_outputs.push(output); + aux_ldes.push(cached_aux); } } + // Build the shared aux MMCS over the non-None entries. Order is + // spec-fixed (matches `main_tags` order, filtered to has-aux). + let (aux_mmcs_root_opt, aux_mmcs_spec, aux_mmcs_arc) = + build_aux_mmcs::(&aux_outputs)?; + + // Absorb the aux MMCS root into the SHARED transcript before + // forking — every table's fork inherits this binding identically. + if let Some(ref root) = aux_mmcs_root_opt { + transcript.append_bytes(root); + } + + // Pre-fork all transcripts (cheap, sequential — must match verifier ordering). + // Happens AFTER aux MMCS absorb so each fork inherits the binding. + let mut table_transcripts: Vec<_> = (0..num_airs) + .map(|idx| { + let mut t = transcript.clone(); + if num_airs > 1 { + t.append_bytes(&(idx as u64).to_le_bytes()); + } + t + }) + .collect(); + + // Reassemble per-table aux commits from the shared MMCS Arc. + let aux_commits: Vec>> = aux_outputs + .into_iter() + .map(|o| { + o.map(|out| AuxCommit::Shared { + mmcs: Arc::clone( + aux_mmcs_arc + .as_ref() + .expect("MMCS Arc populated when at least one aux output present"), + ), + tag: out.tag, + padded_height: out.padded_height, + }) + }) + .collect(); + #[allow(clippy::type_complexity)] + let aux_results: Vec<( + Option>, + Vec>>, + )> = aux_commits + .into_iter() + .zip(aux_ldes) + .collect(); + // Build commitments and cached LDEs as separate vecs: // commitments are borrowed in Phase D, LDEs are consumed by value. 
let mut commitments: Vec> = @@ -2196,6 +2330,8 @@ pub trait IsStarkProver< proofs, main_mmcs_root, main_mmcs_spec, + aux_mmcs_root: aux_mmcs_root_opt, + aux_mmcs_spec, }) } @@ -2364,8 +2500,6 @@ pub trait IsStarkProver< // For preprocessed tables: per-table Merkle root over multiplicities // (preprocessed tables stay out of the shared main-trace MMCS). lde_trace_main_merkle_root: round_1_result.main.main_tree_root(), - // [t] - lde_trace_aux_merkle_root: round_1_result.aux.as_ref().map(|x| x.root), // For preprocessed tables: commitment to precomputed columns only lde_trace_precomputed_merkle_root: round_1_result.main.precomputed_root(), // tⱼ(zgᵏ) diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index 31ccbb3cb..569221ce0 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -340,9 +340,11 @@ pub trait IsStarkVerifier< ) } - /// Verify the main MMCS opening + precomputed/aux Merkle openings at FRI - /// challenge `iota`. `main_tag`, `main_mmcs_root`, `main_mmcs_spec` come - /// from the surrounding multi-proof. + /// Verify the main MMCS opening + precomputed + aux openings at FRI + /// challenge `iota`. `main_*` and `aux_*` come from the surrounding + /// multi-proof. Aux is `None` when no AIR in the multi-proof has an + /// aux trace. 
+ #[allow(clippy::too_many_arguments)] fn verify_trace_openings( proof: &StarkProof, deep_poly_openings: &DeepPolynomialOpening, @@ -350,10 +352,12 @@ pub trait IsStarkVerifier< main_tag: crypto::merkle_tree::mmcs::MatrixTag, main_mmcs_root: &Commitment, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + aux_mmcs_root: Option<&Commitment>, + aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, { use crate::proof::stark::MainTraceOpening; let main_ok = match &deep_poly_openings.main_trace_polys { @@ -383,16 +387,14 @@ pub trait IsStarkVerifier< _ => false, }; - // Auxiliary trace. - ok &= match ( - proof.lde_trace_aux_merkle_root, - &deep_poly_openings.aux_trace_polys, - ) { - (Some(root), Some(opening)) => { - Self::verify_opening_pair::(opening, &root, iota) - } - (None, None) => true, - _ => false, + // Auxiliary trace: a table listed in `aux_mmcs_spec` must carry a + // shared-MMCS opening; it may be omitted only for tables without aux. + ok &= match (&deep_poly_openings.aux_trace_polys, aux_mmcs_root) { + (Some(opening), Some(root)) => verify_aux_mmcs_pair_inner::( + opening, iota, main_tag, root, aux_mmcs_spec, + ), + (None, _) => !aux_mmcs_spec.iter().any(|(tag, _)| *tag == main_tag), + (Some(_), None) => false, }; ok @@ -445,16 +447,19 @@ pub trait IsStarkVerifier< /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial /// parts at the domain elements and their symmetric counterparts corresponding to all the FRI query /// index challenges. 
+ #[allow(clippy::too_many_arguments)] fn step_4_verify_trace_and_composition_openings( proof: &StarkProof, challenges: &Challenges, main_tag: crypto::merkle_tree::mmcs::MatrixTag, main_mmcs_root: &Commitment, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + aux_mmcs_root: Option<&Commitment>, + aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, { challenges .iotas @@ -472,6 +477,8 @@ pub trait IsStarkVerifier< main_tag, main_mmcs_root, main_mmcs_spec, + aux_mmcs_root, + aux_mmcs_spec, ) }) } @@ -618,7 +625,7 @@ pub trait IsStarkVerifier< let lde_aux: &[FieldElement] = opening .aux_trace_polys .as_ref() - .map(|a| a.evaluations.as_slice()) + .map(|a| a.evaluations()) .unwrap_or(&[]); let evaluation_point = Self::query_challenge_to_evaluation_point(*iota, false, domain); @@ -642,7 +649,7 @@ pub trait IsStarkVerifier< let lde_aux_sym: &[FieldElement] = opening .aux_trace_polys .as_ref() - .map(|a| a.evaluations_sym.as_slice()) + .map(|a| a.evaluations_sym()) .unwrap_or(&[]); let evaluation_point = Self::query_challenge_to_evaluation_point(*iota, true, domain); @@ -765,7 +772,7 @@ pub trait IsStarkVerifier< ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, { if airs.len() != multi_proof.proofs.len() { error!( @@ -893,11 +900,48 @@ pub trait IsStarkVerifier< } // ===================================================================== - // Phase C + Rounds 2-4: Forked per table + // Phase C: validate + absorb the shared aux MMCS root (if any) // ===================================================================== - // Each table gets an independent transcript fork (cloned from the shared - // state after 
Phase B, domain-separated by table index). This matches - // the prover's forking and makes per-table verification independent. + // The aux MMCS lives at multi-proof level: a single absorb into the + // SHARED transcript replaces the per-table aux root absorb of the + // pre-MMCS protocol. Verify the spec mirrors the prover-side + // filtered-by-has_aux_trace order before binding. + let mut expected_aux_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = + Vec::new(); + for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() { + if air.has_aux_trace() { + let lde_size = proof.trace_length * (air.options().blowup_factor as usize); + expected_aux_spec.push((main_tags[idx], lde_size)); + } + } + expected_aux_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + if expected_aux_spec != multi_proof.aux_mmcs_spec { + error!( + "aux_mmcs_spec mismatch: expected {:?}, got {:?}", + expected_aux_spec, multi_proof.aux_mmcs_spec, + ); + return false; + } + match (&multi_proof.aux_mmcs_root, expected_aux_spec.is_empty()) { + (Some(root), false) => transcript.append_bytes(root), + (None, true) => {} + (Some(_), true) => { + error!("aux_mmcs_root present but no AIR has an aux trace"); + return false; + } + (None, false) => { + error!("aux_mmcs_root missing but some AIR has an aux trace"); + return false; + } + } + + // ===================================================================== + // Rounds 2-4: Forked per table + // ===================================================================== + // Each table gets an independent transcript fork (cloned from the + // shared state after the aux MMCS absorb above, domain-separated by + // table index). This matches the prover's forking and makes + // per-table verification independent. 
for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() { // Must match prover: fork with domain separator for multi-table, @@ -908,11 +952,6 @@ pub trait IsStarkVerifier< table_transcript.append_bytes(&(idx as u64).to_le_bytes()); } - // Phase C: replay aux commitment - if let Some(root) = proof.lde_trace_aux_merkle_root { - table_transcript.append_bytes(&root); - } - // Bind table_contribution (L) to transcript, matching prover. if let Some(ref bpi) = proof.bus_public_inputs { table_transcript.append_field_element(&bpi.table_contribution); @@ -927,6 +966,8 @@ pub trait IsStarkVerifier< main_tags[idx], &multi_proof.main_mmcs_root, &multi_proof.main_mmcs_spec, + multi_proof.aux_mmcs_root.as_ref(), + &multi_proof.aux_mmcs_spec, ) { error!( "Table {} failed verify_rounds_2_to_4 (num_constraints={}, trace_cols={})", @@ -982,7 +1023,7 @@ pub trait IsStarkVerifier< ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, PI: Clone, { let main_tags = [crypto::merkle_tree::mmcs::MatrixTag::new([0; 8])]; @@ -1133,9 +1174,8 @@ pub trait IsStarkVerifier< /// Verifies a single table after round 1 has been replayed. /// - /// `main_tag`, `main_mmcs_root`, `main_mmcs_spec` come from the shared - /// multi-proof and are needed to authenticate the per-table main-trace - /// openings in step 4. + /// `main_*` / `aux_*` come from the shared multi-proof and authenticate + /// the per-table trace openings in step 4. 
#[allow(clippy::too_many_arguments)] fn verify_rounds_2_to_4( air: &dyn AIR, @@ -1145,10 +1185,12 @@ pub trait IsStarkVerifier< main_tag: crypto::merkle_tree::mmcs::MatrixTag, main_mmcs_root: &Commitment, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + aux_mmcs_root: Option<&Commitment>, + aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, { let domain = new_verifier_domain(air, proof.trace_length); @@ -1227,6 +1269,8 @@ pub trait IsStarkVerifier< main_tag, main_mmcs_root, main_mmcs_spec, + aux_mmcs_root, + aux_mmcs_spec, ) { #[cfg(not(feature = "test_fiat_shamir"))] error!("DEEP Composition Polynomial verification failed"); @@ -1314,3 +1358,63 @@ where mmcs_opening_sym.verify::>(main_mmcs_root, main_mmcs_spec); ok && ok_sym } + +/// Aux-trace counterpart of [`verify_main_mmcs_pair_inner`]. Same shape, +/// but rehashes the row using the AUX domain separator so an aux opening +/// cannot authenticate a main leaf (or vice versa). 
+fn verify_aux_mmcs_pair_inner( + aux_opening: &crate::proof::stark::AuxTraceOpening, + iota: usize, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + aux_mmcs_root: &Commitment, + aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], +) -> bool +where + E: IsField, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, +{ + use crate::mmcs_leaf::hash_tagged_row_aux; + use crate::proof::stark::AuxTraceOpening; + let AuxTraceOpening::Mmcs { + evaluations, + evaluations_sym, + mmcs_opening, + mmcs_opening_sym, + } = aux_opening; + + let table_idx = match aux_mmcs_spec.iter().position(|(t, _)| *t == main_tag) { + Some(i) => i, + None => return false, + }; + let table_height = aux_mmcs_spec[table_idx].1; + let max_height = match aux_mmcs_spec.first().map(|(_, h)| *h) { + Some(h) => h, + None => return false, + }; + if !table_height.is_power_of_two() || max_height < table_height { + return false; + } + let shift = (max_height / table_height).trailing_zeros() as usize; + let g_primary = (iota * 2) << shift; + let g_sym = (iota * 2 + 1) << shift; + let leaf_primary = hash_tagged_row_aux::(main_tag, evaluations); + let leaf_sym = hash_tagged_row_aux::(main_tag, evaluations_sym); + if mmcs_opening.global_index != g_primary || mmcs_opening_sym.global_index != g_sym { + return false; + } + let leaves = &mmcs_opening.matrix_leaves; + let leaves_sym = &mmcs_opening_sym.matrix_leaves; + if table_idx >= leaves.len() || table_idx >= leaves_sym.len() { + return false; + } + if leaves[table_idx].0 != main_tag || leaves[table_idx].1 != leaf_primary { + return false; + } + if leaves_sym[table_idx].0 != main_tag || leaves_sym[table_idx].1 != leaf_sym { + return false; + } + let ok = mmcs_opening.verify::>(aux_mmcs_root, aux_mmcs_spec); + let ok_sym = + mmcs_opening_sym.verify::>(aux_mmcs_root, aux_mmcs_spec); + ok && ok_sym +} From d82d9abf9a0b02e9a7d72fd0600452906b1c0740 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 15:30:00 -0300 
Subject: [PATCH 14/21] test(stark/mmcs): per-vector soundness tests for the shared AUX MMCS path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 9 tests covering the aux MMCS attack surface (mirror of the existing main-MMCS suite). All use a baseline two-table multi-proof over two `LogReadOnlyRAP` AIRs so both tables contribute aux and therefore both participate in the shared MMCS. Field tampered Detection mechanism ----------------------------------------------------------------------- multi_proof.aux_mmcs_root[0] transcript divergence multi_proof.aux_mmcs_root = None root presence/spec check multi_proof.aux_mmcs_spec[0].1 (height) reproduced-spec check multi_proof.aux_mmcs_spec[0].0 (tag) reproduced-spec check mmcs_opening.matrix_leaves[idx].1 (digest) rehash-vs-leaf check mmcs_opening.global_index g_primary/g_sym match mmcs_opening.siblings[0][0] MmcsOpening::verify evaluations[0] += 1 rehash mismatch (baseline test) verifies cleanly Locks behaviour for the entire aux MMCS path — root absorb, spec sort, leaf rehash with the aux domain separator, and Merkle authentication. --- .../src/tests/mmcs_aux_soundness_tests.rs | 210 ++++++++++++++++++ crypto/stark/src/tests/mod.rs | 1 + 2 files changed, 211 insertions(+) create mode 100644 crypto/stark/src/tests/mmcs_aux_soundness_tests.rs diff --git a/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs b/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs new file mode 100644 index 000000000..d01d4a924 --- /dev/null +++ b/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs @@ -0,0 +1,210 @@ +//! Soundness tests for the shared AUX-trace MMCS path (mirror of +//! `mmcs_soundness_tests.rs`). Uses two `LogReadOnlyRAP` AIRs so both +//! tables have an aux trace and therefore both participate in the shared +//! aux MMCS — the only path that produces `AuxTraceOpening::Mmcs` data. +//! +//! Each test tampers with a single field on the aux MMCS path and +//! asserts the verifier rejects. 
+ +use crypto::fiat_shamir::default_transcript::DefaultTranscript; +use crypto::merkle_tree::mmcs::MatrixTag; +use math::field::{ + element::FieldElement, extensions_goldilocks::Degree3GoldilocksExtensionField, + goldilocks::GoldilocksField, +}; + +use crate::examples::read_only_memory_logup::{ + LogReadOnlyPublicInputs, LogReadOnlyRAP, read_only_logup_trace, +}; +use crate::proof::options::ProofOptions; +use crate::proof::stark::{AuxTraceOpening, MultiProof}; +use crate::test_utils::{multi_prove_ram, multi_verify_ram}; +use crate::traits::AIR; + +type F = GoldilocksField; +type E = Degree3GoldilocksExtensionField; + +#[allow(clippy::type_complexity)] +fn baseline_proof() -> ( + LogReadOnlyRAP, + LogReadOnlyRAP, + MultiProof>, +) { + let proof_options = ProofOptions::default_test_options(); + let air_1 = LogReadOnlyRAP::::new(&proof_options); + let air_2 = LogReadOnlyRAP::::new(&proof_options); + + let address_col_1 = vec![ + FieldElement::::from(3), + FieldElement::::from(2), + FieldElement::::from(2), + FieldElement::::from(3), + FieldElement::::from(4), + FieldElement::::from(5), + FieldElement::::from(1), + FieldElement::::from(3), + ]; + let value_col_1 = vec![ + FieldElement::::from(30), + FieldElement::::from(20), + FieldElement::::from(20), + FieldElement::::from(30), + FieldElement::::from(40), + FieldElement::::from(50), + FieldElement::::from(10), + FieldElement::::from(30), + ]; + let address_col_2 = vec![ + FieldElement::::from(15), + FieldElement::::from(12), + FieldElement::::from(17), + FieldElement::::from(10), + FieldElement::::from(14), + FieldElement::::from(11), + FieldElement::::from(16), + FieldElement::::from(13), + ]; + let value_col_2 = vec![ + FieldElement::::from(150), + FieldElement::::from(120), + FieldElement::::from(170), + FieldElement::::from(100), + FieldElement::::from(140), + FieldElement::::from(110), + FieldElement::::from(160), + FieldElement::::from(130), + ]; + let pub_inputs_1 = LogReadOnlyPublicInputs { + a0: 
FieldElement::::from(3), + v0: FieldElement::::from(30), + a_sorted_0: FieldElement::::from(1), + v_sorted_0: FieldElement::::from(10), + m0: FieldElement::::from(1), + }; + let pub_inputs_2 = LogReadOnlyPublicInputs { + a0: FieldElement::::from(15), + v0: FieldElement::::from(150), + a_sorted_0: FieldElement::::from(10), + v_sorted_0: FieldElement::::from(100), + m0: FieldElement::::from(1), + }; + + let mut trace_1 = read_only_logup_trace(address_col_1, value_col_1); + let mut trace_2 = read_only_logup_trace(address_col_2, value_col_2); + let air_trace_pairs: Vec<( + &dyn AIR>, + &mut _, + &_, + )> = vec![ + (&air_1, &mut trace_1, &pub_inputs_1), + (&air_2, &mut trace_2, &pub_inputs_2), + ]; + let proof = + multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).expect("prove"); + (air_1, air_2, proof) +} + +fn verify( + airs: &[&dyn AIR>], + proof: &MultiProof>, +) -> bool { + multi_verify_ram(airs, proof, &mut DefaultTranscript::::new(&[]), &FieldElement::zero()) +} + +fn first_aux_mmcs_opening_mut( + proof: &mut MultiProof>, +) -> &mut AuxTraceOpening { + proof.proofs[0].deep_poly_openings[0] + .aux_trace_polys + .as_mut() + .expect("baseline must have aux openings") +} + +#[test_log::test] +fn baseline_two_rap_tables_verify() { + let (air_1, air_2, proof) = baseline_proof(); + assert!(proof.aux_mmcs_root.is_some(), "aux MMCS must be present"); + assert_eq!(proof.aux_mmcs_spec.len(), 2, "both AIRs contribute aux"); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + assert!(verify(&airs, &proof), "baseline aux proof must verify"); +} + +#[test_log::test] +fn tampered_aux_mmcs_root_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + let root = proof.aux_mmcs_root.as_mut().expect("baseline has root"); + root[0] ^= 1; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn missing_aux_mmcs_root_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: 
Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + proof.aux_mmcs_root = None; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_aux_mmcs_spec_height_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + proof.aux_mmcs_spec[0].1 /= 2; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_aux_mmcs_spec_tag_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + proof.aux_mmcs_spec[0].0 = MatrixTag::new([0xFF; 8]); + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_aux_mmcs_opening_leaf_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + let AuxTraceOpening::Mmcs { mmcs_opening, .. } = first_aux_mmcs_opening_mut(&mut proof); + mmcs_opening.matrix_leaves[0].1[0] ^= 1; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_aux_mmcs_opening_global_index_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + let AuxTraceOpening::Mmcs { mmcs_opening, .. } = first_aux_mmcs_opening_mut(&mut proof); + mmcs_opening.global_index ^= 0b10; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_aux_mmcs_opening_sibling_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + let AuxTraceOpening::Mmcs { mmcs_opening, .. } = first_aux_mmcs_opening_mut(&mut proof); + assert!(!mmcs_opening.siblings.is_empty()); + mmcs_opening.siblings[0][0] ^= 1; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_aux_evaluations_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR>> = + vec![&air_1, &air_2]; + let AuxTraceOpening::Mmcs { evaluations, .. 
} = first_aux_mmcs_opening_mut(&mut proof); + assert!(!evaluations.is_empty()); + evaluations[0] += FieldElement::::one(); + assert!(!verify(&airs, &proof)); +} diff --git a/crypto/stark/src/tests/mod.rs b/crypto/stark/src/tests/mod.rs index f44c65ee9..b42b2abd9 100644 --- a/crypto/stark/src/tests/mod.rs +++ b/crypto/stark/src/tests/mod.rs @@ -2,6 +2,7 @@ pub mod air_tests; pub mod bus_tests; pub mod domain_cache_stats; pub mod fri_tests; +pub mod mmcs_aux_soundness_tests; pub mod mmcs_soundness_tests; pub mod proof_options_tests; pub mod prove_verify_roundtrip_tests; From dd742270e25f54ab4388faf8dc365569f1f7d780 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Wed, 27 May 2026 23:53:34 -0300 Subject: [PATCH 15/21] =?UTF-8?q?feat(crypto/mmcs):=20StreamingMmcsBuilder?= =?UTF-8?q?=20=E2=80=94=20fold=20max-height=20chips=20incrementally?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a streaming-build variant of `MmcsBuilder` that folds per-chip leaves at the MAX height into a single shared running layer-0 as they arrive, instead of holding every max-height chip's leaf vector alive simultaneously. The other heights still need per-chip leaves stored until `finalize` — see "Why max-height only?" below. Equivalence guarantee: identical root + spec + layer bytes to `MmcsBuilder` for the same input set (locked by 4 round-trip tests against the one-shot path, plus order-violation rejection tests). # Why "max-height only"? MMCS layer-0 at max height is built by left-folding every chip's leaves at row `i`. With no left-anchor to compose with, the running fold `acc = hash(acc, chip_k[i])` is mathematically equivalent to the one-shot `hash(hash(hash(chip_0[i], chip_1[i]), chip_2[i]), ...)`. For chips at heights BELOW max, the MMCS injection rule is `next[i] = hash(hash(hash(next[i], chip_0[i]), chip_1[i]), ...)`, which mixes the upward-compressed `next[i]` into the left-fold. 
Keccak (and any non-associative hash) makes it impossible to pre-fold the chips into a single summary and inject that summary later — the resulting digest would differ from the one-shot builder, breaking verifier compatibility. So we keep per-chip leaves for non-max heights and inject them in left-fold order at `finalize`. # Add order Callers MUST add matrices in (height desc, tag asc) order — the exact sort `MmcsBuilder::finalize` does internally. Out-of-order calls return `MmcsError::OutOfOrder`. Same-height tags must strictly ascend; duplicate tags at different heights still trip `DuplicateTag`. # Memory / wire-format - Peak savings vs one-shot: `(num_max_height_chips - 1) * max_height * node_size` worth of per-chip leaf storage that is folded immediately and dropped. For lambda-vm with ~5 chips at 2^20 that is ~128 MB of transient Vec. - Wire format unchanged: the produced `Mmcs` has the same `root()` and `spec()`. Layers Vec matches one-shot byte-for-byte. - Per-chip `leaf_digests` is empty on the streaming output — `Mmcs::open` is therefore unavailable until callers supply leaves at open time. This is the prover wire-up that comes next. # MmcsMatrix change `MmcsMatrix.padded_height` is now a stored field instead of reading `leaf_digests.len()`, so `spec()` reports the right value even when the streaming builder has emptied per-chip leaves. Tests: 9 new tests on top of the prior 17 MMCS tests; 26 total green. 
--- crypto/crypto/src/merkle_tree/mmcs.rs | 365 +++++++++++++++++++++++++- 1 file changed, 363 insertions(+), 2 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs index 8bbd8607f..fac4e6b2a 100644 --- a/crypto/crypto/src/merkle_tree/mmcs.rs +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -47,16 +47,30 @@ pub enum MmcsError { NotPowerOfTwo, Empty, IndexOutOfBounds, + /// Returned by [`StreamingMmcsBuilder::add_matrix`] when the caller + /// supplies a `(height, tag)` pair that violates the required + /// (height desc, tag asc) insertion order. + OutOfOrder, } struct MmcsMatrix { tag: MatrixTag, + /// Source row hashes. Populated by the one-shot [`MmcsBuilder`] and + /// consulted by [`Mmcs::open`] to fill the per-matrix leaf in an + /// opening. Empty when the Mmcs was produced by [`StreamingMmcsBuilder`] + /// (which discards per-chip leaves as it folds them), in which case + /// `Mmcs::open` is unavailable but `root()` / `spec()` still work. leaf_digests: Vec, + /// Padded height (= leaf_digests.len() for one-shot, or the height + /// recorded at insertion time for streaming). Carried separately so + /// `padded_height()` reports the right value when `leaf_digests` is + /// empty. + padded_height: usize, } impl MmcsMatrix { fn padded_height(&self) -> usize { - self.leaf_digests.len() + self.padded_height } } @@ -94,7 +108,12 @@ impl MmcsBuilder { if !leaf_digests.len().is_power_of_two() { return Err(MmcsError::NotPowerOfTwo); } - self.matrices.push(MmcsMatrix { tag, leaf_digests }); + let padded_height = leaf_digests.len(); + self.matrices.push(MmcsMatrix { + tag, + leaf_digests, + padded_height, + }); Ok(()) } @@ -155,6 +174,230 @@ impl MmcsBuilder { } } +/// Streaming MMCS builder. 
Equivalent to [`MmcsBuilder`] in output +/// (identical root + spec + opening *root* bytes for the same input set) +/// but folds per-chip leaves at the MAX height into a single shared +/// running layer-0 as they arrive, instead of holding every max-height +/// chip's leaf vector alive simultaneously. +/// +/// # Why "max height only"? +/// +/// MMCS layer-0 at the max height is built by left-folding every chip's +/// leaves at row `i`. With no left-anchor to compose with, the running +/// fold `acc = hash(acc, chip_k[i])` is mathematically equivalent to the +/// one-shot `hash(hash(hash(chip_0[i], chip_1[i]), chip_2[i]), ...)`. +/// +/// For chips at heights BELOW max, the MMCS injection rule is +/// `next[i] = hash(hash(hash(next[i], chip_0[i]), chip_1[i]), ...)`, +/// which mixes the upward-compressed `next[i]` into the left-fold. Keccak +/// (and any non-associative hash) makes it impossible to pre-fold the +/// chips into a single summary and inject that summary later — the +/// resulting digest would differ from the one-shot builder, breaking +/// verifier compatibility. So we keep per-chip leaves for non-max heights +/// and inject them in left-fold order at `finalize`. +/// +/// # Memory +/// +/// Peak savings come from the max-height chips, which is where the +/// dominant per-row storage lives in lambda-vm (CPU chunks at 2^20). +/// Smaller-height chips contribute proportionally less per chip, so +/// keeping their per-chip leaves alive has modest impact. +/// +/// # Add order +/// +/// Callers MUST call [`StreamingMmcsBuilder::add_matrix`] in the same +/// order that [`MmcsBuilder::finalize`] would sort the matrices in: +/// height descending, then tag ascending within each height. The builder +/// returns [`MmcsError::OutOfOrder`] if a call would break this. +pub struct StreamingMmcsBuilder { + /// Max-height layer-0 — incrementally folded as max-height chips + /// arrive. `None` until the first chip is added (which fixes the + /// max height). 
+ layer0: Option>, + /// Per-chip leaves for chips at heights < max_height, grouped by + /// height. Within each group, chips are in tag-asc order (enforced + /// by `add_matrix`). + by_height_below_max: BTreeMap>>, + /// `(tag, padded_height)` in caller-supplied order. Populates the + /// final `Mmcs.matrices` (used by `spec()`). + matrix_specs: Vec<(MatrixTag, usize)>, + max_height: Option, +} + +impl Default for StreamingMmcsBuilder { + fn default() -> Self { + Self::new() + } +} + +impl StreamingMmcsBuilder { + pub fn new() -> Self { + Self { + layer0: None, + by_height_below_max: BTreeMap::new(), + matrix_specs: Vec::new(), + max_height: None, + } + } + + /// Add a chip's leaves to the in-progress MMCS. The vector is + /// consumed so the caller can drop the chip's source data + /// immediately on return. + /// + /// At the MAX height the leaves are folded into the shared layer-0 + /// running and the vector is freed. At lower heights the vector is + /// stored verbatim until `finalize`. + pub fn add_matrix( + &mut self, + tag: MatrixTag, + leaf_digests: Vec, + ) -> Result<(), MmcsError> { + if leaf_digests.is_empty() { + return Err(MmcsError::EmptyMatrix); + } + if !leaf_digests.len().is_power_of_two() { + return Err(MmcsError::NotPowerOfTwo); + } + // Order check first — protects all subsequent invariants. + let h = leaf_digests.len(); + if let Some(&(prev_tag, prev_h)) = self.matrix_specs.last() { + let ord = core::cmp::Ord::cmp(&prev_h, &h) + .reverse() + .then(prev_tag.cmp(&tag)); + if !matches!(ord, core::cmp::Ordering::Less) { + return Err(MmcsError::OutOfOrder); + } + } + if self.matrix_specs.iter().any(|(t, _)| *t == tag) { + return Err(MmcsError::DuplicateTag); + } + + match self.max_height { + None => { + // First chip — its height fixes max_height; its leaves + // seed the running layer-0. 
+ self.max_height = Some(h); + self.layer0 = Some(leaf_digests); + } + Some(max_h) if h == max_h => { + // Subsequent max-height chip — fold into running layer-0. + let running = self + .layer0 + .as_mut() + .expect("layer0 populated once max_height is set"); + debug_assert_eq!(running.len(), leaf_digests.len()); + fold_into::(running, &leaf_digests); + } + Some(_) => { + // Below max — stash per-chip leaves, drop at finalize. + self.by_height_below_max + .entry(h) + .or_default() + .push(leaf_digests); + } + } + self.matrix_specs.push((tag, h)); + Ok(()) + } + + /// Compress the running layer-0 upward, injecting lower-height chips + /// at the matching level using the same left-fold the one-shot + /// [`MmcsBuilder::finalize`] uses. + /// + /// The returned [`Mmcs`] has empty `leaf_digests` for each matrix + /// because the streaming builder consumed them. `root()` / `spec()` + /// are fully functional; callers that also need [`Mmcs::open`] must + /// regenerate the chip leaves or use [`MmcsBuilder`]. + pub fn finalize(self) -> Result, MmcsError> { + if self.matrix_specs.is_empty() { + return Err(MmcsError::Empty); + } + let max_height = self.max_height.ok_or(MmcsError::Empty)?; + let depth = max_height.trailing_zeros() as usize; + + let StreamingMmcsBuilder { + layer0, + mut by_height_below_max, + matrix_specs, + max_height: _, + } = self; + + let mut layers: Vec> = Vec::with_capacity(depth + 1); + layers.push(layer0.ok_or(MmcsError::Empty)?); + + for level in 0..depth { + let mut next = compress_pairs::(&layers[level]); + let new_len = max_height >> (level + 1); + if let Some(chips) = by_height_below_max.remove(&new_len) { + inject_chips_left_fold::(&mut next, &chips); + } + layers.push(next); + } + + // Carry tag + height into the Mmcs so `spec()` reports the right + // pairs. leaf_digests stays empty — opens are not supported on + // streaming output (caller must use the one-shot builder when + // openings are needed). 
+ let matrices = matrix_specs + .into_iter() + .map(|(tag, padded_height)| MmcsMatrix { + tag, + leaf_digests: Vec::new(), + padded_height, + }) + .collect(); + Ok(Mmcs { layers, matrices }) + } +} + +/// Per-row fold: `acc[i] = hash_new_parent(acc[i], other[i])`. +fn fold_into(acc: &mut [B::Node], other: &[B::Node]) { + debug_assert_eq!(acc.len(), other.len()); + let n = acc.len(); + let updated: Vec = { + let inner = |i: usize| -> B::Node { B::hash_new_parent(&acc[i], &other[i]) }; + #[cfg(feature = "parallel")] + { + (0..n).into_par_iter().map(inner).collect() + } + #[cfg(not(feature = "parallel"))] + { + (0..n).map(inner).collect() + } + }; + acc.clone_from_slice(&updated); +} + +/// Left-fold inject several chips' leaves into `layer` at every row in +/// tag-asc chip order: +/// `layer[i] = hash(hash(hash(layer[i], chips[0][i]), chips[1][i]), ...)`. +/// Mirrors `inject_matrices` in the one-shot path. +fn inject_chips_left_fold( + layer: &mut [B::Node], + chips: &[Vec], +) { + let n = layer.len(); + let updated: Vec = { + let inner = |i: usize| -> B::Node { + let mut acc = layer[i].clone(); + for chip in chips { + acc = B::hash_new_parent(&acc, &chip[i]); + } + acc + }; + #[cfg(feature = "parallel")] + { + (0..n).into_par_iter().map(inner).collect() + } + #[cfg(not(feature = "parallel"))] + { + (0..n).map(inner).collect() + } + }; + layer.clone_from_slice(&updated); +} + + /// Build layer 0 by folding all matrices at `max_height` at row `i`, in /// tag-asc order (`group` already preserves this). Row-parallel. 
fn build_combined_layer( @@ -597,6 +840,124 @@ mod tests { let tree = build(vec![big]); assert_eq!(tree.open(4).err(), Some(MmcsError::IndexOutOfBounds)); } + + // ---------- StreamingMmcsBuilder equivalence ---------- + + fn build_streaming( + matrices_in_spec_order: Vec<(MatrixTag, Vec)>, + ) -> Mmcs { + let mut b: StreamingMmcsBuilder = StreamingMmcsBuilder::new(); + for (tag, leaves) in matrices_in_spec_order { + b.add_matrix(tag, leaves).expect("streaming add_matrix"); + } + b.finalize().expect("streaming finalize") + } + + /// Convert an arbitrary input set into the (height desc, tag asc) + /// order required by `StreamingMmcsBuilder`. Matches the sort + /// `MmcsBuilder::finalize` does internally. + fn spec_sorted(mut v: Vec<(MatrixTag, Vec)>) -> Vec<(MatrixTag, Vec)> { + v.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then(a.0.cmp(&b.0))); + v + } + + #[test] + fn streaming_root_matches_oneshot_single_matrix() { + let m = make_matrix(0xAA, 8); + let r_oneshot = *build(vec![m.clone()]).root(); + let r_stream = *build_streaming(spec_sorted(vec![m])).root(); + assert_eq!(r_oneshot, r_stream); + } + + #[test] + fn streaming_root_matches_oneshot_lambdavm_topology() { + let inputs = vec![ + make_matrix(0x01, 8), + make_matrix(0x02, 8), + make_matrix(0x03, 8), + make_matrix(0x10, 4), + make_matrix(0x11, 4), + make_matrix(0xF0, 1), + ]; + let r_oneshot = *build(inputs.clone()).root(); + let r_stream = *build_streaming(spec_sorted(inputs)).root(); + assert_eq!(r_oneshot, r_stream); + } + + #[test] + fn streaming_spec_matches_oneshot() { + let inputs = vec![ + make_matrix(0x01, 8), + make_matrix(0x02, 4), + make_matrix(0x03, 8), + make_matrix(0x04, 2), + ]; + let oneshot = build(inputs.clone()); + let stream = build_streaming(spec_sorted(inputs)); + assert_eq!(oneshot.spec(), stream.spec()); + } + + #[test] + fn streaming_rejects_height_ascending() { + let mut b: StreamingMmcsBuilder = StreamingMmcsBuilder::new(); + let (t0, l0) = make_matrix(0x01, 4); + let (t1, l1) = 
make_matrix(0x02, 8); + b.add_matrix(t0, l0).expect("first add"); + assert_eq!(b.add_matrix(t1, l1), Err(MmcsError::OutOfOrder)); + } + + #[test] + fn streaming_rejects_same_height_tag_descending() { + let mut b: StreamingMmcsBuilder = StreamingMmcsBuilder::new(); + let (t0, l0) = make_matrix(0x02, 4); + let (t1, l1) = make_matrix(0x01, 4); + b.add_matrix(t0, l0).expect("first add"); + assert_eq!(b.add_matrix(t1, l1), Err(MmcsError::OutOfOrder)); + } + + #[test] + fn streaming_rejects_duplicate_tag_same_height() { + // Same tag and same height violates (height desc, tag asc); the + // order check fires first. + let mut b: StreamingMmcsBuilder = StreamingMmcsBuilder::new(); + let (t, l) = make_matrix(0x01, 4); + b.add_matrix(t, l.clone()).expect("first add"); + assert_eq!(b.add_matrix(t, l), Err(MmcsError::OutOfOrder)); + } + + #[test] + fn streaming_rejects_duplicate_tag_smaller_height() { + // Same tag at a strictly smaller height passes the order check, + // so the dup-tag scan catches it instead. 
+ let mut b: StreamingMmcsBuilder = StreamingMmcsBuilder::new(); + let (t, l) = make_matrix(0x01, 4); + b.add_matrix(t, l).expect("first add"); + let l2: Vec = vec![[0; 32]; 2]; + assert_eq!(b.add_matrix(t, l2), Err(MmcsError::DuplicateTag)); + } + + #[test] + fn streaming_rejects_empty_and_non_power_of_two() { + let mut b: StreamingMmcsBuilder = StreamingMmcsBuilder::new(); + let tag = MatrixTag::new([0; 8]); + assert_eq!(b.add_matrix(tag, Vec::new()), Err(MmcsError::EmptyMatrix)); + let bad: Vec = vec![[0; 32]; 3]; + assert_eq!(b.add_matrix(tag, bad), Err(MmcsError::NotPowerOfTwo)); + } + + #[test] + fn streaming_root_matches_oneshot_pure_same_height() { + let inputs = vec![ + make_matrix(0x01, 8), + make_matrix(0x02, 8), + make_matrix(0x03, 8), + make_matrix(0x04, 8), + make_matrix(0x05, 8), + ]; + let r_oneshot = *build(inputs.clone()).root(); + let r_stream = *build_streaming(spec_sorted(inputs)).root(); + assert_eq!(r_oneshot, r_stream); + } } #[cfg(test)] From 5d1a2d6639bbda3fb301c6b517f3b37508f02410 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Thu, 28 May 2026 00:05:33 -0300 Subject: [PATCH 16/21] =?UTF-8?q?feat(crypto/mmcs):=20Mmcs::open=5Fwith=5F?= =?UTF-8?q?leaves=20=E2=80=94=20open=20streaming-built=20MMCSes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `StreamingMmcsBuilder` discards per-chip leaves at build time, so `Mmcs::open(global_index)` — which reads `matrix.leaf_digests[idx]` to fill `MmcsOpening.matrix_leaves` — would panic indexing the empty `leaf_digests` instead of producing an opening. This commit adds the complementary API: pub fn open_with_leaves(global_index, leaf_fn) -> MmcsOpening where F: FnMut(matrix_idx, local_idx) -> B::Node The closure provides each matrix's per-row leaf at the appropriate shifted index. The prover-side use case (next commit) is to rehash a row from a per-table LDE on demand, replacing the one-shot Mmcs's internal leaf storage with on-the-fly rehashing.
`Mmcs::open` is now a thin wrapper around `open_with_leaves` whose closure reads from `self.matrices[i].leaf_digests` — back-compat, no caller change. Equivalence pinned by a new test `streaming_open_with_leaves_round_trips_against_one_shot`: - Build lambda-vm-shaped topology two ways (one-shot + streaming). - Compare roots + specs (already covered). - For every global_index in [0, max_height): open one-shot, open streaming via `open_with_leaves` feeding leaves from the input set, assert `global_index`, `siblings`, and `matrix_leaves` byte-identical. - Assert the streaming opening verifies against the streaming root. 27/27 MMCS tests green (1 new on top of 26). Foundation for the per-chunk MMCS refactor: with `open_with_leaves` available, the prover can stream-build per-chunk MMCSes and rehash leaves from chunk-shared LDEs at open time. --- crypto/crypto/src/merkle_tree/mmcs.rs | 72 +++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/crypto/crypto/src/merkle_tree/mmcs.rs b/crypto/crypto/src/merkle_tree/mmcs.rs index fac4e6b2a..b4b38dc2f 100644 --- a/crypto/crypto/src/merkle_tree/mmcs.rs +++ b/crypto/crypto/src/merkle_tree/mmcs.rs @@ -483,6 +483,30 @@ impl Mmcs { } pub fn open(&self, global_index: usize) -> Result, MmcsError> { + self.open_with_leaves(global_index, |m_idx, local_idx| { + self.matrices[m_idx].leaf_digests[local_idx].clone() + }) + } + + /// Like [`Mmcs::open`] but pulls each matrix's per-row leaf from a + /// caller-supplied closure instead of `self.matrices[i].leaf_digests`. + /// Required when this `Mmcs` was produced by [`StreamingMmcsBuilder`] + /// (which discards per-chip leaves at build time): the closure + /// rehashes the row from the chip's source data on demand. 
+ /// + /// The closure receives `(matrix_idx_in_spec_order, local_idx)` where + /// `local_idx = global_index >> log2(max_height / m.padded_height())`, + /// and must return the same digest the one-shot builder would have + /// stored at that position. The digest is not checked here: a wrong + /// one yields an opening that fails `MmcsOpening::verify`. + pub fn open_with_leaves( + &self, + global_index: usize, + mut leaf_fn: F, + ) -> Result, MmcsError> + where + F: FnMut(usize, usize) -> B::Node, + { let max_height = self.matrices[0].padded_height(); if global_index >= max_height { return Err(MmcsError::IndexOutOfBounds); @@ -490,10 +514,10 @@ impl Mmcs { let depth = max_height.trailing_zeros() as usize; let mut matrix_leaves: Vec<(MatrixTag, B::Node)> = Vec::with_capacity(self.matrices.len()); - for matrix in &self.matrices { + for (m_idx, matrix) in self.matrices.iter().enumerate() { let shift = (max_height / matrix.padded_height()).trailing_zeros() as usize; - let idx = global_index >> shift; - matrix_leaves.push((matrix.tag, matrix.leaf_digests[idx].clone())); + let local_idx = global_index >> shift; + matrix_leaves.push((matrix.tag, leaf_fn(m_idx, local_idx))); } let mut siblings: Vec = Vec::with_capacity(depth); @@ -945,6 +969,48 @@ mod tests { assert_eq!(b.add_matrix(tag, bad), Err(MmcsError::NotPowerOfTwo)); } + #[test] + fn streaming_open_with_leaves_round_trips_against_one_shot() { + // Lambda-vm topology built two ways: one-shot builds a fully- + // populated Mmcs whose `open` works directly; streaming builds an + // empty-leaves Mmcs whose `open_with_leaves` must produce the + // same opening when fed the same chip leaves.
+ let inputs = vec![ + make_matrix(0x01, 8), + make_matrix(0x02, 8), + make_matrix(0x03, 8), + make_matrix(0x10, 4), + make_matrix(0x11, 4), + make_matrix(0xF0, 1), + ]; + let oneshot = build(inputs.clone()); + let stream = build_streaming(spec_sorted(inputs.clone())); + assert_eq!(*oneshot.root(), *stream.root()); + assert_eq!(oneshot.spec(), stream.spec()); + + let sorted = spec_sorted(inputs); + let leaves_by_tag: std::collections::HashMap> = + sorted.iter().map(|(t, l)| (*t, l.clone())).collect(); + let spec = stream.spec(); + + for global_index in 0..8 { + let from_oneshot = oneshot.open(global_index).expect("oneshot open"); + let from_stream = stream + .open_with_leaves(global_index, |m_idx, local_idx| { + let tag = spec[m_idx].0; + leaves_by_tag[&tag][local_idx] + }) + .expect("streaming open_with_leaves"); + assert_eq!(from_oneshot.global_index, from_stream.global_index); + assert_eq!(from_oneshot.siblings, from_stream.siblings); + assert_eq!(from_oneshot.matrix_leaves, from_stream.matrix_leaves); + assert!( + from_stream.verify::(stream.root(), &spec), + "streaming opening must verify" + ); + } + } + #[test] fn streaming_root_matches_oneshot_pure_same_height() { let inputs = vec![ From e854e15a9e1dd298e2acc08c8e2630d226cb1b00 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Thu, 28 May 2026 00:26:28 -0300 Subject: [PATCH 17/21] refactor(stark/trace): Arc-wrap LDETraceTable columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation for the per-chunk MMCS migration. The chunk-grouped main and aux MMCSes need to rehash chunk-mate LDE rows on demand at open time (the prover-side input to `Mmcs::open_with_leaves`). Without Arc-shared columns, each chunk-mate's open would have to copy ~600 MB of column data per CPU LDE — a non-starter. Change: - `LDETraceTable.main_columns: Vec>>` → `Arc>>>`. - Same for `aux_columns`. 
- `from_columns(Vec, Vec, ...)` still works (wraps internally with `Arc::new`) — all existing call sites unaffected. - New `from_columns_arc(Arc, Arc, ...)` for callers (next commit) that already hold Arc-shared column data. - New `main_columns_arc()` / `aux_columns_arc()` cheap-clone accessors for the chunk-shared MMCS open helpers. - `into_columns()` now returns `(Arc, Arc)` instead of `(Vec, Vec)`. No internal caller uses the old signature outside `LDETraceTable` construction (which uses Arc-aware path); external uses would have to migrate, but searching the workspace shows no such callers. No protocol change. No proof-format change. No memory cost: a single `Arc::new` on construction; deref is free; column reads via `[col][row]` still go through `Vec` Deref. Existing 151/151 stark tests pass without edits. --- crypto/stark/src/trace.rs | 58 ++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/crypto/stark/src/trace.rs b/crypto/stark/src/trace.rs index 834ffdcda..f81e1496b 100644 --- a/crypto/stark/src/trace.rs +++ b/crypto/stark/src/trace.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::domain::{Domain, DomainConstants}; use crate::table::Table; #[cfg(test)] @@ -216,8 +218,13 @@ where E: IsField, F: IsSubFieldOf + IsField, { - pub(crate) main_columns: Vec>>, - pub(crate) aux_columns: Vec>>, + /// LDE columns for the main trace, Arc-wrapped so chunk-mate tables + /// can share access without copying the large column data — needed by + /// the per-chunk MMCS open path which rehashes chunk-mate rows on + /// demand. Read-only after construction. + pub(crate) main_columns: Arc>>>, + /// Same shape for aux columns. + pub(crate) aux_columns: Arc>>>, pub(crate) lde_step_size: usize, pub(crate) blowup_factor: usize, } @@ -227,16 +234,35 @@ where E: IsField, F: IsSubFieldOf, { - /// Creates a column-major LDETraceTable by consuming column vectors directly. - /// No transpose is performed — columns are stored as-is. 
+ /// Creates a column-major LDETraceTable by consuming column vectors + /// directly. Wraps each column set (main, aux) in an `Arc` so the resulting + /// table can be cheaply shared across threads and per-chunk open + /// helpers. pub fn from_columns( main_columns: Vec>>, aux_columns: Vec>>, trace_step_size: usize, blowup_factor: usize, ) -> Self { - let lde_step_size = trace_step_size * blowup_factor; + Self::from_columns_arc( + Arc::new(main_columns), + Arc::new(aux_columns), + trace_step_size, + blowup_factor, + ) + } + /// Creates an `LDETraceTable` from already-`Arc`-wrapped column data. + /// Useful when the same column data is being shared with other + /// consumers (e.g. a per-chunk MMCS open context) and the caller + /// wants to avoid re-allocating the Arc. + pub fn from_columns_arc( + main_columns: Arc>>>, + aux_columns: Arc>>>, + trace_step_size: usize, + blowup_factor: usize, + ) -> Self { + let lde_step_size = trace_step_size * blowup_factor; Self { main_columns, aux_columns, @@ -245,12 +271,30 @@ where } } - /// Consume self and return the owned column vectors. + /// Consume self and return the Arc-wrapped column vectors. Callers + /// that need to mutate or destructure should clone the inner Vecs. #[allow(clippy::type_complexity)] - pub fn into_columns(self) -> (Vec>>, Vec>>) { + pub fn into_columns( + self, + ) -> ( + Arc>>>, + Arc>>>, + ) { (self.main_columns, self.aux_columns) } + /// Cheap clone of the underlying main-column Arc. Used by per-chunk + /// MMCS open helpers that need read-only shared access without + /// owning a copy. + pub fn main_columns_arc(&self) -> Arc>>> { + Arc::clone(&self.main_columns) + } + + /// Cheap clone of the underlying aux-column Arc. See [`Self::main_columns_arc`].
+ pub fn aux_columns_arc(&self) -> Arc>>> { + Arc::clone(&self.aux_columns) + } + pub fn num_main_cols(&self) -> usize { self.main_columns.len() } From b16218083e0bb8b178bc11a66c5e31de2a2fd995 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Thu, 28 May 2026 11:03:53 -0300 Subject: [PATCH 18/21] =?UTF-8?q?feat(stark/mmcs):=20per-chunk=20MMCS=20?= =?UTF-8?q?=E2=80=94=20group=20K=20tables=20per=20MMCS,=20stream=20within?= =?UTF-8?q?=20chunk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors the main + aux MMCSes from one-global to per-chunk: each parallel chunk of K = `table_parallelism()` tables gets its own streaming MMCS, with chunk-mate LDEs Arc-shared so the per-query open path can rehash chunk-mate rows on demand. Final piece of the streaming plan (after `StreamingMmcsBuilder` and `Mmcs::open_with_leaves`). # Why per-chunk? The streaming MMCS drops per-chip leaves at build time, so `Mmcs::open` needs to recompute leaves on the fly via `open_with_leaves`. That closure must produce leaves for every matrix in the MMCS spec — not just the current table — because `MmcsOpening::verify` walks all max-height matrices to reconstruct layer-0. Per-chunk grouping bounds the cross-table LDE access to the K tables a chunk owns (already kept alive together by the existing parallel R2-R4 loop), avoiding the alternatives of (a) global Arc>> threaded through every fork, or (b) reordering iota sampling earlier. # Proof format `MultiProof`: - `main_mmcs_root: Commitment` → `main_mmcs_roots: Vec>` - `main_mmcs_spec: Vec<(...)>` → `main_mmcs_specs: Vec>` - `aux_mmcs_root: Option` → `aux_mmcs_roots: Vec>` - `aux_mmcs_spec: Vec<(...)>` → `aux_mmcs_specs: Vec>` - + `chunk_size: u32` (pinned `table_parallelism()` so the verifier chunks the AIR slice the same way the prover did). `None` entries in the *_roots Vecs mark chunks with no MMCS-eligible tables (all-preprocessed for main, no-aux for aux). 
Per-query openings shrink: each carries ≤K matrix_leaves instead of N. # Phase A / Phase C absorb order (prover + verifier match exactly) Per chunk in chunk order: for each table in spec order: - absorb its preprocessed root (preprocessed only) - absorb its per-table multiplicities root (preprocessed only) - absorb that chunk's main MMCS root (Some) or skip (None) After Phase A → sample LogUp challenges → Phase C aux mirrors main: for each chunk in chunk order: - absorb that chunk's aux MMCS root (Some) or skip (None) Then fork per-table → per-table table_contribution → rounds 2-4. # Data plumbing `MainCommit::Shared` now carries `Arc>` + `chunk_idx` instead of an `Arc` directly. The context holds the chunk's MMCS + Arc-cloned LDE columns for the chunk-mates in MMCS spec sort order. Aux mirrors via `ChunkAuxMmcsContext`. `Lde { main, aux }` columns are now `Arc>>` (built on B1's Arc-wrapped `LDETraceTable`). Each table's `Round1.lde_trace` shares the same Arc as the chunk context — no duplication. # Open path `open_deep_composition_poly` dispatches on the chunk context's MMCS via `mmcs.open_with_leaves`, with a closure that rehashes chunk-mate rows via the new `rehash_main_chip_leaf` / `rehash_aux_chip_leaf` helpers (read from chunk-shared LDE columns + matrix tag, hash with the appropriate `LEAF_DOMAIN_TAG_*`). # Verifier `multi_verify` reads `chunk_size` from the proof, walks chunks of the AIR slice, validates each chunk's expected spec against the supplied one, and absorbs roots in chunk order. `verify_rounds_2_to_4` and `verify_main_mmcs_pair` take `main_mmcs_root: Option<&Commitment>` (and similarly aux): `Shared` opening with `None` root → reject. # Tests - Existing main + aux soundness suites adapted to the per-chunk shape via `first_populated_main_chunk` / `first_populated_aux_chunk` helpers + `proof.main_mmcs_roots[chunk_idx]` field access. 
- New soundness test `tampered_chunk_size_rejected`: pinned chunk_size mismatch must be rejected (Vec length cross-check fires). - bin/cli/proof-size breakdown updated: now reports `main_mmcs_roots / main_mmcs_specs / aux_mmcs_roots / aux_mmcs_specs / chunk_size` as separate sections (multi-proof header), drops the obsolete `per_table_aux_merkle_root` and pre-Vec `main_mmcs_root/spec` rows. Results: 152/152 stark tests green (151 before + new `tampered_chunk_size_rejected`); 27/27 crypto mmcs tests green; 3/3 cli unit tests green; lambda-vm-prover bitwise (preprocessed-path) + non-ELF tests pass; the 77 prove_elfs failures are the same pre-existing `UnknownSyscall(5)` executor bug present on main. --- bin/cli/src/main.rs | 29 +- crypto/stark/src/proof/stark.rs | 52 +- crypto/stark/src/prover.rs | 556 +++++++++++++----- .../src/tests/mmcs_aux_soundness_tests.rs | 43 +- .../stark/src/tests/mmcs_soundness_tests.rs | 68 ++- crypto/stark/src/verifier.rs | 230 +++++--- prover/src/lib.rs | 29 +- 7 files changed, 697 insertions(+), 310 deletions(-) diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index 9e4c95ad4..dd65466b4 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -690,8 +690,11 @@ fn cmd_proof_size( let total = ser_len(&vm_proof); let multi_proof_bytes = ser_len(&vm_proof.proof); - let main_mmcs_root_bytes = ser_len(&vm_proof.proof.main_mmcs_root); - let main_mmcs_spec_bytes = ser_len(&vm_proof.proof.main_mmcs_spec); + let main_mmcs_roots_bytes = ser_len(&vm_proof.proof.main_mmcs_roots); + let main_mmcs_specs_bytes = ser_len(&vm_proof.proof.main_mmcs_specs); + let aux_mmcs_roots_bytes = ser_len(&vm_proof.proof.aux_mmcs_roots); + let aux_mmcs_specs_bytes = ser_len(&vm_proof.proof.aux_mmcs_specs); + let chunk_size_bytes = ser_len(&vm_proof.proof.chunk_size); // Sum per-section across every sub-proof so a single number captures the // contribution of, e.g., "all FRI query lists across all tables". 
@@ -704,14 +707,12 @@ fn cmd_proof_size( let mut s_trace_ood = 0usize; let mut s_composition_ood = 0usize; let mut s_per_table_main_root = 0usize; - let mut s_aux_root = 0usize; let mut s_precomputed_root = 0usize; let mut s_bus_public_inputs = 0usize; let s_other; for proof in &vm_proof.proof.proofs { s_per_table_main_root += ser_len(&proof.lde_trace_main_merkle_root); - s_aux_root += ser_len(&proof.lde_trace_aux_merkle_root); s_precomputed_root += ser_len(&proof.lde_trace_precomputed_merkle_root); s_trace_ood += ser_len(&proof.trace_ood_evaluations); s_composition_ood += ser_len(&proof.composition_poly_parts_ood_evaluation); @@ -730,8 +731,11 @@ fn cmd_proof_size( // Anything not captured above (composition_poly_root, fri_last_value, // nonce, public_inputs, trace_length, headers...). Calculate as the // bundle delta so the breakdown still sums to ~total. - let accounted = main_mmcs_root_bytes - + main_mmcs_spec_bytes + let accounted = main_mmcs_roots_bytes + + main_mmcs_specs_bytes + + aux_mmcs_roots_bytes + + aux_mmcs_specs_bytes + + chunk_size_bytes + s_main_trace_openings + s_precomputed_trace_openings + s_aux_trace_openings @@ -741,17 +745,18 @@ fn cmd_proof_size( + s_trace_ood + s_composition_ood + s_per_table_main_root - + s_aux_root + s_precomputed_root + s_bus_public_inputs; s_other = multi_proof_bytes.saturating_sub(accounted); let entries: Vec = vec![ - ProofSizeEntry { section: "main_mmcs_root".into(), bytes: main_mmcs_root_bytes }, - ProofSizeEntry { section: "main_mmcs_spec".into(), bytes: main_mmcs_spec_bytes }, + ProofSizeEntry { section: "main_mmcs_roots (per-chunk)".into(), bytes: main_mmcs_roots_bytes }, + ProofSizeEntry { section: "main_mmcs_specs (per-chunk)".into(), bytes: main_mmcs_specs_bytes }, + ProofSizeEntry { section: "aux_mmcs_roots (per-chunk)".into(), bytes: aux_mmcs_roots_bytes }, + ProofSizeEntry { section: "aux_mmcs_specs (per-chunk)".into(), bytes: aux_mmcs_specs_bytes }, + ProofSizeEntry { section: "chunk_size".into(), bytes: 
chunk_size_bytes }, ProofSizeEntry { section: "per_table_main_merkle_root (preprocessed)".into(), bytes: s_per_table_main_root }, ProofSizeEntry { section: "per_table_precomputed_merkle_root".into(), bytes: s_precomputed_root }, - ProofSizeEntry { section: "per_table_aux_merkle_root".into(), bytes: s_aux_root }, ProofSizeEntry { section: "deep_poly_openings.main_trace_polys".into(), bytes: s_main_trace_openings }, ProofSizeEntry { section: "deep_poly_openings.precomputed_trace_polys".into(), bytes: s_precomputed_trace_openings }, ProofSizeEntry { section: "deep_poly_openings.aux_trace_polys".into(), bytes: s_aux_trace_openings }, @@ -770,7 +775,7 @@ fn cmd_proof_size( total_vm_proof_bytes: total, multi_proof_bytes, sub_proof_count: vm_proof.proof.proofs.len(), - main_mmcs_spec_entries: vm_proof.proof.main_mmcs_spec.len(), + main_mmcs_spec_entries: vm_proof.proof.main_mmcs_specs.iter().map(|s| s.len()).sum::(), sections: entries.clone(), }; match serde_json::to_string_pretty(&report) { @@ -787,7 +792,7 @@ fn cmd_proof_size( println!("Total VmProof: {:>10} bytes", total); println!("MultiProof only: {:>10} bytes", multi_proof_bytes); println!("Sub-proofs: {:>10}", vm_proof.proof.proofs.len()); - println!("MMCS spec entries: {:>10}", vm_proof.proof.main_mmcs_spec.len()); + println!("MMCS spec entries: {:>10}", vm_proof.proof.main_mmcs_specs.iter().map(|s| s.len()).sum::()); println!(); println!("{:<48}{:>14}{:>10}", "section", "bytes", "% of total"); println!("{}", "-".repeat(72)); diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index 57b28f75c..cc69f7bf0 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -146,26 +146,46 @@ pub struct StarkProof, E: IsField, PI> { /// Used for multi-table proving where tables are linked via bus (LogUp). /// Returned by `Prover::multi_prove` and verified by `Verifier::multi_verify`. 
/// -/// Non-preprocessed tables share a single main-trace MMCS authenticated by -/// `main_mmcs_root`; `main_mmcs_spec` lists `(MatrixTag, padded_height)` -/// per committed table in the MMCS sort order. Preprocessed tables stay -/// out of the main MMCS — each carries its own per-table Merkle root in +/// Non-preprocessed tables in each chunk share a main-trace MMCS +/// authenticated by `main_mmcs_roots[chunk_idx]`. Tables are grouped into +/// chunks of `chunk_size` (the prover's `table_parallelism()` at proving +/// time, pinned in the proof so the verifier chunks the AIR slice the +/// same way). Per-chunk grouping keeps openings small (at most K matrix_leaves +/// per opening instead of N) and bounds the streaming MMCS build to one +/// chunk's K LDEs at a time. Preprocessed tables stay out of any main +/// MMCS; each carries its own per-table Merkle root in /// `StarkProof::lde_trace_main_merkle_root` plus the AIR-pinned -/// precomputed root. Both groups' roots are absorbed in spec-fixed order -/// during Phase A. +/// precomputed root. /// -/// Aux traces (only present for AIRs with LogUp interactions) share a -/// SECOND MMCS authenticated by `aux_mmcs_root`; `aux_mmcs_spec` lists -/// `(MatrixTag, padded_height)` for the subset of tables that contribute -/// aux. `aux_mmcs_root` is `None` when no table in the multi-proof has an -/// aux trace. Domain-separated from the main MMCS via `LEAF_DOMAIN_TAG_AUX` -/// so that no aux opening can authenticate a main leaf (or vice versa). +/// Phase A absorb order: for each table in spec order, absorb its +/// preprocessed root + per-table multiplicities root (preprocessed only); +/// after each chunk, absorb that chunk's main MMCS root (`Some`) or skip +/// (`None`, when the chunk has no non-preprocessed tables). +/// +/// Aux traces mirror the same chunk grouping. `aux_mmcs_roots[chunk_idx]` +/// is `None` when no table in that chunk has an aux trace. 
Aux MMCS +/// leaves are domain-separated from main via `LEAF_DOMAIN_TAG_AUX`. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound = "PI: serde::Serialize + serde::de::DeserializeOwned")] pub struct MultiProof, E: IsField, PI> { pub proofs: Vec>, - pub main_mmcs_root: Commitment, - pub main_mmcs_spec: Vec<(MatrixTag, usize)>, - pub aux_mmcs_root: Option, - pub aux_mmcs_spec: Vec<(MatrixTag, usize)>, + /// Per-chunk main MMCS roots in chunk order. `None` for chunks whose + /// tables are all preprocessed (no main MMCS exists for that chunk). + pub main_mmcs_roots: Vec>, + /// Per-chunk MMCS specs for the main trace, parallel to + /// `main_mmcs_roots`. Empty inner Vec when the corresponding root is + /// `None`. Each non-empty Vec lists `(MatrixTag, padded_height)` for + /// the non-preprocessed tables in that chunk in MMCS sort order + /// (height desc, tag asc). + pub main_mmcs_specs: Vec>, + /// Per-chunk aux MMCS roots. `None` for chunks with no has_aux_trace + /// tables. Parallel to `main_mmcs_roots`. + pub aux_mmcs_roots: Vec>, + /// Per-chunk aux MMCS specs. Empty inner Vec when the corresponding + /// `aux_mmcs_roots[i]` is `None`. + pub aux_mmcs_specs: Vec>, + /// Pinned chunk size. Equals the prover's `table_parallelism()` at + /// proving time. The verifier uses this to chunk the AIR slice into + /// the same per-chunk grouping the prover used. 
+ pub chunk_size: u32, } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index a5e2b8142..bf9a5f03e 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -32,7 +32,7 @@ use crate::proof::stark::{DeepPolynomialOpenings, MainTraceOpening, PolynomialOp use crate::storage_mode::StorageMode; use crate::table::Table; use crate::trace::LDETraceTable; -use crypto::merkle_tree::mmcs::{MatrixTag, Mmcs, MmcsBuilder, MmcsError}; +use crypto::merkle_tree::mmcs::{MatrixTag, Mmcs, MmcsError, StreamingMmcsBuilder}; use super::config::{BatchedMerkleTree, BatchedMerkleTreeBackend, Commitment}; use super::constraints::evaluator::ConstraintEvaluator; @@ -81,26 +81,43 @@ pub enum ProvingError { DiskSpill(String), } -/// Per-table commitment artifacts for the main trace under the shared -/// MMCS protocol. The `mmcs` Arc is the SAME instance for every table in -/// the multi-proof — Phase A builds it once. +/// Per-chunk main MMCS context. Shared across every non-preprocessed +/// table in a chunk: the chunk's MMCS Arc + Arc-cloned LDE columns for +/// chunk-mate non-preprocessed tables in MMCS-spec sort order. The +/// per-query open path uses this to rehash chunk-mate rows on demand +/// (the streaming MMCS dropped the per-chip leaf arrays at build time). +pub(crate) struct ChunkMainMmcsContext +where + FieldElement: AsBytes, +{ + /// Chunk-scoped MMCS (built once per chunk in Phase A). + pub(crate) mmcs: Arc>>, + /// Arc-cloned LDE columns for the non-preprocessed chunk-mates, + /// indexed in MMCS spec sort order (parallel to `mmcs.spec()`). + /// Open path closures look up `lde_columns_in_spec_order[m_idx]` to + /// rehash the row at the queried local position. + pub(crate) lde_columns_in_spec_order: Vec>>>>, +} + +/// Per-table commitment artifacts for the main trace. 
/// -/// `padded_height` is this table's LDE height (a power of two), needed to -/// translate the table's local FRI iota into a global MMCS index when -/// opening (see `open_deep_composition_poly`). +/// `Shared` tables borrow a per-chunk MMCS context (Arc) and remember +/// their chunk index so the verifier can look up the matching root + +/// spec in `MultiProof::main_mmcs_roots[chunk_idx]`. pub(crate) enum MainCommit where FieldElement: AsBytes, { - /// Non-preprocessed table: committed under the shared MMCS. + /// Non-preprocessed table: committed under the chunk's MMCS. Shared { - mmcs: Arc>>, + chunk_ctx: Arc>, + chunk_idx: usize, tag: MatrixTag, /// Padded height (== LDE row count); needed to translate a local - /// FRI iota into a global MMCS index. + /// FRI iota into a global MMCS index inside this chunk's MMCS. padded_height: usize, }, - /// Preprocessed table: two per-table Merkle trees, NOT in the MMCS. + /// Preprocessed table: two per-table Merkle trees, NOT in any MMCS. Preprocessed { multiplicities_tree: Arc>, multiplicities_root: Commitment, @@ -137,11 +154,13 @@ where fn share(&self) -> Self { match self { Self::Shared { - mmcs, + chunk_ctx, + chunk_idx, tag, padded_height, } => Self::Shared { - mmcs: Arc::clone(mmcs), + chunk_ctx: Arc::clone(chunk_ctx), + chunk_idx: *chunk_idx, tag: *tag, padded_height: *padded_height, }, @@ -207,16 +226,26 @@ where } } -/// Per-table aux-trace commitment under the shared aux MMCS. -/// Mirror of [`MainCommit::Shared`]: the `mmcs` Arc is shared across every -/// table that contributes an aux trace; `tag` + `padded_height` identify -/// this table's slot inside that MMCS. +/// Per-chunk aux MMCS context. Sister of [`ChunkMainMmcsContext`] for +/// the aux trace. +pub(crate) struct ChunkAuxMmcsContext +where + FieldElement: AsBytes, +{ + pub(crate) mmcs: Arc>>, + /// Arc-cloned aux LDE columns for chunk-mates with aux, in MMCS + /// spec sort order. 
+ pub(crate) lde_columns_in_spec_order: Vec>>>>, +} + +/// Per-table aux-trace commitment under a chunk's aux MMCS. pub(crate) enum AuxCommit where FieldElement: AsBytes, { Shared { - mmcs: Arc>>, + chunk_ctx: Arc>, + chunk_idx: usize, tag: MatrixTag, padded_height: usize, }, @@ -229,11 +258,13 @@ where fn share(&self) -> Self { match self { Self::Shared { - mmcs, + chunk_ctx, + chunk_idx, tag, padded_height, } => Self::Shared { - mmcs: Arc::clone(mmcs), + chunk_ctx: Arc::clone(chunk_ctx), + chunk_idx: *chunk_idx, tag: *tag, padded_height: *padded_height, }, @@ -290,13 +321,16 @@ where bus_public_inputs: Option>, } -/// LDE columns for main (Phase A) and auxiliary (Phase C) traces, consumed by value in Phase D. +/// LDE columns for main (Phase A) and auxiliary (Phase C) traces. +/// Arc-wrapped so per-chunk MMCS contexts can hold cheap clones for the +/// open path while the originating table's `Round1.lde_trace` retains +/// the same data via Arc share (no duplication). /// -/// Memory trade-off: all N tables' LDE columns are live simultaneously between Phase A/C -/// and Phase D (O(N × cols × lde_size)). +/// Memory trade-off: all N tables' LDE columns are live simultaneously +/// between Phase A/C and Phase D (O(N × cols × lde_size)). struct Lde { - main: Vec>>, - aux: Vec>>, + main: Arc>>>, + aux: Arc>>>, } impl Round1Commitments @@ -307,7 +341,9 @@ where FieldElement: AsBytes, { /// Build a `Round1` by consuming a `Lde` and borrowing commitment data. - /// The `share` calls are cheap — only bump Arc refcounts. + /// The `share` calls are cheap — only bump Arc refcounts. The LDE + /// columns are also Arc-shared (with this chunk's MMCS contexts) so + /// the open path can rehash chunk-mate rows without copying. 
fn build_round1( &self, lde: Lde, @@ -315,7 +351,12 @@ where blowup_factor: usize, ) -> Round1 { Round1 { - lde_trace: LDETraceTable::from_columns(lde.main, lde.aux, step_size, blowup_factor), + lde_trace: LDETraceTable::from_columns_arc( + lde.main, + lde.aux, + step_size, + blowup_factor, + ), main: self.main.share(), aux: self.aux.as_ref().map(AuxCommit::share), rap_challenges: self.rap_challenges.clone(), @@ -512,17 +553,73 @@ fn map_mmcs_err(e: MmcsError) -> ProvingError { ProvingError::WrongParameter(format!("MMCS: {e:?}")) } -/// Build the unified main-trace MMCS from the per-table Phase A outputs. -/// Returns the root, the (tag, padded_height) spec, and the shared Arc that -/// every table's `MainCommit` borrows. +/// Rehash a single main-trace LDE row to its tagged leaf digest. Used by +/// the per-chunk open path: when `Mmcs::open_with_leaves` walks the chunk +/// MMCS spec to gather matrix_leaves at a queried position, this helper +/// recomputes each chunk-mate's leaf on demand from the chunk-shared LDE +/// columns. Mirrors what the verifier computes via `hash_tagged_row`. +pub fn rehash_main_chip_leaf( + tag: MatrixTag, + columns: &Arc>>>, + local_idx: usize, +) -> Commitment +where + F: IsField, + FieldElement: AsBytes + ByteConversion, +{ + let num_rows = columns + .first() + .map(|c| c.len()) + .expect("non-empty LDE columns"); + let br_idx = reverse_index(local_idx, num_rows as u64); + let byte_len = as ByteConversion>::BYTE_LEN; + let mut buf = vec![0u8; columns.len() * byte_len]; + for (col_idx, col) in columns.iter().enumerate() { + col[br_idx].write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); + } + crate::mmcs_leaf::hash_tagged_row_bytes(tag, &buf) +} + +/// Aux-trace counterpart of [`rehash_main_chip_leaf`] using the AUX +/// domain separator so aux/main leaves cannot collide. 
+pub fn rehash_aux_chip_leaf( + tag: MatrixTag, + columns: &Arc>>>, + local_idx: usize, +) -> Commitment +where + E: IsField, + FieldElement: AsBytes + ByteConversion, +{ + let num_rows = columns + .first() + .map(|c| c.len()) + .expect("non-empty aux LDE columns"); + let br_idx = reverse_index(local_idx, num_rows as u64); + let byte_len = as ByteConversion>::BYTE_LEN; + let mut buf = vec![0u8; columns.len() * byte_len]; + for (col_idx, col) in columns.iter().enumerate() { + col[br_idx].write_bytes_be(&mut buf[col_idx * byte_len..(col_idx + 1) * byte_len]); + } + crate::mmcs_leaf::hash_tagged_row_bytes_aux(tag, &buf) +} + +/// Build a CHUNK-scoped main MMCS via [`StreamingMmcsBuilder`]. Consumes +/// the Shared phase-A outputs (drops their per-chip leaves once folded), +/// returns the chunk root + spec + an `Arc` that +/// every Shared table in the chunk borrows. +/// +/// Returns `None` for the root/context when the chunk has no Shared +/// tables (entire chunk is preprocessed). #[allow(clippy::type_complexity)] -fn build_main_mmcs( - outputs: &[MainPhaseAOutput], +fn build_chunk_main_mmcs( + shared_outputs: Vec<(MatrixTag, Vec, usize)>, + chunk_lde_for_shared: Vec<(MatrixTag, Arc>>>)>, ) -> Result< ( - Commitment, + Option, Vec<(MatrixTag, usize)>, - Arc>>, + Option>>, ), ProvingError, > @@ -530,23 +627,41 @@ where F: IsField + Send + Sync, FieldElement: AsBytes + Send + Sync, { - let mut builder: MmcsBuilder> = MmcsBuilder::new(); - for output in outputs { - if let MainPhaseAOutput::Shared { - tag, - leaves, - padded_height: _, - } = output - { - builder - .add_matrix(*tag, leaves.clone()) - .map_err(map_mmcs_err)?; - } + if shared_outputs.is_empty() { + return Ok((None, Vec::new(), None)); + } + debug_assert_eq!(shared_outputs.len(), chunk_lde_for_shared.len()); + + // Sort both vectors into MMCS spec order: height desc, tag asc. 
+ let mut shared_outputs = shared_outputs; + shared_outputs.sort_by(|a, b| b.2.cmp(&a.2).then(a.0.cmp(&b.0))); + let lde_by_tag: std::collections::BTreeMap>>>> = + chunk_lde_for_shared.into_iter().collect(); + + let mut builder: StreamingMmcsBuilder> = + StreamingMmcsBuilder::new(); + let mut lde_columns_in_spec_order: Vec>>>> = + Vec::with_capacity(shared_outputs.len()); + for (tag, leaves, _padded_height) in shared_outputs { + let lde = lde_by_tag + .get(&tag) + .ok_or_else(|| { + ProvingError::WrongParameter(format!( + "missing chunk LDE for tag {tag:?} during chunk MMCS build" + )) + })? + .clone(); + lde_columns_in_spec_order.push(lde); + builder.add_matrix(tag, leaves).map_err(map_mmcs_err)?; } let mmcs = builder.finalize().map_err(map_mmcs_err)?; let root = *mmcs.root(); let spec = mmcs.spec(); - Ok((root, spec, Arc::new(mmcs))) + let ctx = Arc::new(ChunkMainMmcsContext { + mmcs: Arc::new(mmcs), + lde_columns_in_spec_order, + }); + Ok((Some(root), spec, Some(ctx))) } /// Tagged per-row leaf digest for the AUX-trace MMCS. Mirror of @@ -591,17 +706,18 @@ where } } -/// Build the shared AUX-trace MMCS from per-table Phase-C outputs (only -/// tables that have an aux trace participate). Returns `None`/`empty spec` -/// when no table contributes aux. +/// Build a CHUNK-scoped aux MMCS via [`StreamingMmcsBuilder`]. Sister of +/// [`build_chunk_main_mmcs`] for the aux trace. Returns `None` for root +/// and context when no chunk-mate has an aux trace. 
#[allow(clippy::type_complexity)] -fn build_aux_mmcs( - outputs: &[Option>], +fn build_chunk_aux_mmcs( + aux_outputs: Vec<(MatrixTag, Vec, usize)>, + chunk_aux_lde_for_shared: Vec<(MatrixTag, Arc>>>)>, ) -> Result< ( Option, Vec<(MatrixTag, usize)>, - Option>>>, + Option>>, ), ProvingError, > @@ -609,20 +725,40 @@ where E: IsField + Send + Sync, FieldElement: AsBytes + Send + Sync, { - let any = outputs.iter().any(|o| o.is_some()); - if !any { + if aux_outputs.is_empty() { return Ok((None, Vec::new(), None)); } - let mut builder: MmcsBuilder> = MmcsBuilder::new(); - for out in outputs.iter().flatten() { - builder - .add_matrix(out.tag, out.leaves.clone()) - .map_err(map_mmcs_err)?; + debug_assert_eq!(aux_outputs.len(), chunk_aux_lde_for_shared.len()); + + let mut aux_outputs = aux_outputs; + aux_outputs.sort_by(|a, b| b.2.cmp(&a.2).then(a.0.cmp(&b.0))); + let lde_by_tag: std::collections::BTreeMap>>>> = + chunk_aux_lde_for_shared.into_iter().collect(); + + let mut builder: StreamingMmcsBuilder> = + StreamingMmcsBuilder::new(); + let mut lde_columns_in_spec_order: Vec>>>> = + Vec::with_capacity(aux_outputs.len()); + for (tag, leaves, _padded_height) in aux_outputs { + let lde = lde_by_tag + .get(&tag) + .ok_or_else(|| { + ProvingError::WrongParameter(format!( + "missing chunk aux LDE for tag {tag:?} during chunk MMCS build" + )) + })? + .clone(); + lde_columns_in_spec_order.push(lde); + builder.add_matrix(tag, leaves).map_err(map_mmcs_err)?; } let mmcs = builder.finalize().map_err(map_mmcs_err)?; let root = *mmcs.root(); let spec = mmcs.spec(); - Ok((Some(root), spec, Some(Arc::new(mmcs)))) + let ctx = Arc::new(ChunkAuxMmcsContext { + mmcs: Arc::new(mmcs), + lde_columns_in_spec_order, + }); + Ok((Some(root), spec, Some(ctx))) } /// Tagged per-row leaf digest for the main-trace MMCS. 
@@ -987,7 +1123,14 @@ pub trait IsStarkProver< Vec::new() }; - Ok(commitment.build_round1(Lde { main, aux }, air.step_size(), domain.blowup_factor)) + Ok(commitment.build_round1( + Lde { + main: Arc::new(main), + aux: Arc::new(aux), + }, + air.step_size(), + domain.blowup_factor, + )) } /// Reconstruct Round1 for every table, print the bus balance report, and @@ -1659,7 +1802,9 @@ pub trait IsStarkProver< ); let aux_trace_polys = round_1_result.aux.as_ref().map(|aux| { - let AuxCommit::Shared { mmcs, padded_height, .. } = aux; + let AuxCommit::Shared { chunk_ctx, padded_height, .. } = aux; + let mmcs = &chunk_ctx.mmcs; + let lde_in_spec_order = &chunk_ctx.lde_columns_in_spec_order; let max_height = mmcs .spec() .first() @@ -1673,11 +1818,23 @@ pub trait IsStarkProver< let evaluations = lde_trace.gather_aux_row(reverse_index(primary, domain_size)); let evaluations_sym = lde_trace.gather_aux_row(reverse_index(sym, domain_size)); let mmcs_opening = mmcs - .open(primary << shift) - .expect("aux MMCS open: prover-side primary index in range"); + .open_with_leaves(primary << shift, |m_idx, local_idx| { + rehash_aux_chip_leaf::( + mmcs.spec()[m_idx].0, + &lde_in_spec_order[m_idx], + local_idx, + ) + }) + .expect("aux MMCS open_with_leaves: primary index in range"); let mmcs_opening_sym = mmcs - .open(sym << shift) - .expect("aux MMCS open: prover-side sym index in range"); + .open_with_leaves(sym << shift, |m_idx, local_idx| { + rehash_aux_chip_leaf::( + mmcs.spec()[m_idx].0, + &lde_in_spec_order[m_idx], + local_idx, + ) + }) + .expect("aux MMCS open_with_leaves: sym index in range"); crate::proof::stark::AuxTraceOpening::Mmcs { evaluations, evaluations_sym, @@ -1688,10 +1845,12 @@ pub trait IsStarkProver< let (main_trace_opening, precomputed_trace_opening) = match main_commit { MainCommit::Shared { - mmcs, + chunk_ctx, padded_height, .. 
} => { + let mmcs = &chunk_ctx.mmcs; + let lde_in_spec_order = &chunk_ctx.lde_columns_in_spec_order; let max_height = mmcs .spec() .first() @@ -1707,11 +1866,23 @@ pub trait IsStarkProver< let evaluations = lde_trace.gather_main_row(reverse_index(primary, domain_size)); let evaluations_sym = lde_trace.gather_main_row(reverse_index(sym, domain_size)); let mmcs_opening = mmcs - .open(primary << shift) - .expect("MMCS open: prover-side primary index in range"); + .open_with_leaves(primary << shift, |m_idx, local_idx| { + rehash_main_chip_leaf::( + mmcs.spec()[m_idx].0, + &lde_in_spec_order[m_idx], + local_idx, + ) + }) + .expect("main MMCS open_with_leaves: primary index in range"); let mmcs_opening_sym = mmcs - .open(sym << shift) - .expect("MMCS open: prover-side sym index in range"); + .open_with_leaves(sym << shift, |m_idx, local_idx| { + rehash_main_chip_leaf::( + mmcs.spec()[m_idx].0, + &lde_in_spec_order[m_idx], + local_idx, + ) + }) + .expect("main MMCS open_with_leaves: sym index in range"); let opening = MainTraceOpening::Mmcs { evaluations, evaluations_sym, @@ -1896,17 +2067,26 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let phase_start = Instant::now(); - let mut phase_a_outputs: Vec> = Vec::with_capacity(num_airs); - let mut main_ldes: Vec>>> = Vec::with_capacity(num_airs); + // Per-chunk MMCS: each chunk of K tables builds its own streaming + // MMCS, sharing chunk LDEs via Arc so per-query opens can rehash + // chunk-mate rows on demand. Phase A absorb order: per table in + // spec order, absorb preprocessed + main-tree roots (preprocessed + // only); after each chunk, absorb the chunk's MMCS root (`Some`) + // or skip when the chunk has no Shared tables (`None`). 
+ let mut main_commits: Vec>> = (0..num_airs).map(|_| None).collect(); + let mut main_ldes: Vec>>>>> = + (0..num_airs).map(|_| None).collect(); + let mut main_mmcs_roots_per_chunk: Vec> = Vec::new(); + let mut main_mmcs_specs_per_chunk: Vec> = Vec::new(); for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_range = chunk_start..chunk_end; #[cfg(feature = "parallel")] - let iter = chunk_range.into_par_iter(); + let iter = chunk_range.clone().into_par_iter(); #[cfg(not(feature = "parallel"))] - let iter = chunk_range; + let iter = chunk_range.clone(); let chunk_results: Vec> = iter .map(|idx| { @@ -1930,56 +2110,90 @@ pub trait IsStarkProver< }) .collect(); - // Sequential: per table, absorb its preprocessed root and then - // its own per-table multiplicities root (preprocessed only). The - // shared MMCS root is absorbed once after the loop. Order must - // match the verifier replay. - for result in chunk_results { - let (output, cached_main) = result?; + // Sequential: absorb per-table preprocessed + main-tree roots + // (preprocessed only) in order, then build this chunk's MMCS + // from the chunk's Shared outputs and absorb its root. 
+ let mut chunk_shared_outputs: Vec<(MatrixTag, Vec, usize)> = Vec::new(); + let mut chunk_shared_ldes: Vec<(MatrixTag, Arc>>>)> = + Vec::new(); + let chunk_idx = main_mmcs_roots_per_chunk.len(); + let chunk_outputs: Vec<_> = chunk_results.into_iter().collect::>()?; + for (offset, (output, cached_main)) in chunk_outputs.into_iter().enumerate() { + let idx = chunk_start + offset; if let Some(ref pre_root) = output.precomputed_root() { transcript.append_bytes(pre_root); } if let Some(ref main_root) = output.main_tree_root() { transcript.append_bytes(main_root); } - phase_a_outputs.push(output); - main_ldes.push(cached_main); + let cached_main_arc = Arc::new(cached_main); + main_ldes[idx] = Some(Arc::clone(&cached_main_arc)); + match output { + MainPhaseAOutput::Shared { + tag, + leaves, + padded_height, + } => { + chunk_shared_outputs.push((tag, leaves, padded_height)); + chunk_shared_ldes.push((tag, cached_main_arc)); + // MainCommit::Shared placeholder filled in after chunk MMCS build. + main_commits[idx] = None; + } + MainPhaseAOutput::Preprocessed { + multiplicities_tree, + multiplicities_root, + precomputed_tree, + precomputed_root, + num_precomputed_cols, + } => { + main_commits[idx] = Some(MainCommit::Preprocessed { + multiplicities_tree, + multiplicities_root, + precomputed_tree, + precomputed_root, + num_precomputed_cols, + }); + } + } } - } - // Build the unified main-trace MMCS once over Shared (non-preprocessed) - // entries. Preprocessed tables stay out of the MMCS and keep their - // own per-table Merkle trees (already absorbed above). 
- let (main_mmcs_root, main_mmcs_spec, mmcs_arc) = - build_main_mmcs::(&phase_a_outputs)?; - transcript.append_bytes(&main_mmcs_root); + let (chunk_root, chunk_spec, chunk_ctx_opt) = + build_chunk_main_mmcs::(chunk_shared_outputs, chunk_shared_ldes)?; + if let Some(ref root) = chunk_root { + transcript.append_bytes(root); + } + main_mmcs_roots_per_chunk.push(chunk_root); + main_mmcs_specs_per_chunk.push(chunk_spec.clone()); + + // Fill in MainCommit::Shared for this chunk's Shared tables. + if let Some(chunk_ctx) = chunk_ctx_opt { + // chunk_spec is in MMCS sort order (height desc, tag asc). + // Use tag → padded_height lookup to populate Shared variants. + let height_by_tag: std::collections::BTreeMap = + chunk_spec.iter().copied().collect(); + for idx in chunk_range.clone() { + if main_commits[idx].is_none() { + let tag = main_tags[idx]; + if let Some(&padded_height) = height_by_tag.get(&tag) { + main_commits[idx] = Some(MainCommit::Shared { + chunk_ctx: Arc::clone(&chunk_ctx), + chunk_idx, + tag, + padded_height, + }); + } + } + } + } + } - let main_commits: Vec> = phase_a_outputs + let main_commits: Vec> = main_commits .into_iter() - .map(|o| match o { - MainPhaseAOutput::Shared { - tag, - padded_height, - leaves: _, - } => MainCommit::Shared { - mmcs: Arc::clone(&mmcs_arc), - tag, - padded_height, - }, - MainPhaseAOutput::Preprocessed { - multiplicities_tree, - multiplicities_root, - precomputed_tree, - precomputed_root, - num_precomputed_cols, - } => MainCommit::Preprocessed { - multiplicities_tree, - multiplicities_root, - precomputed_tree, - precomputed_root, - num_precomputed_cols, - }, - }) + .map(|c| c.expect("main commit populated for every table")) + .collect(); + let main_ldes: Vec>>>> = main_ldes + .into_iter() + .map(|l| l.expect("main LDE populated for every table")) .collect(); #[cfg(feature = "instruments")] @@ -2064,21 +2278,25 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let phase_start = Instant::now(); - // Per-table aux 
Phase-C outputs. `None` entries are tables with no - // aux trace and contribute neither leaves nor an MMCS slot. - let mut aux_outputs: Vec>> = - Vec::with_capacity(num_airs); - let mut aux_ldes: Vec>>> = + // Per-chunk aux MMCS: mirror of Phase A main, applied to the aux + // trace. Each chunk's aux MMCS root is absorbed into the SHARED + // transcript BEFORE per-table forking so every fork sees the + // same per-chunk aux binding identically. + let mut aux_commits: Vec>> = + (0..num_airs).map(|_| None).collect(); + let mut aux_ldes_arc: Vec>>>> = Vec::with_capacity(num_airs); + let mut aux_mmcs_roots_per_chunk: Vec> = Vec::new(); + let mut aux_mmcs_specs_per_chunk: Vec> = Vec::new(); for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_range = chunk_start..chunk_end; #[cfg(feature = "parallel")] - let iter = chunk_range.into_par_iter(); + let iter = chunk_range.clone().into_par_iter(); #[cfg(not(feature = "parallel"))] - let iter = chunk_range; + let iter = chunk_range.clone(); let chunk_aux: Vec> = iter .map(|idx| { @@ -2126,26 +2344,60 @@ pub trait IsStarkProver< }) .collect(); - for result in chunk_aux { - let (output, cached_aux) = result?; - aux_outputs.push(output); - aux_ldes.push(cached_aux); + let chunk_idx = aux_mmcs_roots_per_chunk.len(); + let mut chunk_aux_outputs: Vec<(MatrixTag, Vec, usize)> = Vec::new(); + let mut chunk_aux_ldes: Vec<(MatrixTag, Arc>>>)> = + Vec::new(); + let chunk_outputs: Vec<_> = chunk_aux.into_iter().collect::>()?; + for (offset, (maybe_output, cached_aux)) in chunk_outputs.into_iter().enumerate() { + let idx = chunk_start + offset; + let cached_arc = Arc::new(cached_aux); + aux_ldes_arc.push(Arc::clone(&cached_arc)); + if let Some(out) = maybe_output { + let AuxPhaseCOutput { + tag, + leaves, + padded_height, + .. 
+ } = out; + chunk_aux_outputs.push((tag, leaves, padded_height)); + chunk_aux_ldes.push((tag, cached_arc)); + aux_commits[idx] = None; // filled in after MMCS build + } else { + aux_commits[idx] = None; + } } - } - - // Build the shared aux MMCS over the non-None entries. Order is - // spec-fixed (matches `main_tags` order, filtered to has-aux). - let (aux_mmcs_root_opt, aux_mmcs_spec, aux_mmcs_arc) = - build_aux_mmcs::(&aux_outputs)?; - // Absorb the aux MMCS root into the SHARED transcript before - // forking — every table's fork inherits this binding identically. - if let Some(ref root) = aux_mmcs_root_opt { - transcript.append_bytes(root); + let (chunk_root, chunk_spec, chunk_ctx_opt) = + build_chunk_aux_mmcs::(chunk_aux_outputs, chunk_aux_ldes)?; + if let Some(ref root) = chunk_root { + transcript.append_bytes(root); + } + aux_mmcs_roots_per_chunk.push(chunk_root); + aux_mmcs_specs_per_chunk.push(chunk_spec.clone()); + + if let Some(chunk_ctx) = chunk_ctx_opt { + let height_by_tag: std::collections::BTreeMap = + chunk_spec.iter().copied().collect(); + for idx in chunk_range.clone() { + let (air, _, _) = &air_trace_pairs[idx]; + if air.has_aux_trace() { + let tag = main_tags[idx]; + if let Some(&padded_height) = height_by_tag.get(&tag) { + aux_commits[idx] = Some(AuxCommit::Shared { + chunk_ctx: Arc::clone(&chunk_ctx), + chunk_idx, + tag, + padded_height, + }); + } + } + } + } } // Pre-fork all transcripts (cheap, sequential — must match verifier ordering). - // Happens AFTER aux MMCS absorb so each fork inherits the binding. + // Happens AFTER all per-chunk aux MMCS roots have been absorbed. let mut table_transcripts: Vec<_> = (0..num_airs) .map(|idx| { let mut t = transcript.clone(); @@ -2156,28 +2408,13 @@ pub trait IsStarkProver< }) .collect(); - // Reassemble per-table aux commits from the shared MMCS Arc. 
- let aux_commits: Vec>> = aux_outputs - .into_iter() - .map(|o| { - o.map(|out| AuxCommit::Shared { - mmcs: Arc::clone( - aux_mmcs_arc - .as_ref() - .expect("MMCS Arc populated when at least one aux output present"), - ), - tag: out.tag, - padded_height: out.padded_height, - }) - }) - .collect(); #[allow(clippy::type_complexity)] let aux_results: Vec<( Option>, - Vec>>, + Arc>>>, )> = aux_commits .into_iter() - .zip(aux_ldes) + .zip(aux_ldes_arc) .collect(); // Build commitments and cached LDEs as separate vecs: @@ -2328,10 +2565,11 @@ pub trait IsStarkProver< Ok(MultiProof { proofs, - main_mmcs_root, - main_mmcs_spec, - aux_mmcs_root: aux_mmcs_root_opt, - aux_mmcs_spec, + main_mmcs_roots: main_mmcs_roots_per_chunk, + main_mmcs_specs: main_mmcs_specs_per_chunk, + aux_mmcs_roots: aux_mmcs_roots_per_chunk, + aux_mmcs_specs: aux_mmcs_specs_per_chunk, + chunk_size: k as u32, }) } diff --git a/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs b/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs index d01d4a924..cfa4828f4 100644 --- a/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs +++ b/crypto/stark/src/tests/mmcs_aux_soundness_tests.rs @@ -120,11 +120,31 @@ fn first_aux_mmcs_opening_mut( .expect("baseline must have aux openings") } +/// First chunk index whose aux MMCS root is `Some`. 
+fn first_populated_aux_chunk(proof: &MultiProof>) -> usize { + proof + .aux_mmcs_roots + .iter() + .position(|r| r.is_some()) + .expect("at least one chunk must have an aux MMCS root in this baseline") +} + #[test_log::test] fn baseline_two_rap_tables_verify() { let (air_1, air_2, proof) = baseline_proof(); - assert!(proof.aux_mmcs_root.is_some(), "aux MMCS must be present"); - assert_eq!(proof.aux_mmcs_spec.len(), 2, "both AIRs contribute aux"); + assert!( + proof.aux_mmcs_roots.iter().any(|r| r.is_some()), + "at least one chunk's aux MMCS must be present" + ); + assert!( + proof + .aux_mmcs_specs + .iter() + .map(|s| s.len()) + .sum::() + == 2, + "both AIRs contribute aux" + ); let airs: Vec<&dyn AIR>> = vec![&air_1, &air_2]; assert!(verify(&airs, &proof), "baseline aux proof must verify"); @@ -135,7 +155,10 @@ fn tampered_aux_mmcs_root_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR>> = vec![&air_1, &air_2]; - let root = proof.aux_mmcs_root.as_mut().expect("baseline has root"); + let chunk_idx = first_populated_aux_chunk(&proof); + let root = proof.aux_mmcs_roots[chunk_idx] + .as_mut() + .expect("populated"); root[0] ^= 1; assert!(!verify(&airs, &proof)); } @@ -145,8 +168,12 @@ fn missing_aux_mmcs_root_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR>> = vec![&air_1, &air_2]; - proof.aux_mmcs_root = None; - assert!(!verify(&airs, &proof)); + let chunk_idx = first_populated_aux_chunk(&proof); + proof.aux_mmcs_roots[chunk_idx] = None; + assert!( + !verify(&airs, &proof), + "aux_mmcs_root=None while chunk has aux tables must be rejected" + ); } #[test_log::test] @@ -154,7 +181,8 @@ fn tampered_aux_mmcs_spec_height_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR>> = vec![&air_1, &air_2]; - proof.aux_mmcs_spec[0].1 /= 2; + let chunk_idx = first_populated_aux_chunk(&proof); + proof.aux_mmcs_specs[chunk_idx][0].1 /= 2; assert!(!verify(&airs, &proof)); } @@ 
-163,7 +191,8 @@ fn tampered_aux_mmcs_spec_tag_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR>> = vec![&air_1, &air_2]; - proof.aux_mmcs_spec[0].0 = MatrixTag::new([0xFF; 8]); + let chunk_idx = first_populated_aux_chunk(&proof); + proof.aux_mmcs_specs[chunk_idx][0].0 = MatrixTag::new([0xFF; 8]); assert!(!verify(&airs, &proof)); } diff --git a/crypto/stark/src/tests/mmcs_soundness_tests.rs b/crypto/stark/src/tests/mmcs_soundness_tests.rs index 0a690e085..ab0c8912f 100644 --- a/crypto/stark/src/tests/mmcs_soundness_tests.rs +++ b/crypto/stark/src/tests/mmcs_soundness_tests.rs @@ -26,11 +26,7 @@ type F = GoldilocksField; /// Build a baseline multi-proof over (DummyAIR, BitFlagsAIR). Both are /// non-preprocessed → every main opening is `MainTraceOpening::Mmcs`. #[allow(clippy::type_complexity)] -fn baseline_proof() -> ( - DummyAIR, - BitFlagsAIR, - MultiProof, -) { +fn baseline_proof() -> (DummyAIR, BitFlagsAIR, MultiProof) { let proof_options = ProofOptions::default_test_options(); let air_1 = DummyAIR::new(&proof_options); let air_2 = BitFlagsAIR::new(&proof_options); @@ -40,16 +36,15 @@ fn baseline_proof() -> ( &dyn AIR, &mut _, &_, - )> = vec![ - (&air_1, &mut trace_1, &()), - (&air_2, &mut trace_2, &()), - ]; - let proof = - multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + )> = vec![(&air_1, &mut trace_1, &()), (&air_2, &mut trace_2, &())]; + let proof = multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); (air_1, air_2, proof) } -fn verify(airs: &[&dyn AIR], proof: &MultiProof) -> bool { +fn verify( + airs: &[&dyn AIR], + proof: &MultiProof, +) -> bool { multi_verify_ram( airs, proof, @@ -58,12 +53,18 @@ fn verify(airs: &[&dyn AIR], p ) } -/// First-iota opening for the first table in the multi-proof, in the Mmcs -/// variant. Helper for tests that need a mutable handle into the per-query -/// MMCS opening fields. 
-fn first_mmcs_opening_mut( - proof: &mut MultiProof, -) -> &mut MainTraceOpening { +/// First chunk index whose main MMCS root is `Some` — i.e., the first +/// chunk that has at least one non-preprocessed table. Used by the +/// tampering tests to locate a real root/spec to mutate. +fn first_populated_main_chunk(proof: &MultiProof) -> usize { + proof + .main_mmcs_roots + .iter() + .position(|r| r.is_some()) + .expect("at least one chunk must have a main MMCS root in this baseline") +} + +fn first_mmcs_opening_mut(proof: &mut MultiProof) -> &mut MainTraceOpening { &mut proof.proofs[0].deep_poly_openings[0].main_trace_polys } @@ -80,7 +81,11 @@ fn tampered_main_mmcs_root_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR> = vec![&air_1, &air_2]; - proof.main_mmcs_root[0] ^= 1; + let chunk_idx = first_populated_main_chunk(&proof); + let root = proof.main_mmcs_roots[chunk_idx] + .as_mut() + .expect("populated"); + root[0] ^= 1; assert!( !verify(&airs, &proof), "tampered main MMCS root must be rejected" @@ -92,8 +97,8 @@ fn tampered_main_mmcs_spec_height_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR> = vec![&air_1, &air_2]; - let height = &mut proof.main_mmcs_spec[0].1; - *height /= 2; + let chunk_idx = first_populated_main_chunk(&proof); + proof.main_mmcs_specs[chunk_idx][0].1 /= 2; assert!( !verify(&airs, &proof), "spec height mismatch must be rejected" @@ -105,13 +110,25 @@ fn tampered_main_mmcs_spec_tag_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); let airs: Vec<&dyn AIR> = vec![&air_1, &air_2]; - proof.main_mmcs_spec[0].0 = MatrixTag::new([0xFF; 8]); + let chunk_idx = first_populated_main_chunk(&proof); + proof.main_mmcs_specs[chunk_idx][0].0 = MatrixTag::new([0xFF; 8]); assert!( !verify(&airs, &proof), "spec tag mismatch must be rejected" ); } +#[test_log::test] +fn tampered_chunk_size_rejected() { + // Pinned chunk_size mismatch should produce verifier rejection 
(per-chunk + // Vec lengths no longer line up with the verifier's chunking). + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + proof.chunk_size = proof.chunk_size.saturating_add(1); + assert!(!verify(&airs, &proof), "tampered chunk_size must be rejected"); +} + #[test_log::test] fn tampered_mmcs_opening_leaf_rejected() { let (air_1, air_2, mut proof) = baseline_proof(); @@ -202,14 +219,13 @@ fn tampered_evaluations_rejected() { #[test_log::test] fn swapped_main_tags_at_verifier_rejected() { // The verifier reproduces `main_tags` from `synth_main_tags(num_airs)` - // inside `multi_verify_ram`. To simulate a verifier that "lies" about - // tag ordering we call `multi_verify` directly with a permuted slice. + // inside `multi_verify_ram`. Simulate a verifier that "lies" about + // tag ordering by calling `multi_verify` directly with a permuted slice. use crate::verifier::{IsStarkVerifier, Verifier}; let (air_1, air_2, proof) = baseline_proof(); let airs: Vec<&dyn AIR> = vec![&air_1, &air_2]; - // Sanity: with the correct (synth) tag order it passes. let correct = synth_main_tags(airs.len()); assert!( Verifier::multi_verify( @@ -222,8 +238,6 @@ fn swapped_main_tags_at_verifier_rejected() { "baseline must verify with correct tags" ); - // Swap the two tags — the spec sort order is now wrong relative to the - // prover's commitments, so the spec match check must reject. 
let mut swapped = correct.clone(); swapped.swap(0, 1); assert!( diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index 569221ce0..95165a253 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -350,7 +350,7 @@ pub trait IsStarkVerifier< deep_poly_openings: &DeepPolynomialOpening, iota: usize, main_tag: crypto::merkle_tree::mmcs::MatrixTag, - main_mmcs_root: &Commitment, + main_mmcs_root: Option<&Commitment>, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], aux_mmcs_root: Option<&Commitment>, aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], @@ -405,7 +405,7 @@ pub trait IsStarkVerifier< main_opening: &crate::proof::stark::MainTraceOpening, iota: usize, main_tag: crypto::merkle_tree::mmcs::MatrixTag, - main_mmcs_root: &Commitment, + main_mmcs_root: Option<&Commitment>, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where @@ -452,7 +452,7 @@ pub trait IsStarkVerifier< proof: &StarkProof, challenges: &Challenges, main_tag: crypto::merkle_tree::mmcs::MatrixTag, - main_mmcs_root: &Commitment, + main_mmcs_root: Option<&Commitment>, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], aux_mmcs_root: Option<&Commitment>, aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], @@ -804,65 +804,106 @@ pub trait IsStarkVerifier< // cross-check `main_mmcs_spec` against the (tag, padded_height_lde) // pairs reproduced from the AIRs. - let mut expected_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = - Vec::with_capacity(airs.len()); - for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() { - let lde_size = proof.trace_length * (air.options().blowup_factor as usize); - if air.is_preprocessed() { - // Preprocessed table: validate + absorb both its AIR-pinned - // precomputed root and its own per-table multiplicities root. - // Stays OUT of the shared MMCS spec. 
- let expected_precomputed = air.precomputed_commitment(); - match &proof.lde_trace_precomputed_merkle_root { - Some(actual) if *actual == expected_precomputed => {} - Some(actual) => { - error!( - "Preprocessed commitment MISMATCH for table {idx}: expected {:?}, got {:?}", - expected_precomputed, actual - ); - return false; + // Per-chunk Phase A replay: chunk tables of size `chunk_size`. For + // each table absorb its preprocessed root + per-table main root + // (preprocessed only); at the end of each chunk, validate the + // chunk's main MMCS spec and absorb the chunk's main MMCS root + // (`Some`) or skip (`None` when the chunk has no non-preprocessed + // tables). Must match `multi_prove` Phase A absorb order exactly. + let chunk_size = multi_proof.chunk_size as usize; + if chunk_size == 0 { + error!("multi_proof.chunk_size is zero"); + return false; + } + let expected_num_chunks = (airs.len() + chunk_size - 1) / chunk_size; + if multi_proof.main_mmcs_roots.len() != expected_num_chunks + || multi_proof.main_mmcs_specs.len() != expected_num_chunks + || multi_proof.aux_mmcs_roots.len() != expected_num_chunks + || multi_proof.aux_mmcs_specs.len() != expected_num_chunks + { + error!( + "per-chunk MMCS Vec lengths inconsistent with chunk_size={chunk_size}: expected {expected_num_chunks} chunks; got main_roots={}, main_specs={}, aux_roots={}, aux_specs={}", + multi_proof.main_mmcs_roots.len(), + multi_proof.main_mmcs_specs.len(), + multi_proof.aux_mmcs_roots.len(), + multi_proof.aux_mmcs_specs.len(), + ); + return false; + } + + for chunk_idx in 0..expected_num_chunks { + let chunk_start = chunk_idx * chunk_size; + let chunk_end = (chunk_start + chunk_size).min(airs.len()); + + let mut expected_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = + Vec::new(); + for idx in chunk_start..chunk_end { + let (air, proof) = (airs[idx], &multi_proof.proofs[idx]); + let lde_size = proof.trace_length * (air.options().blowup_factor as usize); + if air.is_preprocessed() 
{ + let expected_precomputed = air.precomputed_commitment(); + match &proof.lde_trace_precomputed_merkle_root { + Some(actual) if *actual == expected_precomputed => {} + Some(actual) => { + error!( + "Preprocessed commitment MISMATCH for table {idx}: expected {:?}, got {:?}", + expected_precomputed, actual + ); + return false; + } + None => { + error!("Preprocessed table {idx} proof missing precomputed commitment"); + return false; + } } - None => { - error!("Preprocessed table {idx} proof missing precomputed commitment"); - return false; + transcript.append_bytes(&expected_precomputed); + match &proof.lde_trace_main_merkle_root { + Some(root) => transcript.append_bytes(root), + None => { + error!( + "Preprocessed table {idx} proof missing multiplicities Merkle root" + ); + return false; + } } - } - transcript.append_bytes(&expected_precomputed); - - match &proof.lde_trace_main_merkle_root { - Some(root) => transcript.append_bytes(root), - None => { + } else { + if proof.lde_trace_main_merkle_root.is_some() { error!( - "Preprocessed table {idx} proof missing multiplicities Merkle root" + "Non-preprocessed table {idx} unexpectedly supplied a per-table main root" ); return false; } + expected_spec.push((main_tags[idx], lde_size)); } - } else { - // Non-preprocessed table: nothing per-table; the shared MMCS - // root absorbed below covers its main columns. - if proof.lde_trace_main_merkle_root.is_some() { - error!( - "Non-preprocessed table {idx} unexpectedly supplied a per-table main root" - ); + } + + // Deterministic sort matches `MmcsBuilder::finalize` + // (height desc, tag asc) — same as the streaming builder. 
+ expected_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + if expected_spec != multi_proof.main_mmcs_specs[chunk_idx] { + error!( + "chunk {chunk_idx} main_mmcs_spec mismatch: expected {:?}, got {:?}", + expected_spec, multi_proof.main_mmcs_specs[chunk_idx], + ); + return false; + } + match ( + &multi_proof.main_mmcs_roots[chunk_idx], + expected_spec.is_empty(), + ) { + (Some(root), false) => transcript.append_bytes(root), + (None, true) => {} + (Some(_), true) => { + error!("chunk {chunk_idx} main_mmcs_root present but no Shared tables"); + return false; + } + (None, false) => { + error!("chunk {chunk_idx} main_mmcs_root missing but Shared tables exist"); return false; } - expected_spec.push((main_tags[idx], lde_size)); } } - // Deterministic sort matches `MmcsBuilder::finalize` (height desc, tag asc). - expected_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); - if expected_spec != multi_proof.main_mmcs_spec { - error!( - "main_mmcs_spec mismatch: expected {:?}, got {:?}", - expected_spec, multi_proof.main_mmcs_spec, - ); - return false; - } - - transcript.append_bytes(&multi_proof.main_mmcs_root); - // ===================================================================== // Round 1, Phase B: Sample shared LogUp challenges // ===================================================================== @@ -906,32 +947,45 @@ pub trait IsStarkVerifier< // SHARED transcript replaces the per-table aux root absorb of the // pre-MMCS protocol. Verify the spec mirrors the prover-side // filtered-by-has_aux_trace order before binding. - let mut expected_aux_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = - Vec::new(); - for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() { - if air.has_aux_trace() { - let lde_size = proof.trace_length * (air.options().blowup_factor as usize); - expected_aux_spec.push((main_tags[idx], lde_size)); + // Per-chunk Phase C replay (aux). 
Mirrors Phase A: for each chunk, + // validate the aux spec + absorb the aux MMCS root (or skip when + // the chunk has no aux-bearing tables). Must match `multi_prove` + // Phase C absorb order exactly. + for chunk_idx in 0..expected_num_chunks { + let chunk_start = chunk_idx * chunk_size; + let chunk_end = (chunk_start + chunk_size).min(airs.len()); + + let mut expected_aux_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = + Vec::new(); + for idx in chunk_start..chunk_end { + let (air, proof) = (airs[idx], &multi_proof.proofs[idx]); + if air.has_aux_trace() { + let lde_size = proof.trace_length * (air.options().blowup_factor as usize); + expected_aux_spec.push((main_tags[idx], lde_size)); + } } - } - expected_aux_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); - if expected_aux_spec != multi_proof.aux_mmcs_spec { - error!( - "aux_mmcs_spec mismatch: expected {:?}, got {:?}", - expected_aux_spec, multi_proof.aux_mmcs_spec, - ); - return false; - } - match (&multi_proof.aux_mmcs_root, expected_aux_spec.is_empty()) { - (Some(root), false) => transcript.append_bytes(root), - (None, true) => {} - (Some(_), true) => { - error!("aux_mmcs_root present but no AIR has an aux trace"); + expected_aux_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + if expected_aux_spec != multi_proof.aux_mmcs_specs[chunk_idx] { + error!( + "chunk {chunk_idx} aux_mmcs_spec mismatch: expected {:?}, got {:?}", + expected_aux_spec, multi_proof.aux_mmcs_specs[chunk_idx], + ); return false; } - (None, false) => { - error!("aux_mmcs_root missing but some AIR has an aux trace"); - return false; + match ( + &multi_proof.aux_mmcs_roots[chunk_idx], + expected_aux_spec.is_empty(), + ) { + (Some(root), false) => transcript.append_bytes(root), + (None, true) => {} + (Some(_), true) => { + error!("chunk {chunk_idx} aux_mmcs_root present but no aux tables"); + return false; + } + (None, false) => { + error!("chunk {chunk_idx} aux_mmcs_root missing but aux tables exist"); + return 
false; + } } } @@ -957,17 +1011,27 @@ pub trait IsStarkVerifier< table_transcript.append_field_element(&bpi.table_contribution); } - // Rounds 2-4: verify (per-table MMCS context threaded through). + // Per-chunk lookup: each table's main / aux MMCS root + spec + // come from its chunk. + let table_chunk_idx = idx / chunk_size; + let main_root_for_chunk = + multi_proof.main_mmcs_roots[table_chunk_idx].as_ref(); + let main_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = + &multi_proof.main_mmcs_specs[table_chunk_idx]; + let aux_root_for_chunk = multi_proof.aux_mmcs_roots[table_chunk_idx].as_ref(); + let aux_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = + &multi_proof.aux_mmcs_specs[table_chunk_idx]; + if !Self::verify_rounds_2_to_4( *air, proof, &mut table_transcript, lookup_challenges.clone(), main_tags[idx], - &multi_proof.main_mmcs_root, - &multi_proof.main_mmcs_spec, - multi_proof.aux_mmcs_root.as_ref(), - &multi_proof.aux_mmcs_spec, + main_root_for_chunk, + main_spec_for_chunk, + aux_root_for_chunk, + aux_spec_for_chunk, ) { error!( "Table {} failed verify_rounds_2_to_4 (num_constraints={}, trace_cols={})", @@ -1183,7 +1247,7 @@ pub trait IsStarkVerifier< transcript: &mut impl IsStarkTranscript, rap_challenges: Vec>, main_tag: crypto::merkle_tree::mmcs::MatrixTag, - main_mmcs_root: &Commitment, + main_mmcs_root: Option<&Commitment>, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], aux_mmcs_root: Option<&Commitment>, aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], @@ -1302,7 +1366,7 @@ fn verify_main_mmcs_pair_inner( main_opening: &crate::proof::stark::MainTraceOpening, iota: usize, main_tag: crypto::merkle_tree::mmcs::MatrixTag, - main_mmcs_root: &Commitment, + main_mmcs_root: Option<&Commitment>, main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where @@ -1322,6 +1386,12 @@ where MainTraceOpening::Tree(_) => return false, }; + // Shared opening requires a chunk 
MMCS root; if missing, reject. + let main_mmcs_root = match main_mmcs_root { + Some(r) => r, + None => return false, + }; + let table_idx = match main_mmcs_spec.iter().position(|(t, _)| *t == main_tag) { Some(i) => i, None => return false, diff --git a/prover/src/lib.rs b/prover/src/lib.rs index dc5073ac9..25146e01a 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -504,23 +504,34 @@ impl VmAirs { /// Replay the prover's Phase A (main trace commitments) to recover the shared /// LogUp challenges (z, alpha). Mirrors `multi_verify` Phase A absorb order: -/// for each table, absorb its precomputed root and (preprocessed only) its -/// per-table multiplicities Merkle root; then absorb the shared main-trace -/// MMCS root once at the end. +/// for each chunk of `chunk_size` tables, in order, absorb each table's +/// preprocessed + per-table multiplicities root (preprocessed only); then, +/// after each chunk, absorb that chunk's main MMCS root (`Some`) or skip +/// (`None`, when the chunk has no non-preprocessed tables). 
pub(crate) fn replay_transcript_phase_a( airs: &[&dyn AIR], multi_proof: &MultiProof, transcript: &mut DefaultTranscript, ) -> (FieldElement, FieldElement) { - for (air, proof) in airs.iter().zip(&multi_proof.proofs) { - if air.is_preprocessed() { - transcript.append_bytes(&air.precomputed_commitment()); - if let Some(root) = &proof.lde_trace_main_merkle_root { - transcript.append_bytes(root); + let chunk_size = multi_proof.chunk_size as usize; + let num_chunks = multi_proof.main_mmcs_roots.len(); + for chunk_idx in 0..num_chunks { + let chunk_start = chunk_idx * chunk_size; + let chunk_end = (chunk_start + chunk_size).min(airs.len()); + for idx in chunk_start..chunk_end { + let air = airs[idx]; + let proof = &multi_proof.proofs[idx]; + if air.is_preprocessed() { + transcript.append_bytes(&air.precomputed_commitment()); + if let Some(root) = &proof.lde_trace_main_merkle_root { + transcript.append_bytes(root); + } } } + if let Some(root) = &multi_proof.main_mmcs_roots[chunk_idx] { + transcript.append_bytes(root); + } } - transcript.append_bytes(&multi_proof.main_mmcs_root); let z: FieldElement = transcript.sample_field_element(); let alpha: FieldElement = transcript.sample_field_element(); (z, alpha) From c22fca9e881bb428779da1d4f32c9b5e358a786e Mon Sep 17 00:00:00 2001 From: diegokingston Date: Thu, 28 May 2026 11:58:01 -0300 Subject: [PATCH 19/21] =?UTF-8?q?feat(stark/mmcs):=20composition=20MMCS=20?= =?UTF-8?q?=E2=80=94=20per-chunk=20over=20composition=20polys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third MMCS after main + aux. Each chunk's tables share a composition MMCS built between R2 LDE build and R3 z-sample. Drops per-table composition_poly_root from StarkProof entirely; the chunk root is absorbed into each chunk-mate's fork once per chunk. # Why structurally different from main/aux Composition polys are built INSIDE the per-table R2-R4 loop after each fork samples its own beta. 
Per-chunk MMCS therefore needs a join point mid-rounds: R2a (per-table, parallel): sample beta + build composition LDE + tagged row-pair leaves [chunk join] build chunk composition MMCS via StreamingMmcsBuilder; absorb root into each chunk-mate's fork R2b → R3 → R4 (per-table): sample z, run OOD + FRI + opens The old `prove_rounds_2_to_4` is now `prove_round_2a` (R2 LDE build) + `prove_rounds_2b_to_4` (everything after the chunk join). Verifier mirrors: `replay_rounds_after_round_1` takes an Option<&Commitment> chunk-comp root and absorbs it between beta and z (replacing the old per-table `composition_poly_root` absorb). # Leaf hash + helpers Composition leaves hash a row PAIR (br_0 || br_1) rather than a single row, so domain separation needs its own helpers: - `LEAF_DOMAIN_TAG_COMPOSITION = "LAMBDAVM_COMP_MMCS_LEAF_V1"`. - `hash_tagged_row_pair_bytes_composition` (bytes flavour). - `hash_tagged_row_pair_composition` (elements flavour). - `compute_tagged_leaves_row_pair_bit_reversed_composition` (prover-side bulk-leaf computation). - `rehash_comp_chip_leaf` (open-time on-demand leaf for chunk-mates). # Proof format `StarkProof`: - drops `composition_poly_root: Commitment` (now at MultiProof / chunk level). `DeepPolynomialOpening`: - `composition_poly: PolynomialOpenings` → `CompositionTraceOpening` (new enum, single `Mmcs` variant with `evaluations`, `evaluations_sym`, and ONE `mmcs_opening` — the row-pair leaf covers both rows). `MultiProof`: - `comp_mmcs_roots: Vec<Option<Commitment>>` (parallel to main/aux, always Some in practice since every table has a composition poly). - `comp_mmcs_specs: Vec<Vec<(MatrixTag, usize)>>` — chunk specs in MMCS sort order. Padded height = lde_size / 2 (row-pair count). # Architecture - `ChunkCompMmcsContext` — sister of main/aux chunk contexts. - `CompCommit::Shared { chunk_ctx, chunk_idx, tag, padded_height }`. - `Round2` now holds `Arc<Vec<Vec<FieldElement<E>>>>` LDE + `CompCommit`. No per-table Merkle tree. - `R2aResult` — handoff carrier between R2a and the chunk join. 
- `build_chunk_comp_mmcs` — mirror of `build_chunk_main_mmcs` / `build_chunk_aux_mmcs`; uses `StreamingMmcsBuilder`. # Open + verifier - `open_composition_poly` produces `CompositionTraceOpening::Mmcs` via `chunk_ctx.mmcs.open_with_leaves` + the rehash closure. - `verify_comp_mmcs_pair_inner` rehashes the row-pair leaf, checks `matrix_leaves[table_idx]`, authenticates against the chunk root. - `step_4_verify_trace_and_composition_openings` threads `comp_mmcs_root` + `comp_mmcs_spec` through to the per-query check. - `multi_verify` adds per-chunk comp-spec validation (every chunk has Some root; spec sorted height-desc, tag-asc). # Tests - 3 new composition soundness tests: tampered root / spec height / missing root all rejected. - 2 new mmcs_leaf tests: composition domain separates from main+aux; bytes-flavour matches element-flavour. - `cli proof-size` breakdown gains `comp_mmcs_roots (per-chunk)` and `comp_mmcs_specs (per-chunk)` sections. Results: 157/157 stark tests green (152 prior + 3 new comp soundness + 2 new mmcs_leaf comp tests). cli 3/3, lambda-vm-prover bitwise (preprocessed) tests pass, 77 prove_elfs failures are the pre-existing UnknownSyscall(5) executor bug present on main. # Performance note R2a and R2b-R4 within a chunk are sequential rather than per-table parallel — `chunk_size = table_parallelism()` is small (typically 8), and the dominant cost in R2 (constraint eval, FFT) already has internal rayon parallelism. Keeping the chunk loop sequential avoids `par_iter_mut()` on transcript slices that breaks with downstream generic `IsStarkTranscript` bounds. 
--- bin/cli/src/main.rs | 6 + crypto/stark/src/mmcs_leaf.rs | 90 +++ crypto/stark/src/proof/stark.rs | 48 +- crypto/stark/src/prover.rs | 560 ++++++++++++++---- .../stark/src/tests/mmcs_soundness_tests.rs | 52 ++ crypto/stark/src/verifier.rs | 183 +++++- 6 files changed, 800 insertions(+), 139 deletions(-) diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index dd65466b4..86a6dbddf 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -694,6 +694,8 @@ fn cmd_proof_size( let main_mmcs_specs_bytes = ser_len(&vm_proof.proof.main_mmcs_specs); let aux_mmcs_roots_bytes = ser_len(&vm_proof.proof.aux_mmcs_roots); let aux_mmcs_specs_bytes = ser_len(&vm_proof.proof.aux_mmcs_specs); + let comp_mmcs_roots_bytes = ser_len(&vm_proof.proof.comp_mmcs_roots); + let comp_mmcs_specs_bytes = ser_len(&vm_proof.proof.comp_mmcs_specs); let chunk_size_bytes = ser_len(&vm_proof.proof.chunk_size); // Sum per-section across every sub-proof so a single number captures the @@ -735,6 +737,8 @@ fn cmd_proof_size( + main_mmcs_specs_bytes + aux_mmcs_roots_bytes + aux_mmcs_specs_bytes + + comp_mmcs_roots_bytes + + comp_mmcs_specs_bytes + chunk_size_bytes + s_main_trace_openings + s_precomputed_trace_openings @@ -754,6 +758,8 @@ fn cmd_proof_size( ProofSizeEntry { section: "main_mmcs_specs (per-chunk)".into(), bytes: main_mmcs_specs_bytes }, ProofSizeEntry { section: "aux_mmcs_roots (per-chunk)".into(), bytes: aux_mmcs_roots_bytes }, ProofSizeEntry { section: "aux_mmcs_specs (per-chunk)".into(), bytes: aux_mmcs_specs_bytes }, + ProofSizeEntry { section: "comp_mmcs_roots (per-chunk)".into(), bytes: comp_mmcs_roots_bytes }, + ProofSizeEntry { section: "comp_mmcs_specs (per-chunk)".into(), bytes: comp_mmcs_specs_bytes }, ProofSizeEntry { section: "chunk_size".into(), bytes: chunk_size_bytes }, ProofSizeEntry { section: "per_table_main_merkle_root (preprocessed)".into(), bytes: s_per_table_main_root }, ProofSizeEntry { section: "per_table_precomputed_merkle_root".into(), bytes: 
s_precomputed_root }, diff --git a/crypto/stark/src/mmcs_leaf.rs b/crypto/stark/src/mmcs_leaf.rs index 447f9650f..6a995fa2c 100644 --- a/crypto/stark/src/mmcs_leaf.rs +++ b/crypto/stark/src/mmcs_leaf.rs @@ -43,6 +43,13 @@ pub const LEAF_DOMAIN_TAG_MAIN: &[u8] = LEAF_DOMAIN_TAG; /// other. pub const LEAF_DOMAIN_TAG_AUX: &[u8] = b"LAMBDAVM_AUX_MMCS_LEAF_V1"; +/// Versioned domain separator for COMPOSITION-trace MMCS leaves. +/// Composition leaves hash a PAIR of rows (br_0 || br_1) instead of a +/// single row — the legacy `keccak_leaves_row_pair_bit_reversed` shape. +/// Distinct from main/aux so no composition opening can authenticate a +/// main or aux leaf. +pub const LEAF_DOMAIN_TAG_COMPOSITION: &[u8] = b"LAMBDAVM_COMP_MMCS_LEAF_V1"; + /// Synthesize `n` distinct [`MatrixTag`]s derived from positional index. /// Useful for generic stark tests where the caller does not own a stable /// chip-type assignment. Production code in lambda-vm uses @@ -75,6 +82,18 @@ pub fn hash_tagged_row_bytes_aux(tag: MatrixTag, row_bytes_be: &[u8]) -> Commitm hash_with_domain(LEAF_DOMAIN_TAG_AUX, tag, row_bytes_be) } +/// Hash a COMPOSITION-trace MMCS leaf from a pre-concatenated `(br_0 || +/// br_1)` byte buffer — i.e. the two row-pair rows written big-endian, +/// `part_0_row_0 || part_1_row_0 || ... || part_0_row_1 || part_1_row_1 +/// || ...`. Uses [`LEAF_DOMAIN_TAG_COMPOSITION`]. +#[inline] +pub fn hash_tagged_row_pair_bytes_composition( + tag: MatrixTag, + row_pair_bytes_be: &[u8], +) -> Commitment { + hash_with_domain(LEAF_DOMAIN_TAG_COMPOSITION, tag, row_pair_bytes_be) +} + #[inline] fn hash_with_domain(domain: &[u8], tag: MatrixTag, row_bytes_be: &[u8]) -> Commitment { let mut h = Keccak256::new(); @@ -105,6 +124,34 @@ where hash_tagged_row_inner::(LEAF_DOMAIN_TAG_AUX, tag, row) } +/// Convenience: hash a COMPOSITION-trace row-pair from two slices of +/// field elements (the parts evaluated at `br_0` and `br_1`), each +/// `num_parts` long. 
+pub fn hash_tagged_row_pair_composition( + tag: MatrixTag, + parts_at_br_0: &[FieldElement], + parts_at_br_1: &[FieldElement], +) -> Commitment +where + E: IsField, + FieldElement: ByteConversion, +{ + debug_assert_eq!(parts_at_br_0.len(), parts_at_br_1.len()); + let byte_len = as ByteConversion>::BYTE_LEN; + let num_parts = parts_at_br_0.len(); + let mut buf = vec![0u8; 2 * num_parts * byte_len]; + let mut offset = 0; + for fe in parts_at_br_0 { + fe.write_bytes_be(&mut buf[offset..offset + byte_len]); + offset += byte_len; + } + for fe in parts_at_br_1 { + fe.write_bytes_be(&mut buf[offset..offset + byte_len]); + offset += byte_len; + } + hash_tagged_row_pair_bytes_composition(tag, &buf) +} + #[inline] fn hash_tagged_row_inner( domain: &[u8], @@ -158,6 +205,49 @@ mod tests { assert_ne!(main_digest, aux_digest); } + #[test] + fn composition_domain_separates_from_main_and_aux() { + // Same row-pair under composition MUST differ from main + aux + // domains so a composition opening can't authenticate a main or + // aux leaf. + let tag = MatrixTag::new([0xCC; 8]); + let row0 = vec![FE::from(1u64), FE::from(2u64)]; + let row1 = vec![FE::from(3u64), FE::from(4u64)]; + let comp_digest = hash_tagged_row_pair_composition(tag, &row0, &row1); + + // Build the equivalent flat byte buffer manually and run it + // through the main + aux single-domain helpers. 
+ let byte_len = ::BYTE_LEN; + let mut flat = vec![0u8; (row0.len() + row1.len()) * byte_len]; + let mut offset = 0; + for fe in row0.iter().chain(row1.iter()) { + fe.write_bytes_be(&mut flat[offset..offset + byte_len]); + offset += byte_len; + } + let main_digest = hash_tagged_row_bytes(tag, &flat); + let aux_digest = hash_tagged_row_bytes_aux(tag, &flat); + assert_ne!(comp_digest, main_digest); + assert_ne!(comp_digest, aux_digest); + } + + #[test] + fn composition_bytes_helper_matches_composition_element_helper() { + let tag = MatrixTag::new([5; 8]); + let row0 = vec![FE::from(10u64), FE::from(20u64)]; + let row1 = vec![FE::from(30u64), FE::from(40u64)]; + let from_elements = hash_tagged_row_pair_composition(tag, &row0, &row1); + + let byte_len = ::BYTE_LEN; + let mut flat = vec![0u8; 2 * row0.len() * byte_len]; + let mut offset = 0; + for fe in row0.iter().chain(row1.iter()) { + fe.write_bytes_be(&mut flat[offset..offset + byte_len]); + offset += byte_len; + } + let from_bytes = hash_tagged_row_pair_bytes_composition(tag, &flat); + assert_eq!(from_elements, from_bytes); + } + #[test] fn aux_bytes_helper_matches_aux_element_helper() { // The bytes-flavoured helper and the element-flavoured helper must diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index cc69f7bf0..32ac76184 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -88,10 +88,45 @@ impl AuxTraceOpening { } } +/// Per-query composition-trace opening. Sister of [`MainTraceOpening`] +/// and [`AuxTraceOpening`] for the composition polynomial parts. Always +/// `Mmcs`: every table has a composition polynomial, and the chunk-scoped +/// composition MMCS commits to all of them. +/// +/// Composition leaves are hashed in row-PAIR form (`br_0` + `br_1`). +/// A single MMCS opening covers both rows since they share the same +/// leaf in the underlying tree. 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(bound = "")] +pub enum CompositionTraceOpening { + Mmcs { + /// Parts at `br_0`. + evaluations: Vec>, + /// Parts at `br_1` (sym row). + evaluations_sym: Vec>, + /// Single MMCS opening for the row-pair leaf. + mmcs_opening: MmcsOpening, + }, +} + +impl CompositionTraceOpening { + pub fn evaluations(&self) -> &[FieldElement] { + match self { + Self::Mmcs { evaluations, .. } => evaluations, + } + } + + pub fn evaluations_sym(&self) -> &[FieldElement] { + match self { + Self::Mmcs { evaluations_sym, .. } => evaluations_sym, + } + } +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(bound = "")] pub struct DeepPolynomialOpening, E: IsField> { - pub composition_poly: PolynomialOpenings, + pub composition_poly: CompositionTraceOpening, pub main_trace_polys: MainTraceOpening, /// For preprocessed tables: openings for precomputed columns. /// These are verified against the hardcoded precomputed commitment. @@ -118,8 +153,6 @@ pub struct StarkProof, E: IsField, PI> { pub lde_trace_precomputed_merkle_root: Option, // tⱼ(zgᵏ) pub trace_ood_evaluations: Table, - // Commitments to Hᵢ - pub composition_poly_root: Commitment, // Hᵢ(z^N) pub composition_poly_parts_ood_evaluation: Vec>, // [pₖ] @@ -184,6 +217,15 @@ pub struct MultiProof, E: IsField, PI> { /// Per-chunk aux MMCS specs. Empty inner Vec when the corresponding /// `aux_mmcs_roots[i]` is `None`. pub aux_mmcs_specs: Vec>, + /// Per-chunk composition MMCS roots. Always `Some` (every table has a + /// composition polynomial), but stored as `Option` for shape parity + /// with main/aux. Parallel to `main_mmcs_roots`. + pub comp_mmcs_roots: Vec>, + /// Per-chunk composition MMCS specs. Each non-empty Vec lists + /// `(MatrixTag, padded_height)` for the chunk-mate composition + /// polynomials in MMCS sort order. `padded_height` is the row-pair + /// count = `lde_size / 2`. + pub comp_mmcs_specs: Vec>, /// Pinned chunk size. 
Equals the prover's `table_parallelism()` at /// proving time. The verifier uses this to chunk the AIR slice into /// the same per-chunk grouping the prover used. diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index bf9a5f03e..72f2a822f 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -27,7 +27,9 @@ use rayon::prelude::{ use crate::debug::validate_trace; use crate::fri; use crate::lookup::LOGUP_NUM_CHALLENGES; -use crate::proof::stark::{DeepPolynomialOpenings, MainTraceOpening, PolynomialOpenings}; +use crate::proof::stark::{ + CompositionTraceOpening, DeepPolynomialOpenings, MainTraceOpening, PolynomialOpenings, +}; #[cfg(feature = "disk-spill")] use crate::storage_mode::StorageMode; use crate::table::Table; @@ -433,17 +435,74 @@ pub fn table_parallelism() -> usize { } /// A container for the results of the second round of the STARK Prove protocol. +/// Per-chunk composition MMCS context. +pub(crate) struct ChunkCompMmcsContext +where + FieldElement: AsBytes, +{ + pub(crate) mmcs: Arc>>, + /// Arc-cloned composition LDE columns for chunk-mates, in MMCS spec + /// sort order. Used by the per-query open path to rehash composition + /// row-pair leaves on demand. + pub(crate) lde_columns_in_spec_order: Vec>>>>, +} + +/// Per-table composition-trace commitment under the chunk's composition MMCS. +pub(crate) enum CompCommit +where + FieldElement: AsBytes, +{ + Shared { + chunk_ctx: Arc>, + chunk_idx: usize, + tag: MatrixTag, + /// Padded height = lde_size / 2 (row-pair leaves). 
+ padded_height: usize, + }, +} + +impl CompCommit +where + FieldElement: AsBytes, +{ + fn share(&self) -> Self { + match self { + Self::Shared { + chunk_ctx, + chunk_idx, + tag, + padded_height, + } => Self::Shared { + chunk_ctx: Arc::clone(chunk_ctx), + chunk_idx: *chunk_idx, + tag: *tag, + padded_height: *padded_height, + }, + } + } +} + +/// Per-table Round 2 partial — produced by `round_2a_build_composition_lde` +/// before the chunk composition MMCS is built. +pub(crate) struct R2aResult +where + FieldElement: AsBytes, +{ + pub(crate) lde_composition_poly_evaluations: Arc>>>, + pub(crate) composition_leaves: Vec, + pub(crate) padded_height: usize, +} + pub(crate) struct Round2 where F: IsField, FieldElement: AsBytes, { - /// Evaluations of the composition polynomial parts over the LDE domain. - pub(crate) lde_composition_poly_evaluations: Vec>>, - /// The Merkle tree built to compute the commitment to the composition polynomial parts. - pub(crate) composition_poly_merkle_tree: BatchedMerkleTree, - /// The commitment to the composition polynomial parts. - pub(crate) composition_poly_root: Commitment, + /// Evaluations of the composition polynomial parts over the LDE + /// domain (Arc-shared with the chunk composition MMCS context). + pub(crate) lde_composition_poly_evaluations: Arc>>>, + /// This table's slot inside the chunk's composition MMCS. + pub(crate) comp: CompCommit, } /// A container for the results of the third round of the STARK Prove protocol. @@ -761,6 +820,137 @@ where Ok((Some(root), spec, Some(ctx))) } +/// Tagged per-row-PAIR leaf digest for the COMPOSITION-trace MMCS. 
+pub fn compute_tagged_leaves_row_pair_bit_reversed_composition( + parts: &[Vec>], + tag: MatrixTag, +) -> Vec +where + E: IsField, + FieldElement: AsBytes + Sync + Send + ByteConversion, +{ + let num_parts = parts.len(); + if num_parts == 0 { + return Vec::new(); + } + let num_rows = parts[0].len(); + if num_rows == 0 { + return Vec::new(); + } + let num_leaves = num_rows / 2; + debug_assert!(num_rows.is_power_of_two()); + let byte_len = as ByteConversion>::BYTE_LEN; + let total_bytes = 2 * num_parts * byte_len; + let hash_leaf_pair = |buf: &mut [u8], leaf_idx: usize| -> Commitment { + let br_0 = reverse_index(2 * leaf_idx, num_rows as u64); + let br_1 = reverse_index(2 * leaf_idx + 1, num_rows as u64); + let mut offset = 0; + for part in parts.iter() { + part[br_0].write_bytes_be(&mut buf[offset..offset + byte_len]); + offset += byte_len; + } + for part in parts.iter() { + part[br_1].write_bytes_be(&mut buf[offset..offset + byte_len]); + offset += byte_len; + } + crate::mmcs_leaf::hash_tagged_row_pair_bytes_composition(tag, buf) + }; + #[cfg(feature = "parallel")] + { + (0..num_leaves) + .into_par_iter() + .map_init(|| vec![0u8; total_bytes], |buf, i| hash_leaf_pair(buf, i)) + .collect() + } + #[cfg(not(feature = "parallel"))] + { + let mut buf = vec![0u8; total_bytes]; + (0..num_leaves).map(|i| hash_leaf_pair(&mut buf, i)).collect() + } +} + +/// Build a CHUNK-scoped composition MMCS via StreamingMmcsBuilder. 
+#[allow(clippy::type_complexity)] +fn build_chunk_comp_mmcs( + comp_outputs: Vec<(MatrixTag, Vec, usize)>, + chunk_comp_lde: Vec<(MatrixTag, Arc>>>)>, +) -> Result< + ( + Option, + Vec<(MatrixTag, usize)>, + Option>>, + ), + ProvingError, +> +where + E: IsField + Send + Sync, + FieldElement: AsBytes + Send + Sync, +{ + if comp_outputs.is_empty() { + return Ok((None, Vec::new(), None)); + } + debug_assert_eq!(comp_outputs.len(), chunk_comp_lde.len()); + let mut comp_outputs = comp_outputs; + comp_outputs.sort_by(|a, b| b.2.cmp(&a.2).then(a.0.cmp(&b.0))); + let lde_by_tag: std::collections::BTreeMap>>>> = + chunk_comp_lde.into_iter().collect(); + let mut builder: StreamingMmcsBuilder> = + StreamingMmcsBuilder::new(); + let mut lde_columns_in_spec_order: Vec>>>> = + Vec::with_capacity(comp_outputs.len()); + for (tag, leaves, _padded_height) in comp_outputs { + let lde = lde_by_tag + .get(&tag) + .ok_or_else(|| { + ProvingError::WrongParameter(format!( + "missing chunk composition LDE for tag {tag:?}" + )) + })? + .clone(); + lde_columns_in_spec_order.push(lde); + builder.add_matrix(tag, leaves).map_err(map_mmcs_err)?; + } + let mmcs = builder.finalize().map_err(map_mmcs_err)?; + let root = *mmcs.root(); + let spec = mmcs.spec(); + let ctx = Arc::new(ChunkCompMmcsContext { + mmcs: Arc::new(mmcs), + lde_columns_in_spec_order, + }); + Ok((Some(root), spec, Some(ctx))) +} + +/// Rehash a composition-trace row-PAIR leaf for the open path. 
+pub fn rehash_comp_chip_leaf( + tag: MatrixTag, + parts: &Arc>>>, + local_idx: usize, +) -> Commitment +where + E: IsField, + FieldElement: AsBytes + ByteConversion, +{ + let num_rows = parts + .first() + .map(|c| c.len()) + .expect("composition LDE columns non-empty by construction"); + let num_parts = parts.len(); + let byte_len = as ByteConversion>::BYTE_LEN; + let br_0 = reverse_index(2 * local_idx, num_rows as u64); + let br_1 = reverse_index(2 * local_idx + 1, num_rows as u64); + let mut buf = vec![0u8; 2 * num_parts * byte_len]; + let mut offset = 0; + for part in parts.iter() { + part[br_0].write_bytes_be(&mut buf[offset..offset + byte_len]); + offset += byte_len; + } + for part in parts.iter() { + part[br_1].write_bytes_be(&mut buf[offset..offset + byte_len]); + offset += byte_len; + } + crate::mmcs_leaf::hash_tagged_row_pair_bytes_composition(tag, &buf) +} + /// Tagged per-row leaf digest for the main-trace MMCS. pub fn compute_tagged_leaves_bit_reversed( columns: &[Vec>], @@ -1282,15 +1472,20 @@ pub trait IsStarkProver< .expect("LDE evaluation should succeed") } - /// Returns the result of the second round of the STARK Prove protocol. - fn round_2_compute_composition_polynomial( + /// Round 2 phase A: build the composition LDE parts + tagged leaves + /// for the chunk MMCS, WITHOUT committing yet. The chunk MMCS is + /// built externally once every chunk-mate has returned their + /// [`R2aResult`]; only then does the resulting chunk root get + /// absorbed back into each fork and R3 sampling proceeds. 
+ fn round_2a_build_composition_lde( air: &dyn AIR, pub_inputs: &PI, domain: &Domain, round_1_result: &Round1, transition_coefficients: &[FieldElement], boundary_coefficients: &[FieldElement], - ) -> Result, ProvingError> + tag: MatrixTag, + ) -> Result, ProvingError> where FieldElement: AsBytes, FieldElement: AsBytes, @@ -1355,21 +1550,24 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let t_sub = Instant::now(); - let Some((composition_poly_merkle_tree, composition_poly_root)) = - Self::commit_composition_polynomial(&lde_composition_poly_parts_evaluations) - else { + let composition_leaves = + compute_tagged_leaves_row_pair_bit_reversed_composition::( + &lde_composition_poly_parts_evaluations, + tag, + ); + if composition_leaves.is_empty() { return Err(ProvingError::EmptyCommitment); - }; + } + let padded_height = composition_leaves.len(); #[cfg(feature = "instruments")] let merkle_dur = t_sub.elapsed(); - #[cfg(feature = "instruments")] crate::instruments::store_r2_sub(constraints_dur, fft_dur, merkle_dur); - Ok(Round2 { - lde_composition_poly_evaluations: lde_composition_poly_parts_evaluations, - composition_poly_merkle_tree, - composition_poly_root, + Ok(R2aResult { + lde_composition_poly_evaluations: Arc::new(lde_composition_poly_parts_evaluations), + composition_leaves, + padded_height, }) } @@ -1709,22 +1907,41 @@ pub trait IsStarkProver< .collect() } - /// Computes values and validity proofs of the evaluations of the composition polynomial parts - /// at the domain value corresponding to the FRI query challenge `index` and its symmetric - /// element. + /// Compute the composition-poly opening for one query against the + /// chunk composition MMCS. The opening's `mmcs_opening` carries + /// matrix_leaves for every chunk-mate's composition matrix; the + /// closure rehashes those row-pair leaves on demand from the + /// chunk-shared LDE columns. 
fn open_composition_poly( - composition_poly_merkle_tree: &BatchedMerkleTree, + comp: &CompCommit, lde_composition_poly_evaluations: &[Vec>], index: usize, - ) -> PolynomialOpenings + ) -> CompositionTraceOpening where FieldElement: AsBytes + Sync + Send, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + ByteConversion, { - let proof = composition_poly_merkle_tree - .get_proof_by_pos(index) - .unwrap(); + let CompCommit::Shared { chunk_ctx, .. } = comp; + let mmcs = &chunk_ctx.mmcs; + let lde_in_spec_order = &chunk_ctx.lde_columns_in_spec_order; + + // Composition row-pair leaves are indexed by row-pair, so the + // opening's global_index equals the query index directly (no + // shift). Per-table local index = global_index >> shift, which + // is 0 when all chunk-mates share the max height. + let local_idx = index; + let mmcs_opening = mmcs + .open_with_leaves(local_idx, |m_idx, local_idx_in_matrix| { + rehash_comp_chip_leaf::( + mmcs.spec()[m_idx].0, + &lde_in_spec_order[m_idx], + local_idx_in_matrix, + ) + }) + .expect("composition MMCS open_with_leaves: index in range"); + // Build the (evaluations, evaluations_sym) field arrays from this + // table's composition LDE — same layout as the legacy opening. 
let lde_composition_poly_parts_evaluation: Vec<_> = lde_composition_poly_evaluations .iter() .flat_map(|part| { @@ -1734,20 +1951,21 @@ pub trait IsStarkProver< ] }) .collect(); + let evaluations = lde_composition_poly_parts_evaluation + .clone() + .into_iter() + .step_by(2) + .collect(); + let evaluations_sym = lde_composition_poly_parts_evaluation + .into_iter() + .skip(1) + .step_by(2) + .collect(); - PolynomialOpenings { - proof: proof.clone(), - proof_sym: proof, - evaluations: lde_composition_poly_parts_evaluation - .clone() - .into_iter() - .step_by(2) - .collect(), - evaluations_sym: lde_composition_poly_parts_evaluation - .into_iter() - .skip(1) - .step_by(2) - .collect(), + CompositionTraceOpening::Mmcs { + evaluations, + evaluations_sym, + mmcs_opening, } } @@ -1796,7 +2014,7 @@ pub trait IsStarkProver< for index in indexes_to_open.iter() { let composition_openings = Self::open_composition_poly( - &round_2_result.composition_poly_merkle_tree, + &round_2_result.comp, &round_2_result.lde_composition_poly_evaluations, *index, ); @@ -2467,32 +2685,148 @@ pub trait IsStarkProver< crate::instruments::TableSubOps, )> = Vec::with_capacity(num_airs); - let mut proofs = Vec::with_capacity(num_airs); + let mut proofs: Vec>> = + (0..num_airs).map(|_| None).collect(); + let mut comp_mmcs_roots_per_chunk: Vec> = Vec::new(); + let mut comp_mmcs_specs_per_chunk: Vec> = Vec::new(); let mut lde_drain = cached_ldes.into_iter(); for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); let chunk_size = chunk_end - chunk_start; + let chunk_idx = comp_mmcs_roots_per_chunk.len(); let chunk_ldes: Vec> = lde_drain.by_ref().take(chunk_size).collect(); let chunk_commitments = &commitments[chunk_start..chunk_end]; - let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - - #[cfg(feature = "parallel")] - let iter = chunk_ldes - .into_par_iter() - .zip(chunk_commitments.par_iter()) - .zip(chunk_transcripts.par_iter_mut()) - 
.enumerate(); - #[cfg(not(feature = "parallel"))] - let iter = chunk_ldes + // Build Round1 per-table sequentially (build_round1 only bumps + // Arc refcounts), then run R2a in parallel. + let chunk_round1: Vec> = chunk_ldes .into_iter() .zip(chunk_commitments.iter()) + .enumerate() + .map(|(j, (lde, commitment))| { + let idx = chunk_start + j; + let (air, _, _) = &air_trace_pairs[idx]; + let domain = &domains[idx]; + commitment.build_round1(lde, air.step_size(), domain.blowup_factor) + }) + .collect(); + + // Bind per-table table_contribution into forks before sampling beta. + for (j, round_1_result) in chunk_round1.iter().enumerate() { + let idx = chunk_start + j; + if let Some(ref bpi) = round_1_result.bus_public_inputs { + table_transcripts[idx].append_field_element(&bpi.table_contribution); + } + } + + // Phase R2a (sequential within chunk): sample beta + build + // composition LDE + tagged leaves per table. Internal + // parallelism inside constraint eval / FFT keeps cores busy. + // K is small (chunk size = table_parallelism()), so per-table + // serialization here costs little. + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + let r2a_iter = chunk_round1 + .iter() .zip(chunk_transcripts.iter_mut()) .enumerate(); - let chunk_results: Vec> = iter - .map(|(j, ((lde, commitment), table_transcript))| { + #[allow(clippy::type_complexity)] + let r2a_results: Vec>, + Vec>, + R2aResult, + ), + ProvingError, + >> = r2a_iter + .map(|(j, (round_1_result, table_transcript))| { + let idx = chunk_start + j; + let (air, _, pub_inputs) = &air_trace_pairs[idx]; + let domain = &domains[idx]; + let tag = main_tags[idx]; + let (tc, bc, r2a) = Self::prove_round_2a( + *air, + *pub_inputs, + round_1_result, + table_transcript, + domain, + tag, + )?; + Ok((j, tc, bc, r2a)) + }) + .collect(); + + // Sequential: collect R2a outputs in chunk-local-index order; + // build chunk composition MMCS over them. 
+ let mut chunk_r2a: Vec>, + Vec>, + R2aResult, + )>> = (0..chunk_size).map(|_| None).collect(); + for r in r2a_results { + let (j, tc, bc, r2a) = r?; + chunk_r2a[j] = Some((tc, bc, r2a)); + } + + let mut chunk_comp_outputs: Vec<(MatrixTag, Vec, usize)> = Vec::new(); + let mut chunk_comp_ldes: Vec<(MatrixTag, Arc>>>)> = + Vec::new(); + for (j, entry) in chunk_r2a.iter().enumerate() { + let idx = chunk_start + j; + let tag = main_tags[idx]; + let (_, _, r2a) = entry.as_ref().expect("R2a populated"); + chunk_comp_outputs.push((tag, r2a.composition_leaves.clone(), r2a.padded_height)); + chunk_comp_ldes.push((tag, Arc::clone(&r2a.lde_composition_poly_evaluations))); + } + + let (chunk_comp_root, chunk_comp_spec, chunk_comp_ctx_opt) = + build_chunk_comp_mmcs::(chunk_comp_outputs, chunk_comp_ldes)?; + // Absorb chunk composition root into EACH chunk-mate's fork. + if let Some(ref root) = chunk_comp_root { + for idx in chunk_start..chunk_end { + table_transcripts[idx].append_bytes(root); + } + } + comp_mmcs_roots_per_chunk.push(chunk_comp_root); + comp_mmcs_specs_per_chunk.push(chunk_comp_spec.clone()); + + let chunk_comp_ctx = chunk_comp_ctx_opt + .expect("chunk has at least one composition matrix (every table has comp)"); + let height_by_tag: std::collections::BTreeMap = + chunk_comp_spec.iter().copied().collect(); + + // Reassemble per-table Round2 from R2a + chunk MMCS context. 
+ let mut chunk_round2: Vec> = Vec::with_capacity(chunk_size); + for j in 0..chunk_size { + let idx = chunk_start + j; + let tag = main_tags[idx]; + let (_, _, r2a) = chunk_r2a[j].take().unwrap(); + let padded_height = *height_by_tag.get(&tag).expect("spec contains tag"); + chunk_round2.push(Round2 { + lde_composition_poly_evaluations: r2a.lde_composition_poly_evaluations, + comp: CompCommit::Shared { + chunk_ctx: Arc::clone(&chunk_comp_ctx), + chunk_idx, + tag, + padded_height, + }, + }); + } + + // Phase R2b → R4 (sequential within chunk): each fork has + // the chunk comp root absorbed; sample z, run R3 OOD + R4 + // FRI. Same rationale as R2a above. + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; + let r2b_iter = chunk_round1 + .iter() + .zip(chunk_round2.iter()) + .zip(chunk_transcripts.iter_mut()) + .enumerate(); + + let chunk_results: Vec> = r2b_iter + .map(|(j, ((round_1_result, round_2_result), table_transcript))| { let idx = chunk_start + j; let (air, trace, pub_inputs) = &air_trace_pairs[idx]; let _ = trace; // used by instruments @@ -2501,18 +2835,11 @@ pub trait IsStarkProver< #[cfg(feature = "instruments")] let table_start = Instant::now(); - // Build Round1 from cached LDE (consumed by value, no recomputation). 
- let round_1_result = - commitment.build_round1(lde, air.step_size(), domain.blowup_factor); - - if let Some(ref bpi) = round_1_result.bus_public_inputs { - table_transcript.append_field_element(&bpi.table_contribution); - } - - let proof = Self::prove_rounds_2_to_4( + let proof = Self::prove_rounds_2b_to_4( *air, *pub_inputs, - &round_1_result, + round_1_result, + round_2_result, table_transcript, domain, )?; @@ -2529,24 +2856,34 @@ pub trait IsStarkProver< }; #[cfg(feature = "instruments")] - return Ok((proof, table_timing)); + return Ok((j, proof, table_timing)); #[cfg(not(feature = "instruments"))] - Ok(proof) + Ok((j, proof)) }) .collect(); for result in chunk_results { #[cfg(feature = "instruments")] { - let (proof, timing) = result?; - proofs.push(proof); + let (j, proof, timing) = result?; + let idx = chunk_start + j; + proofs[idx] = Some(proof); table_timings.push(timing); } #[cfg(not(feature = "instruments"))] - proofs.push(result?); + { + let (j, proof) = result?; + let idx = chunk_start + j; + proofs[idx] = Some(proof); + } } } + let proofs: Vec> = proofs + .into_iter() + .map(|p| p.expect("every table emits a proof")) + .collect(); + #[cfg(feature = "instruments")] { // Store timing data for the top-level report in prove_with_options. @@ -2569,6 +2906,8 @@ pub trait IsStarkProver< main_mmcs_specs: main_mmcs_specs_per_chunk, aux_mmcs_roots: aux_mmcs_roots_per_chunk, aux_mmcs_specs: aux_mmcs_specs_per_chunk, + comp_mmcs_roots: comp_mmcs_roots_per_chunk, + comp_mmcs_specs: comp_mmcs_specs_per_chunk, chunk_size: k as u32, }) } @@ -2607,24 +2946,30 @@ pub trait IsStarkProver< // TODO: propagate errors instead of unwrap() in open_deep_composition_poly and FRI operations /// Executes rounds 2-4 and generates a STARK proof for the trace `main_trace` with public inputs `pub_inputs`. /// Warning: the transcript must be safely initializated before passing it to this method. 
- fn prove_rounds_2_to_4( + /// Part A of Round 2: sample beta + build the composition LDE parts + /// + compute tagged row-pair leaves for the chunk composition MMCS. + /// Returns the artefacts the chunk-level MMCS build consumes + /// alongside this table's tag. + fn prove_round_2a( air: &dyn AIR, pub_inputs: &PI, round_1_result: &Round1, transcript: &mut impl IsStarkTranscript, domain: &Domain, - ) -> Result, ProvingError> + tag: MatrixTag, + ) -> Result< + ( + Vec>, + Vec>, + R2aResult, + ), + ProvingError, + > where FieldElement: AsBytes, FieldElement: AsBytes, PI: Send + Sync + Clone, { - info!("Started proof generation..."); - - // =================================== - // ==========| Round 2 |========== - // =================================== - // <<<< Receive challenge: 𝛽 let beta = transcript.sample_field_element(); let trace_length = domain.interpolation_domain_size; @@ -2637,35 +2982,47 @@ pub trait IsStarkProver< ) .constraints .len(); - let num_transition_constraints = air.context().num_transition_constraints; - let mut coefficients: Vec<_> = core::iter::successors(Some(FieldElement::one()), |x| Some(x * &beta)) .take(num_boundary_constraints + num_transition_constraints) .collect(); - let transition_coefficients: Vec<_> = coefficients.drain(..num_transition_constraints).collect(); let boundary_coefficients = coefficients; - - let round_2_result = Self::round_2_compute_composition_polynomial( + let r2a = Self::round_2a_build_composition_lde( air, pub_inputs, domain, round_1_result, &transition_coefficients, &boundary_coefficients, + tag, )?; + Ok((transition_coefficients, boundary_coefficients, r2a)) + } - // >>>> Send commitments: [H₁], [H₂] - transcript.append_bytes(&round_2_result.composition_poly_root); - - // =================================== - // ==========| Round 3 |========== - // =================================== + /// Part B of Round 2 onward: assumes the chunk composition MMCS root + /// has been absorbed into `transcript` already. 
Runs the absorb of + /// the per-table H_i values, R3 OOD, and R4 FRI + opens, producing + /// the final per-table StarkProof. + #[allow(clippy::too_many_arguments)] + fn prove_rounds_2b_to_4( + air: &dyn AIR, + pub_inputs: &PI, + round_1_result: &Round1, + round_2_result: &Round2, + transcript: &mut impl IsStarkTranscript, + domain: &Domain, + ) -> Result, ProvingError> + where + FieldElement: AsBytes, + FieldElement: AsBytes, + PI: Send + Sync + Clone, + { + info!("Started proof generation (post-R2 chunk join)..."); - // <<<< Receive challenge: z + // <<<< Receive challenge: z (transcript already saw chunk comp root) let z = transcript.sample_z_ood( &domain.lde_roots_of_unity_coset, &domain.trace_roots_of_unity, @@ -2677,7 +3034,7 @@ pub trait IsStarkProver< air, domain, round_1_result, - &round_2_result, + round_2_result, &z, ); #[cfg(feature = "instruments")] @@ -2699,15 +3056,11 @@ pub trait IsStarkProver< // =================================== // ==========| Round 4 |========== // =================================== - - // Part of this round is running FRI, which is an interactive - // protocol on its own. Therefore we pass it the transcript - // to simulate the interactions with the verifier. let round_4_result = Self::round_4_compute_and_run_fri_on_the_deep_composition_polynomial( air, domain, round_1_result, - &round_2_result, + round_2_result, &round_3_result, &z, transcript, @@ -2735,32 +3088,17 @@ pub trait IsStarkProver< info!("End proof generation"); Ok(StarkProof { - // For preprocessed tables: per-table Merkle root over multiplicities - // (preprocessed tables stay out of the shared main-trace MMCS). 
lde_trace_main_merkle_root: round_1_result.main.main_tree_root(), - // For preprocessed tables: commitment to precomputed columns only lde_trace_precomputed_merkle_root: round_1_result.main.precomputed_root(), - // tⱼ(zgᵏ) trace_ood_evaluations: round_3_result.trace_ood_evaluations, - // [H₁] and [H₂] - composition_poly_root: round_2_result.composition_poly_root, - // Hᵢ(z^N) composition_poly_parts_ood_evaluation: round_3_result .composition_poly_parts_ood_evaluation, - // [pₖ] fri_layers_merkle_roots: round_4_result.fri_layers_merkle_roots, - // pₙ fri_last_value: round_4_result.fri_last_value, - // Open(p₀(D₀), 𝜐ₛ), Open(pₖ(Dₖ), −𝜐ₛ^(2ᵏ)) query_list: round_4_result.query_list, - // Open(H₁(D_LDE, 𝜐₀), Open(H₂(D_LDE, 𝜐₀), Open(tⱼ(D_LDE), 𝜐₀) - // Open(H₁(D_LDE, -𝜐ᵢ), Open(H₂(D_LDE, -𝜐ᵢ), Open(tⱼ(D_LDE), -𝜐ᵢ) deep_poly_openings: round_4_result.deep_poly_openings, - // nonce obtained from grinding nonce: round_4_result.nonce, - // Bus interaction public inputs (for boundary constraints and bus balance check) bus_public_inputs: round_1_result.bus_public_inputs.clone(), - // Public inputs for boundary constraints public_inputs: pub_inputs.clone(), trace_length: domain.interpolation_domain_size, }) diff --git a/crypto/stark/src/tests/mmcs_soundness_tests.rs b/crypto/stark/src/tests/mmcs_soundness_tests.rs index ab0c8912f..71448e64d 100644 --- a/crypto/stark/src/tests/mmcs_soundness_tests.rs +++ b/crypto/stark/src/tests/mmcs_soundness_tests.rs @@ -251,3 +251,55 @@ fn swapped_main_tags_at_verifier_rejected() { "swapped main_tags must be rejected" ); } + +// ---------- Composition MMCS soundness ---------- + +fn first_populated_comp_chunk(proof: &MultiProof) -> usize { + proof + .comp_mmcs_roots + .iter() + .position(|r| r.is_some()) + .expect("at least one chunk must have a comp MMCS root in this baseline") +} + +#[test_log::test] +fn tampered_comp_mmcs_root_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, 
&air_2]; + let chunk_idx = first_populated_comp_chunk(&proof); + let root = proof.comp_mmcs_roots[chunk_idx] + .as_mut() + .expect("populated"); + root[0] ^= 1; + assert!( + !verify(&airs, &proof), + "tampered composition MMCS root must be rejected" + ); +} + +#[test_log::test] +fn tampered_comp_mmcs_spec_height_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + let chunk_idx = first_populated_comp_chunk(&proof); + proof.comp_mmcs_specs[chunk_idx][0].1 /= 2; + assert!( + !verify(&airs, &proof), + "composition spec height mismatch must be rejected" + ); +} + +#[test_log::test] +fn missing_comp_mmcs_root_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + let chunk_idx = first_populated_comp_chunk(&proof); + proof.comp_mmcs_roots[chunk_idx] = None; + assert!( + !verify(&airs, &proof), + "missing composition MMCS root must be rejected (every chunk must have one)" + ); +} diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index 95165a253..dc614fb85 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -422,26 +422,28 @@ pub trait IsStarkVerifier< /// Verify opening Open(Hᵢ(D_LDE), 𝜐) and Open(Hᵢ(D_LDE), -𝜐) for all parts Hᵢof the composition /// polynomial, where 𝜐 and -𝜐 are the elements corresponding to the index challenge `iota`. + /// Verify the composition-trace MMCS opening pair for one query. + /// Rehashes the row-pair leaf using the COMPOSITION domain + /// separator, checks it matches `matrix_leaves[table_idx]`, and + /// authenticates against the chunk's composition root + spec. 
fn verify_composition_poly_opening( deep_poly_openings: &DeepPolynomialOpening, - composition_poly_merkle_root: &Commitment, - iota: &usize, + comp_mmcs_root: Option<&Commitment>, + comp_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + iota: usize, ) -> bool where FieldElement: AsBytes + Sync + Send, - FieldElement: AsBytes + Sync + Send, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, { - let mut value = deep_poly_openings.composition_poly.evaluations.clone(); - value.extend_from_slice(&deep_poly_openings.composition_poly.evaluations_sym); - - deep_poly_openings - .composition_poly - .proof - .verify::>( - composition_poly_merkle_root, - *iota, - &value, - ) + verify_comp_mmcs_pair_inner::( + &deep_poly_openings.composition_poly, + iota, + main_tag, + comp_mmcs_root, + comp_mmcs_spec, + ) } /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial @@ -456,6 +458,8 @@ pub trait IsStarkVerifier< main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], aux_mmcs_root: Option<&Commitment>, aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + comp_mmcs_root: Option<&Commitment>, + comp_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, @@ -468,8 +472,10 @@ pub trait IsStarkVerifier< .all(|(iota_n, deep_poly_opening)| { Self::verify_composition_poly_opening( deep_poly_opening, - &proof.composition_poly_root, - iota_n, + comp_mmcs_root, + comp_mmcs_spec, + main_tag, + *iota_n, ) && Self::verify_trace_openings( proof, deep_poly_opening, @@ -636,7 +642,7 @@ pub trait IsStarkVerifier< challenges, &lde_base, lde_aux, - &opening.composition_poly.evaluations, + opening.composition_poly.evaluations(), )?); // Mirror for the symmetric query point. 
@@ -660,7 +666,7 @@ pub trait IsStarkVerifier< challenges, &lde_base_sym, lde_aux_sym, - &opening.composition_poly.evaluations_sym, + opening.composition_poly.evaluations_sym(), )?); } Some((deep_poly_evaluations, deep_poly_evaluations_sym)) @@ -820,13 +826,19 @@ pub trait IsStarkVerifier< || multi_proof.main_mmcs_specs.len() != expected_num_chunks || multi_proof.aux_mmcs_roots.len() != expected_num_chunks || multi_proof.aux_mmcs_specs.len() != expected_num_chunks + || multi_proof.comp_mmcs_roots.len() != expected_num_chunks + || multi_proof.comp_mmcs_specs.len() != expected_num_chunks { error!( - "per-chunk MMCS Vec lengths inconsistent with chunk_size={chunk_size}: expected {expected_num_chunks} chunks; got main_roots={}, main_specs={}, aux_roots={}, aux_specs={}", + "per-chunk MMCS Vec lengths inconsistent with chunk_size={chunk_size}: \ + expected {expected_num_chunks} chunks; got main_roots={}, main_specs={}, \ + aux_roots={}, aux_specs={}, comp_roots={}, comp_specs={}", multi_proof.main_mmcs_roots.len(), multi_proof.main_mmcs_specs.len(), multi_proof.aux_mmcs_roots.len(), multi_proof.aux_mmcs_specs.len(), + multi_proof.comp_mmcs_roots.len(), + multi_proof.comp_mmcs_specs.len(), ); return false; } @@ -989,6 +1001,41 @@ pub trait IsStarkVerifier< } } + // Per-chunk composition MMCS spec validation. Every table has a + // composition polynomial, so every chunk has Some(root). The + // composition root is NOT absorbed here at the shared-transcript + // level — it gets absorbed PER-TABLE inside `verify_rounds_2_to_4` + // between sampling beta and sampling z (mirroring the prover, + // which absorbs it into each chunk-mate's fork at that point). 
+ for chunk_idx in 0..expected_num_chunks { + let chunk_start = chunk_idx * chunk_size; + let chunk_end = (chunk_start + chunk_size).min(airs.len()); + + let mut expected_comp_spec: Vec<(crypto::merkle_tree::mmcs::MatrixTag, usize)> = + Vec::new(); + for idx in chunk_start..chunk_end { + let proof = &multi_proof.proofs[idx]; + let lde_size = + proof.trace_length * (airs[idx].options().blowup_factor as usize); + // Composition MMCS padded height = lde_size / 2 (row-pair leaves). + expected_comp_spec.push((main_tags[idx], lde_size / 2)); + } + expected_comp_spec.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + if expected_comp_spec != multi_proof.comp_mmcs_specs[chunk_idx] { + error!( + "chunk {chunk_idx} comp_mmcs_spec mismatch: expected {:?}, got {:?}", + expected_comp_spec, multi_proof.comp_mmcs_specs[chunk_idx], + ); + return false; + } + if multi_proof.comp_mmcs_roots[chunk_idx].is_none() { + error!( + "chunk {chunk_idx} comp_mmcs_root missing (every chunk must commit at least one composition matrix)" + ); + return false; + } + } + // ===================================================================== // Rounds 2-4: Forked per table // ===================================================================== @@ -1011,8 +1058,8 @@ pub trait IsStarkVerifier< table_transcript.append_field_element(&bpi.table_contribution); } - // Per-chunk lookup: each table's main / aux MMCS root + spec - // come from its chunk. + // Per-chunk lookup: each table's main / aux / comp MMCS + // root + spec come from its chunk. 
let table_chunk_idx = idx / chunk_size; let main_root_for_chunk = multi_proof.main_mmcs_roots[table_chunk_idx].as_ref(); @@ -1021,6 +1068,10 @@ pub trait IsStarkVerifier< let aux_root_for_chunk = multi_proof.aux_mmcs_roots[table_chunk_idx].as_ref(); let aux_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = &multi_proof.aux_mmcs_specs[table_chunk_idx]; + let comp_root_for_chunk = + multi_proof.comp_mmcs_roots[table_chunk_idx].as_ref(); + let comp_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = + &multi_proof.comp_mmcs_specs[table_chunk_idx]; if !Self::verify_rounds_2_to_4( *air, @@ -1032,6 +1083,8 @@ pub trait IsStarkVerifier< main_spec_for_chunk, aux_root_for_chunk, aux_spec_for_chunk, + comp_root_for_chunk, + comp_spec_for_chunk, ) { error!( "Table {} failed verify_rounds_2_to_4 (num_constraints={}, trace_cols={})", @@ -1102,12 +1155,18 @@ pub trait IsStarkVerifier< /// Replays rounds 2, 3 and 4 of the protocol for a given proof, assuming round 1 has /// already been replayed and the RAP challenges are known. + /// + /// `comp_mmcs_root` is this table's chunk composition MMCS root, + /// absorbed between beta and z sampling. The prover absorbs the + /// same root into each chunk-mate's fork. + #[allow(clippy::too_many_arguments)] fn replay_rounds_after_round_1( air: &dyn AIR, proof: &StarkProof, domain: &VerifierDomain, transcript: &mut impl IsStarkTranscript, rap_challenges: Vec>, + comp_mmcs_root: Option<&Commitment>, ) -> Challenges where FieldElement: AsBytes, @@ -1138,8 +1197,11 @@ pub trait IsStarkVerifier< let transition_coeffs: Vec<_> = coefficients.drain(..num_transition_constraints).collect(); let boundary_coeffs = coefficients; - // <<<< Receive commitments: [H₁], [H₂] - transcript.append_bytes(&proof.composition_poly_root); + // <<<< Receive commitment: chunk composition MMCS root (one + // absorb per chunk-mate's fork, mirroring `multi_prove`). 
+ if let Some(root) = comp_mmcs_root { + transcript.append_bytes(root); + } // =================================== // ==========| Round 3 |========== @@ -1251,6 +1313,8 @@ pub trait IsStarkVerifier< main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], aux_mmcs_root: Option<&Commitment>, aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + comp_mmcs_root: Option<&Commitment>, + comp_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], ) -> bool where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, @@ -1268,8 +1332,14 @@ pub trait IsStarkVerifier< #[cfg(feature = "instruments")] let timer1 = Instant::now(); - let challenges = - Self::replay_rounds_after_round_1(air, proof, &domain, transcript, rap_challenges); + let challenges = Self::replay_rounds_after_round_1( + air, + proof, + &domain, + transcript, + rap_challenges, + comp_mmcs_root, + ); // verify grinding let security_bits = air.context().proof_options.grinding_factor; @@ -1335,6 +1405,8 @@ pub trait IsStarkVerifier< main_mmcs_spec, aux_mmcs_root, aux_mmcs_spec, + comp_mmcs_root, + comp_mmcs_spec, ) { #[cfg(not(feature = "test_fiat_shamir"))] error!("DEEP Composition Polynomial verification failed"); @@ -1488,3 +1560,64 @@ where mmcs_opening_sym.verify::>(aux_mmcs_root, aux_mmcs_spec); ok && ok_sym } + +/// Composition-trace counterpart of [`verify_main_mmcs_pair_inner`]. Uses +/// `LEAF_DOMAIN_TAG_COMPOSITION` for rehash; the leaf hashes a row-PAIR +/// rather than a single row, so the opening covers both `evaluations` +/// (row 0 / br_0) and `evaluations_sym` (row 1 / br_1) under one MMCS +/// opening — no separate `_sym` opening at this layer (the underlying +/// tree's leaves are already row-pairs). 
+fn verify_comp_mmcs_pair_inner( + comp_opening: &crate::proof::stark::CompositionTraceOpening, + iota: usize, + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + comp_mmcs_root: Option<&Commitment>, + comp_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], +) -> bool +where + E: IsField, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, +{ + use crate::mmcs_leaf::hash_tagged_row_pair_composition; + use crate::proof::stark::CompositionTraceOpening; + + let comp_mmcs_root = match comp_mmcs_root { + Some(r) => r, + None => return false, + }; + let CompositionTraceOpening::Mmcs { + evaluations, + evaluations_sym, + mmcs_opening, + } = comp_opening; + + let table_idx = match comp_mmcs_spec.iter().position(|(t, _)| *t == main_tag) { + Some(i) => i, + None => return false, + }; + let table_height = comp_mmcs_spec[table_idx].1; + let max_height = match comp_mmcs_spec.first().map(|(_, h)| *h) { + Some(h) => h, + None => return false, + }; + if !table_height.is_power_of_two() || max_height < table_height { + return false; + } + let shift = (max_height / table_height).trailing_zeros() as usize; + // Composition opens at row-pair index iota, so the global index in + // the chunk MMCS is iota shifted up by the chunk-mate's depth diff. 
+ let g_index = iota << shift; + if mmcs_opening.global_index != g_index { + return false; + } + + let leaf = hash_tagged_row_pair_composition::(main_tag, evaluations, evaluations_sym); + let leaves = &mmcs_opening.matrix_leaves; + if table_idx >= leaves.len() { + return false; + } + if leaves[table_idx].0 != main_tag || leaves[table_idx].1 != leaf { + return false; + } + mmcs_opening.verify::>(comp_mmcs_root, comp_mmcs_spec) +} From 7512928c5ef77a70a1fbc352fed7036600655530 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Thu, 28 May 2026 12:15:45 -0300 Subject: [PATCH 20/21] feat(stark/fri): linear_combine_evaluations helper for Phase D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mathematical primitive for batched FRI: combine N same-size evaluation vectors into one using successive powers of δ_fri. A bucket's chip-DEEP evaluations combine into one polynomial; FRI commits + opens only that combined polynomial, while per-chip openings (main/aux/comp) still authenticate each individual D_i value at the bucket-shared iotas. Singleton bucket is a fast path (no combination needed). Empty input is a usage bug (debug-asserted). Not yet wired up — prover + verifier integration lands in the follow-up chunk-join refactor. 4/4 new fri:: unit tests green. 157 prior stark tests unaffected. 
--- crypto/stark/src/fri/mod.rs | 98 +++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/crypto/stark/src/fri/mod.rs b/crypto/stark/src/fri/mod.rs index bbb988bd1..42682947e 100644 --- a/crypto/stark/src/fri/mod.rs +++ b/crypto/stark/src/fri/mod.rs @@ -15,6 +15,49 @@ use self::fri_functions::{ compute_coset_twiddles_inv, fold_evaluations_in_place, update_twiddles_in_place, }; +/// Linearly combine N same-size evaluation vectors into one, using +/// successive powers of `delta_fri`: +/// +/// `out[i] = sources[0][i] + delta_fri * sources[1][i] + delta_fri^2 * +/// sources[2][i] + ...` +/// +/// This is the mathematical primitive behind Phase D batched FRI: a +/// bucket's chip-DEEP-LDEs are combined into one polynomial whose +/// low-degree-ness implies each summand's. FRI then commits + opens +/// just that combined polynomial. +/// +/// Empty `sources` is a usage bug — caller must pre-filter. +/// All `sources[i]` must share the same length; debug-asserted. +pub fn linear_combine_evaluations( + sources: &[&[FieldElement]], + delta_fri: &FieldElement, +) -> Vec> { + debug_assert!( + !sources.is_empty(), + "linear_combine_evaluations: caller must supply at least one source" + ); + let n = sources[0].len(); + debug_assert!( + sources.iter().all(|s| s.len() == n), + "linear_combine_evaluations: all source vectors must share length" + ); + + if sources.len() == 1 { + // Singleton bucket: combining one polynomial is the identity. + return sources[0].to_vec(); + } + + let mut out = sources[0].to_vec(); + let mut coeff = delta_fri.clone(); + for src in &sources[1..] { + for (o, s) in out.iter_mut().zip(src.iter()) { + *o = &*o + &coeff * s; + } + coeff = &coeff * delta_fri; + } + out +} + /// FRI commit phase from pre-computed bit-reversed evaluations, skipping the /// initial FFT. Use this when the caller already has the evaluation vector /// (e.g. from a fused LDE pipeline). 
@@ -126,3 +169,58 @@ where .collect() } } + +#[cfg(test)] +mod tests { + use super::*; + use math::field::goldilocks::GoldilocksField; + + type FE = FieldElement<GoldilocksField>; + + #[test] + fn linear_combine_singleton_is_identity() { + let v = vec![FE::from(7u64), FE::from(11u64), FE::from(13u64), FE::from(17u64)]; + let delta = FE::from(99u64); + let out = linear_combine_evaluations(&[&v[..]], &delta); + assert_eq!(out, v); + } + + #[test] + fn linear_combine_two_sources_uses_horner_in_delta() { + // out[i] = a[i] + delta * b[i] + let a = vec![FE::from(1u64), FE::from(2u64), FE::from(3u64)]; + let b = vec![FE::from(10u64), FE::from(20u64), FE::from(30u64)]; + let delta = FE::from(5u64); + let out = linear_combine_evaluations(&[&a[..], &b[..]], &delta); + let expected: Vec<FE> = a + .iter() + .zip(b.iter()) + .map(|(x, y)| x + &delta * y) + .collect(); + assert_eq!(out, expected); + } + + #[test] + fn linear_combine_three_sources_powers_of_delta() { + // out[i] = a[i] + delta * b[i] + delta^2 * c[i] + let a = vec![FE::from(1u64), FE::from(0u64)]; + let b = vec![FE::from(0u64), FE::from(1u64)]; + let c = vec![FE::from(1u64), FE::from(1u64)]; + let delta = FE::from(3u64); + let out = linear_combine_evaluations(&[&a[..], &b[..], &c[..]], &delta); + let delta_sq = &delta * &delta; + // out[0] = 1 + 3*0 + 9*1 = 10 + // out[1] = 0 + 3*1 + 9*1 = 12 + assert_eq!(out[0], FE::from(1u64) + &delta_sq); + assert_eq!(out[1], FE::from(3u64) + &delta_sq); + } + + #[test] + fn linear_combine_zero_delta_keeps_only_first_source() { + let a = vec![FE::from(7u64), FE::from(7u64)]; + let b = vec![FE::from(99u64), FE::from(99u64)]; + let zero = FE::from(0u64); + let out = linear_combine_evaluations(&[&a[..], &b[..]], &zero); + assert_eq!(out, a); + } +} From c8025237a884b90ad8aaceaf15d50b52d4225378 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Thu, 28 May 2026 15:53:18 -0300 Subject: [PATCH 21/21] feat(stark/fri): per-(chunk, lde_size) batched FRI + streaming bucket combine MIME-Version: 1.0 Content-Type:
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase D of the MMCS streaming plan: one FRI commit per height bucket inside a chunk. Bucket-mates' DEEP composition polynomials are linearly combined with successive powers of δ_fri (sampled from the chunk-shared, post-OOD-broadcast transcript) and a single FRI commit + grinding + query phase runs on the combined polynomial. Drops 4 per-table proof fields; adds one `MultiProof` field. Built on top of (and is the final piece of) the streaming MMCS plan: trace-MMCS (b1621808) and composition-MMCS (c22fca9e) shave ~10-15% of proof bytes; batched FRI is where the rest of the proof-size win lives. # Proof format `StarkProof` drops: - `fri_layers_merkle_roots: Vec` - `fri_last_value: FieldElement` - `query_list: Vec>` - `nonce: Option` `MultiProof` gains: - `fri_chunk_buckets: Vec>>` `ChunkBucketFri { lde_size, members, layer_roots, last_value, decommitments, nonce }` — bucket members listed in canonical chunk-local-index order so δ_fri^i powers are reproducible verifier-side. # Bucket-seed transcript (canonical OOD broadcast) For chunk c, after Phase A+B+C absorbs, the bucket seed extends a clone of the pre-fork shared state by appending, in exact order: 1. per chunk-mate j in chunk-local order: table_contribution_j (when bus_public_inputs is Some) 2. chunk composition MMCS root (when Some) 3. per chunk-mate j in chunk-local order: trace_ood_evaluations columns + composition_poly_parts_ood_evaluation Per bucket b: bucket_transcript = bucket_seed.clone(), append `(lde_size as u64).to_le_bytes()`, sample δ_fri, then standard commit_phase_from_evaluations / grinding / iotas interaction. The verifier reconstructs the bucket seed from `MultiProof` data alone (table contributions from `bus_public_inputs`, comp root from `comp_mmcs_roots`, OODs from each `StarkProof.trace_ood_evaluations` + `composition_poly_parts_ood_evaluation`). 
# Prover (multi_prove) R2b → R3.5 split into a light per-fork pass + heavy bucket-level pass: - `prove_rounds_2b_to_3_5(...)` per chunk-mate (now parallel via rayon `par_iter_mut()` on the chunk transcript slice): sample z, compute OOD, absorb own OOD, sample γ, build the DEEP coefficient layout (`DeepCoeffs { z, gammas, trace_term_coeffs, lde_size }`). Does NOT build the DEEP LDE. - Build `bucket_seed` (canonical replay on `pre_fork_transcript`). - Bucket chunk-mates by `lde_size` (first-encounter order on chunk-local index). - Per bucket: clone bucket_seed, append lde_size, sample δ_fri. Streaming combine: per member, `compute_deep_lde_with_coeffs` → fold into the bucket accumulator with δ_fri^i_local → drop. First member of every bucket reuses its DEEP LDE as the accumulator base (no zero-init copy in the singleton case). Then commit_phase_from_evaluations / grind / sample iotas / query_phase on the combined polynomial. - Per chunk-mate: open trace / aux / comp / precomputed at the bucket-shared iotas, assemble per-table `StarkProof`. Iotas come from the bucket cache; no transcript replay needed. # Verifier (multi_verify) - `replay_rounds_after_round_1` stops at γ — leaves `zetas`, `iotas`, and `grinding_seed` empty in the returned `Challenges`. - `verify_rounds_2_to_4` split into `replay_and_verify_step_2` (per-fork, returns `Option`) + `verify_step_4_at_iotas` (driven from `multi_verify` after the bucket FRI sets iotas). - `step_3_verify_fri` / `verify_query_and_sym_openings` replaced by `verify_bucket_fri_query` which takes the bucket's `layer_roots` + `last_value` + per-iota `FriDecommitment` externally and runs the standard FRI fold check against the combined-D evaluations. - `multi_verify` chunk loop: 1. Per chunk-mate: fork transcript → bind table_contribution → `replay_and_verify_step_2` → store Challenges. 2. Build `bucket_seed` from `MultiProof` data. 3. 
Validate `fri_chunk_buckets[chunk_idx]` structure: bucket count matches the expected lde_size grouping; per bucket lde_size and member tag order match. 4. Per bucket: replay layer-root absorbs + zetas + last_value + grinding + iotas (cached in `bucket_iotas_cache`). Reconstruct each member's D_i evaluations via the existing barycentric helper, combine with δ_fri^i, run `verify_bucket_fri_query` per iota. 5. Per chunk-mate: `verify_step_4_at_iotas` using cached iotas. # Performance considerations (F1 fixes) The first draft of D5 retained K × DEEP LDE simultaneously and lost per-chunk-mate parallelism in R2b-R3.5 (c22fca9e had serialised it): proving time on the workload regressed ~117% while memory only dropped ~10%. Three fixes shipped together: - F1.1 (streaming bucket combine): DEEP_i materialises one at a time inside the bucket loop and folds into the accumulator with δ_fri^i, then drops. Peak DEEP memory inside the bucket loop is 2 × |LDE| (current member + accumulator), restored from K × |LDE|. - F1.2 (restore per-chunk-mate parallelism): `chunk_transcripts. par_iter_mut().enumerate()` runs R2b → R3 → DEEP-coeffs concurrently per chunk-mate. `chunk_airs` + `chunk_num_rows` are extracted before the parallel iter so we don't capture the Sync-unfriendly `&mut TraceTable` inside `air_trace_pairs`. - F1.3 (verifier iota cache): `bucket_iotas_cache: Vec>` is populated during the FRI fold check pass and reused in the step 4 pass, eliminating a redundant clone+replay of the bucket transcript per chunk-mate. # Tests (D7) New `tests/bucket_fri_soundness_tests.rs` (7 tests): - baseline_phase_d_proof_verifies - tampered_bucket_last_value_rejected - tampered_bucket_layer_root_rejected - truncated_bucket_decommitments_rejected - missing_chunk_buckets_rejected - wrong_bucket_lde_size_rejected - swapped_member_order_rejected `bin/cli/proof-size` breakdown: drops `fri_layers_merkle_roots` and `fri_query_list`; adds `fri_chunk_buckets (per-chunk batched FRI)`. 
# Results - `cargo test -p stark --lib` → 168/168 (161 prior + 7 new bucket FRI soundness tests). - `cargo test -p crypto --lib` → 73/73. - `cargo test -p cli` → 3/3. - `cargo test -p lambda-vm-prover --lib tests::bitwise*` → 29/29 (preprocessed-AIR exercise). - 77 `tests::prove_elfs_tests::*` failures are the unchanged pre-existing `UnknownSyscall(5)` executor bug, present on main. # Out of scope (follow-ups) - Fuse compute-DEEP + bucket-combine into a single per-LDE-point pass (eliminate the intermediate `Vec` for the current member). - Adaptive chunk sizing by LDE memory budget (replace fixed `chunk_size = table_parallelism()` with a bin-packing decision). - Disk-spill `StarkProof` + `ChunkBucketFri` between chunks for very-large-table workloads. --- bin/cli/src/main.rs | 19 +- crypto/stark/src/proof/stark.rs | 59 +- crypto/stark/src/prover.rs | 509 ++++++++++----- .../src/tests/bucket_fri_soundness_tests.rs | 168 +++++ crypto/stark/src/tests/mod.rs | 1 + crypto/stark/src/verifier.rs | 604 ++++++++++++------ 6 files changed, 973 insertions(+), 387 deletions(-) create mode 100644 crypto/stark/src/tests/bucket_fri_soundness_tests.rs diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs index 86a6dbddf..6e310264b 100644 --- a/bin/cli/src/main.rs +++ b/bin/cli/src/main.rs @@ -697,6 +697,8 @@ fn cmd_proof_size( let comp_mmcs_roots_bytes = ser_len(&vm_proof.proof.comp_mmcs_roots); let comp_mmcs_specs_bytes = ser_len(&vm_proof.proof.comp_mmcs_specs); let chunk_size_bytes = ser_len(&vm_proof.proof.chunk_size); + // Phase D: per-(chunk, bucket) batched FRI. + let fri_chunk_buckets_bytes = ser_len(&vm_proof.proof.fri_chunk_buckets); // Sum per-section across every sub-proof so a single number captures the // contribution of, e.g., "all FRI query lists across all tables". 
@@ -704,8 +706,6 @@ fn cmd_proof_size( let mut s_precomputed_trace_openings = 0usize; let mut s_aux_trace_openings = 0usize; let mut s_composition_openings = 0usize; - let mut s_fri_query_list = 0usize; - let mut s_fri_layers_roots = 0usize; let mut s_trace_ood = 0usize; let mut s_composition_ood = 0usize; let mut s_per_table_main_root = 0usize; @@ -718,8 +718,6 @@ fn cmd_proof_size( s_precomputed_root += ser_len(&proof.lde_trace_precomputed_merkle_root); s_trace_ood += ser_len(&proof.trace_ood_evaluations); s_composition_ood += ser_len(&proof.composition_poly_parts_ood_evaluation); - s_fri_query_list += ser_len(&proof.query_list); - s_fri_layers_roots += ser_len(&proof.fri_layers_merkle_roots); s_bus_public_inputs += ser_len(&proof.bus_public_inputs); for opening in &proof.deep_poly_openings { @@ -730,9 +728,8 @@ fn cmd_proof_size( } } - // Anything not captured above (composition_poly_root, fri_last_value, - // nonce, public_inputs, trace_length, headers...). Calculate as the - // bundle delta so the breakdown still sums to ~total. + // Anything not captured above (public_inputs, trace_length, headers...). + // Calculate as the bundle delta so the breakdown still sums to ~total. 
let accounted = main_mmcs_roots_bytes + main_mmcs_specs_bytes + aux_mmcs_roots_bytes @@ -740,12 +737,11 @@ fn cmd_proof_size( + comp_mmcs_roots_bytes + comp_mmcs_specs_bytes + chunk_size_bytes + + fri_chunk_buckets_bytes + s_main_trace_openings + s_precomputed_trace_openings + s_aux_trace_openings + s_composition_openings - + s_fri_query_list - + s_fri_layers_roots + s_trace_ood + s_composition_ood + s_per_table_main_root @@ -767,12 +763,11 @@ fn cmd_proof_size( ProofSizeEntry { section: "deep_poly_openings.precomputed_trace_polys".into(), bytes: s_precomputed_trace_openings }, ProofSizeEntry { section: "deep_poly_openings.aux_trace_polys".into(), bytes: s_aux_trace_openings }, ProofSizeEntry { section: "deep_poly_openings.composition_poly".into(), bytes: s_composition_openings }, - ProofSizeEntry { section: "fri_layers_merkle_roots".into(), bytes: s_fri_layers_roots }, - ProofSizeEntry { section: "fri_query_list".into(), bytes: s_fri_query_list }, + ProofSizeEntry { section: "fri_chunk_buckets (per-chunk batched FRI)".into(), bytes: fri_chunk_buckets_bytes }, ProofSizeEntry { section: "trace_ood_evaluations".into(), bytes: s_trace_ood }, ProofSizeEntry { section: "composition_poly_parts_ood_evaluation".into(), bytes: s_composition_ood }, ProofSizeEntry { section: "bus_public_inputs".into(), bytes: s_bus_public_inputs }, - ProofSizeEntry { section: "other (headers / public_inputs / nonce / ...)".into(), bytes: s_other }, + ProofSizeEntry { section: "other (headers / public_inputs / ...)".into(), bytes: s_other }, ]; if json { diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs index 32ac76184..7fb68751e 100644 --- a/crypto/stark/src/proof/stark.rs +++ b/crypto/stark/src/proof/stark.rs @@ -155,17 +155,12 @@ pub struct StarkProof, E: IsField, PI> { pub trace_ood_evaluations: Table, // Hᵢ(z^N) pub composition_poly_parts_ood_evaluation: Vec>, - // [pₖ] - pub fri_layers_merkle_roots: Vec, - // pₙ - pub fri_last_value: FieldElement, - // 
Open(pₖ(Dₖ), −𝜐ₛ^(2ᵏ)) - pub query_list: Vec>, - // Open(H₁(D_LDE, 𝜐ᵢ), Open(H₂(D_LDE, 𝜐ᵢ), Open(tⱼ(D_LDE), 𝜐ᵢ) - // Open(H₁(D_LDE, -𝜐ᵢ), Open(H₂(D_LDE, -𝜐ᵢ), Open(tⱼ(D_LDE), -𝜐ᵢ) + // Per-query openings of THIS table's main / aux / composition / precomputed + // data, indexed at the SHARED bucket iotas (Phase D batched FRI). The FRI + // commit + last value + query decommitments + grinding nonce now live at + // chunk-bucket level in `MultiProof::fri_chunk_buckets`; this proof only + // carries the per-table trace authentication. pub deep_poly_openings: DeepPolynomialOpenings, - // nonce obtained from grinding - pub nonce: Option, // Bus interaction public inputs for the accumulated column. // Contains the table contribution (L), used for: // 1. Circular constraint offset: L/N per row @@ -230,4 +225,48 @@ pub struct MultiProof, E: IsField, PI> { /// proving time. The verifier uses this to chunk the AIR slice into /// the same per-chunk grouping the prover used. pub chunk_size: u32, + /// Per-(chunk, lde_size-bucket) batched FRI instances. Outer Vec is + /// indexed by chunk (parallel to `main_mmcs_roots` etc.); inner Vec + /// lists buckets in canonical first-encounter (chunk-local-index + /// ascending) order. Each `ChunkBucketFri` carries the FRI layer + /// roots, last value, per-iota decommitments, and grinding nonce + /// for ONE linearly-combined DEEP composition polynomial committing + /// to every bucket-mate's individual D_i (combined with successive + /// powers of the bucket's `delta_fri` challenge). + pub fri_chunk_buckets: Vec>>, +} + +/// Phase D — per-(chunk, lde_size) batched FRI instance. +/// +/// One per height bucket inside a chunk: bucket-mates' individual DEEP +/// composition polynomials are linearly combined with successive powers +/// of `delta_fri` (sampled from the chunk-shared, post-OOD-broadcast +/// transcript), and a single FRI commit + grinding + query is run on +/// the combined polynomial. 
The `members` list pins the canonical +/// bucket-local order used to derive `delta_fri^i` on the verifier side; +/// reordering the list rejects the proof. +/// +/// `decommitments` length equals `air.options().fri_number_of_queries` +/// (one decommitment per shared iota). `nonce` is `Some` when the +/// AIR's grinding factor > 0 (`None` otherwise). +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(bound = "")] +pub struct ChunkBucketFri { + /// LDE size shared by every bucket-mate. Equal to + /// `trace_length * blowup_factor` for each member. + pub lde_size: u32, + /// Bucket-mate tags in the canonical bucket-local order (matches + /// chunk-local index ascending). Index `i` here corresponds to + /// `delta_fri^i` in the linear combination. + pub members: Vec, + /// `[pₖ]` for k = 1..num_layers. + pub layer_roots: Vec, + /// `pₙ` — the final folded constant. + pub last_value: FieldElement, + /// One FRI decommitment per shared iota (the bucket transcript + /// samples a single iota list reused by every bucket-mate's + /// per-table opening). + pub decommitments: Vec>, + /// Grinding nonce, when `grinding_factor > 0`. + pub nonce: Option, } diff --git a/crypto/stark/src/prover.rs b/crypto/stark/src/prover.rs index 72f2a822f..bd913d4d1 100644 --- a/crypto/stark/src/prover.rs +++ b/crypto/stark/src/prover.rs @@ -39,7 +39,6 @@ use crypto::merkle_tree::mmcs::{MatrixTag, Mmcs, MmcsError, StreamingMmcsBuilder use super::config::{BatchedMerkleTree, BatchedMerkleTreeBackend, Commitment}; use super::constraints::evaluator::ConstraintEvaluator; use super::domain::{Domain, DomainConstants}; -use super::fri::fri_decommit::FriDecommitment; use super::grinding; use super::lookup::BusPublicInputs; use super::proof::stark::{DeepPolynomialOpening, MultiProof, StarkProof}; @@ -507,26 +506,26 @@ where /// A container for the results of the third round of the STARK Prove protocol. 
pub(crate) struct Round3 { - /// Evaluations of the trace polynomials, main ans auxiliary, at the out-of-domain challenge. - trace_ood_evaluations: Table, + /// Evaluations of the trace polynomials, main and auxiliary, at the out-of-domain challenge. + pub(crate) trace_ood_evaluations: Table, /// Evaluations of the composition polynomial parts at the out-of-domain challenge. - composition_poly_parts_ood_evaluation: Vec>, + pub(crate) composition_poly_parts_ood_evaluation: Vec>, } -/// A container for the results of the fourth round of the STARK Prove protocol. -pub(crate) struct Round4, E: IsField> { - /// The final value resulting from folding the Deep composition polynomial all the way down to a constant value. - fri_last_value: FieldElement, - /// The commitments to the fold polynomials of the inner layers of FRI. - fri_layers_merkle_roots: Vec, - /// The values and proofs of validity of the evaluations of the trace polynomials and the composition polynomials - /// parts at the domain values corresponding to the FRI query challenges and their symmetric counterparts. - deep_poly_openings: DeepPolynomialOpenings, - /// The values and proofs of validity of the evaluations of the fold polynomials of the inner - /// layers of FRI at the values corresponding to the symmetrics of the FRI query challenges. - query_list: Vec>, - /// The proof of work nonce. - nonce: Option, +/// Per-table DEEP composition coefficient layout sampled at R3.5 from +/// the per-fork transcript. Stored across chunk-mates so the bucket FRI +/// loop can rebuild DEEP_i LDEs on demand (one at a time) and fold them +/// into the bucket-combined accumulator without retaining K full DEEPs +/// in memory simultaneously. +pub(crate) struct DeepCoeffs { + pub(crate) z: FieldElement, + /// γⱼ for the composition-poly OOD H_j parts. + pub(crate) gammas: Vec>, + /// γ′ⱼₖ for the trace-column OOD terms, grouped by column. 
+ pub(crate) trace_term_coeffs: Vec>>, + /// LDE size — matches `round_1.lde_trace`'s length and is the size + /// of the DEEP LDE this table contributes to its bucket. + pub(crate) lde_size: usize, } /// Returns the evaluations of the polynomial `p` over the lde domain defined by the given @@ -1631,23 +1630,24 @@ pub trait IsStarkProver< } } - /// Returns the result of the fourth round of the STARK Prove protocol. - fn round_4_compute_and_run_fri_on_the_deep_composition_polynomial( + /// Round 3.5 part A — sample γ from the per-fork transcript and + /// build the DEEP composition coefficient layout. Cheap (a few + /// field elements + Vec allocations); the heavy LDE compute is + /// deferred to [`compute_deep_lde_with_coeffs`] which runs **inside + /// the bucket loop**, so DEEP_i is materialised one at a time and + /// folded into the bucket accumulator with `δ_fri^i` before + /// dropping. + fn sample_deep_coeffs( air: &dyn AIR, domain: &Domain, - round_1_result: &Round1, round_2_result: &Round2, - round_3_result: &Round3, - z: &FieldElement, + z: FieldElement, transcript: &mut impl IsStarkTranscript, - ) -> Round4 + ) -> DeepCoeffs where FieldElement: AsBytes, FieldElement: AsBytes, { - let coset_offset_u64 = air.context().proof_options.coset_offset; - let coset_offset = FieldElement::::from(coset_offset_u64); - let gamma = transcript.sample_field_element(); let n_terms_composition_poly = round_2_result.lde_composition_poly_evaluations.len(); @@ -1670,84 +1670,64 @@ pub trait IsStarkProver< // <<<< Receive challenges: 𝛾ⱼ, 𝛾ⱼ' let gammas = deep_composition_coefficients; - // Compute p₀ (deep composition polynomial) as N evaluations on trace-size coset + let lde_size = domain.lde_roots_of_unity_coset.len(); + DeepCoeffs { + z, + gammas, + trace_term_coeffs, + lde_size, + } + } + + /// Round 3.5 part B — compute the DEEP composition polynomial on + /// the LDE coset (bit-reverse permuted, ready for FRI). Pure + /// computation, no transcript interaction. 
Called once per + /// chunk-mate inside the bucket loop so we can drop each DEEP_i + /// after folding it into the bucket-shared accumulator. + fn compute_deep_lde_with_coeffs( + domain: &Domain, + round_1_result: &Round1, + round_2_result: &Round2, + round_3_result: &Round3, + coeffs: &DeepCoeffs, + ) -> Vec> + where + FieldElement: AsBytes, + FieldElement: AsBytes, + { #[cfg(feature = "instruments")] let t_sub = Instant::now(); let deep_evals = Self::compute_deep_composition_poly_evaluations( &round_1_result.lde_trace, round_2_result, round_3_result, - z, + &coeffs.z, domain, &domain.trace_primitive_root, - &gammas, - &trace_term_coeffs, + &coeffs.gammas, + &coeffs.trace_term_coeffs, ); #[cfg(feature = "instruments")] let other_dur_1 = t_sub.elapsed(); - // DEEP evaluations are already at 2N LDE points — just bit-reverse for FRI. - // No iFFT+FFT extension needed (Plonky3-style direct LDE computation). - let domain_size = domain.lde_roots_of_unity_coset.len(); + // DEEP evaluations are already at 2N LDE points; bit-reverse + // to make them FRI-input compatible without an FFT extension. 
#[cfg(feature = "instruments")] let t_sub = Instant::now(); - let mut lde_evals = deep_evals; - in_place_bit_reverse_permute(&mut lde_evals); + let mut deep_lde = deep_evals; + in_place_bit_reverse_permute(&mut deep_lde); #[cfg(feature = "instruments")] let r4_fft_dur = t_sub.elapsed(); - // FRI commit phase from pre-computed evaluations #[cfg(feature = "instruments")] - let t_sub = Instant::now(); - let (fri_last_value, fri_layers) = - fri::commit_phase_from_evaluations::( - domain.root_order as usize, - lde_evals, - transcript, - &coset_offset, - domain_size, - ); - #[cfg(feature = "instruments")] - let r4_merkle_dur = t_sub.elapsed(); - - // grinding: generate nonce and append it to the transcript - #[cfg(feature = "instruments")] - let t_sub = Instant::now(); - let security_bits = air.context().proof_options.grinding_factor; - let mut nonce = None; - if security_bits > 0 { - let nonce_value = grinding::generate_nonce(&transcript.state(), security_bits) - .expect("nonce not found"); - transcript.append_bytes(&nonce_value.to_be_bytes()); - nonce = Some(nonce_value); - } - - let number_of_queries = air.options().fri_number_of_queries; - let iotas = Self::sample_query_indexes(number_of_queries, domain, transcript); - - let query_list = fri::query_phase(&fri_layers, &iotas); - - let fri_layers_merkle_roots: Vec<_> = fri_layers - .iter() - .map(|layer| layer.merkle_tree.root) - .collect(); - - let deep_poly_openings = - Self::open_deep_composition_poly(domain, round_1_result, round_2_result, &iotas); - - #[cfg(feature = "instruments")] - { - let queries_dur = t_sub.elapsed(); - crate::instruments::store_r4_sub(r4_fft_dur, r4_merkle_dur, other_dur_1, queries_dur); - } + crate::instruments::store_r4_sub( + r4_fft_dur, + std::time::Duration::ZERO, + other_dur_1, + std::time::Duration::ZERO, + ); - Round4 { - fri_last_value, - fri_layers_merkle_roots, - deep_poly_openings, - query_list, - nonce, - } + deep_lde } fn sample_query_indexes( @@ -2614,6 +2594,12 @@ pub trait 
IsStarkProver< } } + // Capture pre-fork shared transcript state. Phase D's per-chunk + // bucket seed clones this and replays chunk-local data + // (table_contributions, comp root, all chunk-mate OODs) canonically + // so every bucket-mate reaches the same δ_fri / iotas state. + let pre_fork_transcript = transcript.clone(); + // Pre-fork all transcripts (cheap, sequential — must match verifier ordering). // Happens AFTER all per-chunk aux MMCS roots have been absorbed. let mut table_transcripts: Vec<_> = (0..num_airs) @@ -2689,6 +2675,8 @@ pub trait IsStarkProver< (0..num_airs).map(|_| None).collect(); let mut comp_mmcs_roots_per_chunk: Vec> = Vec::new(); let mut comp_mmcs_specs_per_chunk: Vec> = Vec::new(); + let mut fri_chunk_buckets_per_chunk: Vec>> = + Vec::new(); let mut lde_drain = cached_ldes.into_iter(); for chunk_start in (0..num_airs).step_by(k) { let chunk_end = (chunk_start + k).min(num_airs); @@ -2815,68 +2803,279 @@ pub trait IsStarkProver< }); } - // Phase R2b → R4 (sequential within chunk): each fork has - // the chunk comp root absorbed; sample z, run R3 OOD + R4 - // FRI. Same rationale as R2a above. + // Phase R2b → R3.5 (parallel within chunk via rayon): each + // fork already saw the chunk comp root; sample z, compute + + // absorb own OOD, sample γ + DEEP coeffs. The heavy DEEP LDE + // computation is deferred to the bucket loop below. Rayon + // restores per-chunk-mate parallelism that c22fca9e had + // serialised — safe because each fork owns its mutable + // transcript slot and the rest of the captured state is + // read-only and Sync. 
+ let chunk_airs: Vec<&dyn AIR< + Field = Field, + FieldExtension = FieldExtension, + PublicInputs = PI, + >> = (chunk_start..chunk_end) + .map(|i| air_trace_pairs[i].0) + .collect(); + #[cfg(feature = "instruments")] + let chunk_air_names: Vec = + chunk_airs.iter().map(|a| a.name().to_string()).collect(); + #[cfg(feature = "instruments")] + let chunk_num_rows: Vec = (chunk_start..chunk_end) + .map(|i| air_trace_pairs[i].1.num_rows()) + .collect(); + let chunk_transcripts = &mut table_transcripts[chunk_start..chunk_end]; - let r2b_iter = chunk_round1 - .iter() - .zip(chunk_round2.iter()) - .zip(chunk_transcripts.iter_mut()) - .enumerate(); - let chunk_results: Vec> = r2b_iter - .map(|(j, ((round_1_result, round_2_result), table_transcript))| { - let idx = chunk_start + j; - let (air, trace, pub_inputs) = &air_trace_pairs[idx]; - let _ = trace; // used by instruments - let domain = &domains[idx]; + #[cfg(feature = "parallel")] + let r2b_iter = chunk_transcripts.par_iter_mut().enumerate(); + #[cfg(not(feature = "parallel"))] + let r2b_iter = chunk_transcripts.iter_mut().enumerate(); - #[cfg(feature = "instruments")] + #[cfg(feature = "instruments")] + #[allow(clippy::type_complexity)] + let r2b_results: Vec, DeepCoeffs, (String, usize, std::time::Duration, crate::instruments::TableSubOps)), ProvingError>> = r2b_iter + .map(|(j, table_transcript)| { + let air = chunk_airs[j]; + let round_1_result = &chunk_round1[j]; + let round_2_result = &chunk_round2[j]; + let domain = &domains[chunk_start + j]; let table_start = Instant::now(); - - let proof = Self::prove_rounds_2b_to_4( - *air, - *pub_inputs, + let (round_3, deep_coeffs) = Self::prove_rounds_2b_to_3_5( + air, round_1_result, round_2_result, table_transcript, domain, )?; + let sub_ops = crate::instruments::take_round_sub_ops().unwrap_or_default(); + let timing = ( + chunk_air_names[j].clone(), + chunk_num_rows[j], + table_start.elapsed(), + sub_ops, + ); + Ok((j, round_3, deep_coeffs, timing)) + }) + .collect(); - 
#[cfg(feature = "instruments")] - let table_timing = { - let sub_ops = crate::instruments::take_round_sub_ops().unwrap_or_default(); - ( - air.name().to_string(), - trace.num_rows(), - table_start.elapsed(), - sub_ops, - ) - }; - - #[cfg(feature = "instruments")] - return Ok((j, proof, table_timing)); - #[cfg(not(feature = "instruments"))] - Ok((j, proof)) + #[cfg(not(feature = "instruments"))] + #[allow(clippy::type_complexity)] + let r2b_results: Vec, DeepCoeffs), ProvingError>> = r2b_iter + .map(|(j, table_transcript)| { + let air = chunk_airs[j]; + let round_1_result = &chunk_round1[j]; + let round_2_result = &chunk_round2[j]; + let domain = &domains[chunk_start + j]; + let (round_3, deep_coeffs) = Self::prove_rounds_2b_to_3_5( + air, + round_1_result, + round_2_result, + table_transcript, + domain, + )?; + Ok((j, round_3, deep_coeffs)) }) .collect(); - for result in chunk_results { + // Collect R2b results in chunk-local-index order. + let mut chunk_round3: Vec>> = + (0..chunk_size).map(|_| None).collect(); + let mut chunk_deep_coeffs: Vec>> = + (0..chunk_size).map(|_| None).collect(); + for r in r2b_results { #[cfg(feature = "instruments")] { - let (j, proof, timing) = result?; - let idx = chunk_start + j; - proofs[idx] = Some(proof); + let (j, r3, dc, timing) = r?; + chunk_round3[j] = Some(r3); + chunk_deep_coeffs[j] = Some(dc); table_timings.push(timing); } #[cfg(not(feature = "instruments"))] { - let (j, proof) = result?; + let (j, r3, dc) = r?; + chunk_round3[j] = Some(r3); + chunk_deep_coeffs[j] = Some(dc); + } + } + let chunk_round3: Vec> = chunk_round3 + .into_iter() + .map(|r| r.expect("R3 populated for every chunk-mate")) + .collect(); + let chunk_deep_coeffs: Vec> = chunk_deep_coeffs + .into_iter() + .map(|d| d.expect("DEEP coeffs populated for every chunk-mate")) + .collect(); + + // Chunk join 2: bucket-shared transcript built by canonical replay + // of chunk-local data on the pre-fork state. 
Verifier reconstructs + // identical seed from proof data only. + let mut bucket_seed = pre_fork_transcript.clone(); + for j in 0..chunk_size { + if let Some(ref bpi) = chunk_round1[j].bus_public_inputs { + bucket_seed.append_field_element(&bpi.table_contribution); + } + } + if let Some(ref root) = comp_mmcs_roots_per_chunk[chunk_idx] { + bucket_seed.append_bytes(root); + } + for j in 0..chunk_size { + let round_3 = &chunk_round3[j]; + for col in round_3.trace_ood_evaluations.columns().iter() { + for elem in col.iter() { + bucket_seed.append_field_element(elem); + } + } + for elem in round_3.composition_poly_parts_ood_evaluation.iter() { + bucket_seed.append_field_element(elem); + } + } + + // Bucket by lde_size (first-encounter order). + let mut bucket_indices: Vec> = Vec::new(); + let mut bucket_lde_sizes: Vec = Vec::new(); + for j in 0..chunk_size { + let sz = chunk_deep_coeffs[j].lde_size; + match bucket_lde_sizes.iter().position(|&s| s == sz) { + Some(b) => bucket_indices[b].push(j), + None => { + bucket_lde_sizes.push(sz); + bucket_indices.push(vec![j]); + } + } + } + + let mut chunk_buckets: Vec> = + Vec::with_capacity(bucket_indices.len()); + let mut bucket_iotas_per_bucket: Vec> = + Vec::with_capacity(bucket_indices.len()); + + for (members, &lde_size) in bucket_indices.iter().zip(bucket_lde_sizes.iter()) { + let mut bt = bucket_seed.clone(); + bt.append_bytes(&(lde_size as u64).to_le_bytes()); + let delta_fri: FieldElement = bt.sample_field_element(); + + let leader_idx = chunk_start + members[0]; + let (leader_air, _, _) = &air_trace_pairs[leader_idx]; + let leader_domain = &domains[leader_idx]; + let coset_offset = + FieldElement::::from(leader_air.context().proof_options.coset_offset); + + // Streaming bucket combine: build each member's DEEP LDE + // one at a time, fold into the bucket accumulator with + // δ_fri^i, then drop. Peak DEEP memory inside this loop: + // 2 × |LDE| (current member + accumulator). 
+ let mut combined: Vec> = + Vec::new(); + let mut delta_power = FieldElement::::one(); + for (i_local, &j) in members.iter().enumerate() { let idx = chunk_start + j; - proofs[idx] = Some(proof); + let domain_j = &domains[idx]; + let round_1_j = &chunk_round1[j]; + let round_2_j = &chunk_round2[j]; + let round_3_j = &chunk_round3[j]; + let coeffs_j = &chunk_deep_coeffs[j]; + let deep_lde_j = Self::compute_deep_lde_with_coeffs( + domain_j, + round_1_j, + round_2_j, + round_3_j, + coeffs_j, + ); + debug_assert_eq!(deep_lde_j.len(), lde_size); + if i_local == 0 { + // First member becomes the accumulator as-is: no + // zero-filled buffer, no multiply-by-one. + combined = deep_lde_j; + } else { + for (acc, src) in combined.iter_mut().zip(deep_lde_j.iter()) { + *acc = &*acc + &delta_power * src; + } + } + delta_power = &delta_power * &delta_fri; + } + + let (last_value, fri_layers) = + fri::commit_phase_from_evaluations::( + leader_domain.root_order as usize, + combined, + &mut bt, + &coset_offset, + lde_size, + ); + + let security_bits = leader_air.context().proof_options.grinding_factor; + let nonce = if security_bits > 0 { + let nonce_value = grinding::generate_nonce(&bt.state(), security_bits) + .expect("bucket-FRI grinding nonce not found"); + bt.append_bytes(&nonce_value.to_be_bytes()); + Some(nonce_value) + } else { + None + }; + + let number_of_queries = leader_air.options().fri_number_of_queries; + let iotas = Self::sample_query_indexes(number_of_queries, leader_domain, &mut bt); + let decommitments = fri::query_phase(&fri_layers, &iotas); + let layer_roots: Vec = fri_layers + .iter() + .map(|layer| layer.merkle_tree.root) + .collect(); + let member_tags: Vec = members + .iter() + .map(|&j| main_tags[chunk_start + j]) + .collect(); + + chunk_buckets.push(crate::proof::stark::ChunkBucketFri { + lde_size: lde_size as u32, + members: member_tags, + layer_roots, + last_value, + decommitments, + nonce, + }); + bucket_iotas_per_bucket.push(iotas); +
} + fri_chunk_buckets_per_chunk.push(chunk_buckets); + + let mut member_bucket_idx: Vec = vec![0; chunk_size]; + for (b, members) in bucket_indices.iter().enumerate() { + for &j in members.iter() { + member_bucket_idx[j] = b; } } + + // Per chunk-mate: open at bucket-shared iotas + assemble StarkProof. + for j in 0..chunk_size { + let idx = chunk_start + j; + let (_, _, pub_inputs) = &air_trace_pairs[idx]; + let domain = &domains[idx]; + let round_1_result = &chunk_round1[j]; + let round_2_result = &chunk_round2[j]; + let bucket_idx = member_bucket_idx[j]; + let iotas = &bucket_iotas_per_bucket[bucket_idx]; + let deep_poly_openings = Self::open_deep_composition_poly( + domain, + round_1_result, + round_2_result, + iotas, + ); + let round_3 = &chunk_round3[j]; + let proof = StarkProof { + lde_trace_main_merkle_root: round_1_result.main.main_tree_root(), + lde_trace_precomputed_merkle_root: round_1_result.main.precomputed_root(), + trace_ood_evaluations: round_3.trace_ood_evaluations.clone(), + composition_poly_parts_ood_evaluation: round_3 + .composition_poly_parts_ood_evaluation + .clone(), + deep_poly_openings, + bus_public_inputs: round_1_result.bus_public_inputs.clone(), + public_inputs: (*pub_inputs).clone(), + trace_length: domain.interpolation_domain_size, + }; + proofs[idx] = Some(proof); + } } let proofs: Vec> = proofs @@ -2909,6 +3108,7 @@ pub trait IsStarkProver< comp_mmcs_roots: comp_mmcs_roots_per_chunk, comp_mmcs_specs: comp_mmcs_specs_per_chunk, chunk_size: k as u32, + fri_chunk_buckets: fri_chunk_buckets_per_chunk, }) } @@ -3002,19 +3202,26 @@ pub trait IsStarkProver< Ok((transition_coefficients, boundary_coefficients, r2a)) } - /// Part B of Round 2 onward: assumes the chunk composition MMCS root - /// has been absorbed into `transcript` already. Runs the absorb of - /// the per-table H_i values, R3 OOD, and R4 FRI + opens, producing - /// the final per-table StarkProof. 
+ /// Part B of Round 2 through R3.5 (light): assumes the chunk + /// composition MMCS root has been absorbed into the per-fork + /// `transcript`. Runs z-sample, OOD computation + own-OOD absorb, + /// then samples γ + builds the DEEP composition **coefficients**. + /// + /// Does NOT build the DEEP LDE — that's deferred to + /// [`compute_deep_lde_with_coeffs`] which runs inside the bucket + /// FRI loop, so DEEP_i is materialised one at a time, folded into + /// the bucket accumulator with δ_fri^i, then dropped. #[allow(clippy::too_many_arguments)] - fn prove_rounds_2b_to_4( + fn prove_rounds_2b_to_3_5( air: &dyn AIR, - pub_inputs: &PI, round_1_result: &Round1, round_2_result: &Round2, transcript: &mut impl IsStarkTranscript, domain: &Domain, - ) -> Result, ProvingError> + ) -> Result< + (Round3, DeepCoeffs), + ProvingError, + > where FieldElement: AsBytes, FieldElement: AsBytes, @@ -3054,15 +3261,15 @@ pub trait IsStarkProver< } // =================================== - // ==========| Round 4 |========== + // ==========| Round 3.5 |========== // =================================== - let round_4_result = Self::round_4_compute_and_run_fri_on_the_deep_composition_polynomial( + // Sample γ + build DEEP coefficients (lightweight). The DEEP LDE + // itself is computed later inside the bucket FRI loop. 
+ let deep_coeffs = Self::sample_deep_coeffs( air, domain, - round_1_result, round_2_result, - &round_3_result, - &z, + z, transcript, ); @@ -3071,37 +3278,21 @@ pub trait IsStarkProver< let zero = std::time::Duration::ZERO; let (r2_constraints, r2_fft, r2_merkle) = crate::instruments::take_r2_sub().unwrap_or((zero, zero, zero)); - let (r4_fft, r4_merkle, r4_deep_comp, r4_queries) = - crate::instruments::take_r4_sub().unwrap_or((zero, zero, zero, zero)); crate::instruments::store_round_sub_ops(crate::instruments::TableSubOps { constraints: r2_constraints, comp_decompose: r2_fft, comp_commit: r2_merkle, ood: round_3_dur, - deep_comp: r4_deep_comp, - deep_extend: r4_fft, - fri_commit: r4_merkle, - queries: r4_queries, + deep_comp: zero, + deep_extend: zero, + fri_commit: zero, + queries: zero, }); } info!("End proof generation"); - Ok(StarkProof { - lde_trace_main_merkle_root: round_1_result.main.main_tree_root(), - lde_trace_precomputed_merkle_root: round_1_result.main.precomputed_root(), - trace_ood_evaluations: round_3_result.trace_ood_evaluations, - composition_poly_parts_ood_evaluation: round_3_result - .composition_poly_parts_ood_evaluation, - fri_layers_merkle_roots: round_4_result.fri_layers_merkle_roots, - fri_last_value: round_4_result.fri_last_value, - query_list: round_4_result.query_list, - deep_poly_openings: round_4_result.deep_poly_openings, - nonce: round_4_result.nonce, - bus_public_inputs: round_1_result.bus_public_inputs.clone(), - public_inputs: pub_inputs.clone(), - trace_length: domain.interpolation_domain_size, - }) + Ok((round_3_result, deep_coeffs)) } } diff --git a/crypto/stark/src/tests/bucket_fri_soundness_tests.rs b/crypto/stark/src/tests/bucket_fri_soundness_tests.rs new file mode 100644 index 000000000..10db5169f --- /dev/null +++ b/crypto/stark/src/tests/bucket_fri_soundness_tests.rs @@ -0,0 +1,168 @@ +//! Phase D — per-(chunk, lde_size) batched FRI soundness tests. +//! +//! 
Every test starts from a baseline-valid multi-proof, then tampers +//! with a single field on the bucket-FRI path inside `MultiProof:: +//! fri_chunk_buckets` and asserts the verifier rejects. Pre-existing +//! main / aux / composition MMCS path soundness is covered by +//! `mmcs_soundness_tests`, `mmcs_aux_soundness_tests`, and the +//! composition tests inside `mmcs_soundness_tests`. + +use crypto::fiat_shamir::default_transcript::DefaultTranscript; +use math::field::{element::FieldElement, goldilocks::GoldilocksField}; + +use crate::examples::{ + bit_flags::{self, BitFlagsAIR}, + dummy_air::{self, DummyAIR}, +}; +use crate::proof::options::ProofOptions; +use crate::proof::stark::MultiProof; +use crate::test_utils::{multi_prove_ram, multi_verify_ram}; +use crate::traits::AIR; + +type F = GoldilocksField; + +#[allow(clippy::type_complexity)] +fn baseline_proof() -> (DummyAIR, BitFlagsAIR, MultiProof) { + let proof_options = ProofOptions::default_test_options(); + let air_1 = DummyAIR::new(&proof_options); + let air_2 = BitFlagsAIR::new(&proof_options); + let mut trace_1 = dummy_air::dummy_trace::(16); + let mut trace_2 = bit_flags::bit_prefix_flag_trace(32); + let air_trace_pairs: Vec<( + &dyn AIR, + &mut _, + &_, + )> = vec![(&air_1, &mut trace_1, &()), (&air_2, &mut trace_2, &())]; + let proof = multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::::new(&[])).unwrap(); + (air_1, air_2, proof) +} + +fn verify( + airs: &[&dyn AIR], + proof: &MultiProof, +) -> bool { + multi_verify_ram( + airs, + proof, + &mut DefaultTranscript::::new(&[]), + &FieldElement::zero(), + ) +} + +/// Locate the first chunk whose `fri_chunk_buckets` is non-empty and the +/// first bucket inside it. Used by tampering tests to find a real bucket +/// to mutate. 
+fn first_bucket_mut( + proof: &mut MultiProof, +) -> (usize, usize) { + let chunk_idx = proof + .fri_chunk_buckets + .iter() + .position(|c| !c.is_empty()) + .expect("baseline has at least one non-empty fri_chunk_buckets entry"); + (chunk_idx, 0) +} + +#[test_log::test] +fn baseline_phase_d_proof_verifies() { + let (air_1, air_2, proof) = baseline_proof(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(verify(&airs, &proof), "baseline Phase D proof must verify"); + // Sanity: fri_chunk_buckets is parallel to per-chunk MMCS vecs. + assert_eq!(proof.fri_chunk_buckets.len(), proof.main_mmcs_roots.len()); + // Every populated bucket must have non-empty members + at least one + // decommitment per fri query. + for chunk in &proof.fri_chunk_buckets { + for bucket in chunk { + assert!(!bucket.members.is_empty()); + assert!(!bucket.decommitments.is_empty()); + } + } +} + +#[test_log::test] +fn tampered_bucket_last_value_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let (ci, bi) = first_bucket_mut(&mut proof); + proof.fri_chunk_buckets[ci][bi].last_value = + &proof.fri_chunk_buckets[ci][bi].last_value + FieldElement::::one(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn tampered_bucket_layer_root_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let (ci, bi) = first_bucket_mut(&mut proof); + if proof.fri_chunk_buckets[ci][bi].layer_roots.is_empty() { + // Trivially-small LDE: no committed FRI layers to tamper with; + // tampering last_value above already covers that case. 
+ return; + } + proof.fri_chunk_buckets[ci][bi].layer_roots[0][0] ^= 0xFF; + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn truncated_bucket_decommitments_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let (ci, bi) = first_bucket_mut(&mut proof); + assert!(!proof.fri_chunk_buckets[ci][bi].decommitments.is_empty()); + proof.fri_chunk_buckets[ci][bi].decommitments.pop(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn missing_chunk_buckets_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + // Wipe a chunk's bucket list; verifier checks bucket count matches + // the lde-size grouping expected from the AIRs in the chunk. + let (ci, _) = first_bucket_mut(&mut proof); + proof.fri_chunk_buckets[ci].clear(); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn wrong_bucket_lde_size_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + let (ci, bi) = first_bucket_mut(&mut proof); + let actual = proof.fri_chunk_buckets[ci][bi].lde_size; + // Bump to a different power of two — verifier reconstructs expected + // lde_size from per-AIR blowup × trace_length and rejects mismatch. + proof.fri_chunk_buckets[ci][bi].lde_size = actual.wrapping_mul(2); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(!verify(&airs, &proof)); +} + +#[test_log::test] +fn swapped_member_order_rejected() { + let (air_1, air_2, mut proof) = baseline_proof(); + // Find a bucket with ≥ 2 members and swap their order. The verifier + // requires bucket members in canonical chunk-local-index order so a + // tag swap shifts δ_fri^i powers and rejects the combined FRI. 
+ let target = proof + .fri_chunk_buckets + .iter_mut() + .enumerate() + .find_map(|(ci, c)| c.iter_mut().enumerate().find_map(|(bi, b)| { + if b.members.len() >= 2 { Some((ci, bi)) } else { None } + })); + let Some((ci, bi)) = target else { + // Single-table-per-bucket baseline — swap is not applicable; in + // practice every chunk-mate becomes its own singleton bucket here. + return; + }; + proof.fri_chunk_buckets[ci][bi].members.swap(0, 1); + let airs: Vec<&dyn AIR> = + vec![&air_1, &air_2]; + assert!(!verify(&airs, &proof)); +} diff --git a/crypto/stark/src/tests/mod.rs b/crypto/stark/src/tests/mod.rs index b42b2abd9..d0fe0530a 100644 --- a/crypto/stark/src/tests/mod.rs +++ b/crypto/stark/src/tests/mod.rs @@ -1,4 +1,5 @@ pub mod air_tests; +pub mod bucket_fri_soundness_tests; pub mod bus_tests; pub mod domain_cache_stats; pub mod fri_tests; diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs index dc614fb85..4069267e7 100644 --- a/crypto/stark/src/verifier.rs +++ b/crypto/stark/src/verifier.rs @@ -238,54 +238,18 @@ pub trait IsStarkVerifier< composition_poly_claimed_ood_evaluation == composition_poly_ood_evaluation } - /// Reconstructs the Deep composition polynomial evaluations at the challenge indices values using the provided - /// openings of the trace polynomials and the composition polynomial parts. It then uses these to verify that the - /// FRI decommitments are valid and correspond to the Deep composition polynomial. - fn step_3_verify_fri( + /// Reconstruct the per-table DEEP composition evaluations `D_i(iota)` and + /// `D_i(-iota)` for ONE table at every query index. Used by the + /// chunk-bucket FRI verification (Phase D) to combine bucket-mates + /// into the polynomial actually committed by FRI. 
+ fn reconstruct_d_evaluations_for_table( proof: &StarkProof, domain: &VerifierDomain, challenges: &Challenges, - ) -> bool - where - FieldElement: AsBytes + Sync + Send, - FieldElement: AsBytes + Sync + Send, - { - let (deep_poly_evaluations, deep_poly_evaluations_sym) = - match Self::reconstruct_deep_composition_poly_evaluations_for_all_queries( - challenges, domain, proof, - ) { - Some(pair) => pair, - None => return false, - }; - - // verify FRI - let mut evaluation_point_inverse = challenges - .iotas - .iter() - .map(|iota| Self::query_challenge_to_evaluation_point(*iota, false, domain)) - .collect::>>(); - // Any zero evaluation point means a malformed query index, reject. - if FieldElement::inplace_batch_inverse(&mut evaluation_point_inverse).is_err() { - return false; - } - - proof - .query_list - .iter() - .zip(&challenges.iotas) - .zip(evaluation_point_inverse) - .enumerate() - .all(|(i, ((proof_s, iota_s), eval))| { - Self::verify_query_and_sym_openings( - proof, - &challenges.zetas, - *iota_s, - proof_s, - eval, - &deep_poly_evaluations[i], - &deep_poly_evaluations_sym[i], - ) - }) + ) -> Option> { + Self::reconstruct_deep_composition_poly_evaluations_for_all_queries( + challenges, domain, proof, + ) } /// Returns the field element element of the domain `domain` corresponding to the given FRI query index challenge `iota`. @@ -514,16 +478,16 @@ pub trait IsStarkVerifier< ) } - /// Verify a single FRI query - /// `zetas`: the vector of all challenges sent by the verifier to the prover at the commit - /// phase to fold polynomials. - /// `iota`: the index challenge of this FRI query. This index uniquely determines two elements 𝜐 and -𝜐 - /// of the evaluation domain of FRI layer 0. - /// `evaluation_point_inv`: precomputed value of 𝜐⁻¹. - /// `deep_composition_evaluation`: precomputed value of p₀(𝜐), where p₀ is the deep composition polynomial. 
- /// `deep_composition_evaluation_sym`: precomputed value of p₀(-𝜐), where p₀ is the deep composition polynomial. - fn verify_query_and_sym_openings( - proof: &StarkProof, + /// Verify a single bucket-FRI query. + /// + /// `fri_layers_merkle_roots` / `fri_last_value` come from the bucket + /// (`ChunkBucketFri`), not from any per-table proof. `deep_composition_*` + /// is `D_combined(±iota)` — the linear combination of bucket-mates' + /// reconstructed D_i evaluations with successive powers of `delta_fri`. + #[allow(clippy::too_many_arguments)] + fn verify_bucket_fri_query( + fri_layers_merkle_roots: &[Commitment], + fri_last_value: &FieldElement, zetas: &[FieldElement], iota: usize, fri_decommitment: &FriDecommitment, @@ -535,7 +499,6 @@ pub trait IsStarkVerifier< FieldElement: AsBytes + Sync + Send, FieldElement: AsBytes + Sync + Send, { - let fri_layers_merkle_roots = &proof.fri_layers_merkle_roots; let evaluation_point_vec: Vec> = core::iter::successors(Some(evaluation_point_inv.square()), |evaluation_point| { Some(evaluation_point.square()) @@ -551,16 +514,11 @@ pub trait IsStarkVerifier< (p0_eval + p0_eval_sym) + evaluation_point_inv * &zetas[0] * (p0_eval - p0_eval_sym); let mut index = iota; - // Handle case with 0 FRI layers (trace_length <= 2) - // In this case, the fold loop below doesn't iterate, so we need to verify - // the final value directly here. + // 0-layer FRI (trivially small LDE): folded p0 must equal the bucket's last_value. if fri_layers_merkle_roots.is_empty() { - return v == proof.fri_last_value; + return v == *fri_last_value; } - // For each FRI layer, starting from the layer 1: use the proof to verify the validity of values pᵢ(−𝜐^(2ⁱ)) (given by the prover) and - // pᵢ(𝜐^(2ⁱ)) (computed on the previous iteration by the verifier). Then use them to obtain pᵢ₊₁(𝜐^(2ⁱ⁺¹)). - // Finally, check that the final value coincides with the given by the prover. 
fri_layers_merkle_roots .iter() .enumerate() @@ -574,9 +532,6 @@ pub trait IsStarkVerifier< (((i, merkle_root), auth_path_sym), evaluation_sym), evaluation_point_inv, )| { - // Verify opening Open(pᵢ(Dₖ), −𝜐^(2ⁱ)) and Open(pᵢ(Dₖ), 𝜐^(2ⁱ)). - // `v` is pᵢ(𝜐^(2ⁱ)). - // `evaluation_sym` is pᵢ(−𝜐^(2ⁱ)). let openings_ok = Self::verify_fri_layer_openings( merkle_root, auth_path_sym, @@ -585,19 +540,13 @@ pub trait IsStarkVerifier< index, ); - // Update `v` with next value pᵢ₊₁(𝜐^(2ⁱ⁺¹)). v = (&v + evaluation_sym) + evaluation_point_inv * &zetas[i + 1] * (&v - evaluation_sym); - - // Update index for next iteration. The index of the squares in the next layer - // is obtained by halving the current index. This is due to the bit-reverse - // ordering of the elements in the Merkle tree. index >>= 1; if i < fri_decommitment.layers_evaluations_sym.len() - 1 { result & openings_ok } else { - // Check that final value is the given by the prover - result & (v == proof.fri_last_value) & openings_ok + result & (v == *fri_last_value) & openings_ok } }, ) @@ -1037,63 +986,333 @@ pub trait IsStarkVerifier< } // ===================================================================== - // Rounds 2-4: Forked per table + // Rounds 2 → 3.5 per-fork replay + per-chunk bucket FRI (Phase D) // ===================================================================== - // Each table gets an independent transcript fork (cloned from the - // shared state after the aux MMCS absorb above, domain-separated by - // table index). This matches the prover's forking and makes - // per-table verification independent. + // Per chunk-mate: build fork, replay through γ + step 2 verify. + // Then per chunk: build the bucket-shared transcript, verify each + // height bucket's batched FRI, and use the bucket-shared iotas to + // authenticate every per-query trace / aux / composition opening. 
+ + let num_tables = airs.len(); + let pre_fork_transcript = transcript.clone(); + let mut challenges_per_table: Vec>> = + (0..num_tables).map(|_| None).collect(); for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() { - // Must match prover: fork with domain separator for multi-table, - // use original transcript directly for single-table. - let num_tables = airs.len(); let mut table_transcript = transcript.clone(); if num_tables > 1 { table_transcript.append_bytes(&(idx as u64).to_le_bytes()); } - - // Bind table_contribution (L) to transcript, matching prover. if let Some(ref bpi) = proof.bus_public_inputs { table_transcript.append_field_element(&bpi.table_contribution); } - // Per-chunk lookup: each table's main / aux / comp MMCS - // root + spec come from its chunk. let table_chunk_idx = idx / chunk_size; - let main_root_for_chunk = - multi_proof.main_mmcs_roots[table_chunk_idx].as_ref(); - let main_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = - &multi_proof.main_mmcs_specs[table_chunk_idx]; - let aux_root_for_chunk = multi_proof.aux_mmcs_roots[table_chunk_idx].as_ref(); - let aux_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = - &multi_proof.aux_mmcs_specs[table_chunk_idx]; let comp_root_for_chunk = multi_proof.comp_mmcs_roots[table_chunk_idx].as_ref(); - let comp_spec_for_chunk: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = - &multi_proof.comp_mmcs_specs[table_chunk_idx]; - if !Self::verify_rounds_2_to_4( + let chal = match Self::replay_and_verify_step_2( *air, proof, &mut table_transcript, lookup_challenges.clone(), - main_tags[idx], - main_root_for_chunk, - main_spec_for_chunk, - aux_root_for_chunk, - aux_spec_for_chunk, comp_root_for_chunk, - comp_spec_for_chunk, ) { + Some(c) => c, + None => { + error!( + "Table {} failed replay_and_verify_step_2 (num_constraints={}, trace_cols={})", + idx, + air.context().num_transition_constraints, + air.context().trace_columns + ); + return false; + 
} + }; + challenges_per_table[idx] = Some(chal); + } + + // Per-chunk: build bucket_seed (canonical replay on pre-fork state), + // validate fri_chunk_buckets[chunk_idx] structure, verify each + // bucket's batched FRI, then per chunk-mate verify step 4. + if multi_proof.fri_chunk_buckets.len() != expected_num_chunks { + error!( + "fri_chunk_buckets outer length {} != expected_num_chunks {}", + multi_proof.fri_chunk_buckets.len(), + expected_num_chunks, + ); + return false; + } + + for chunk_idx in 0..expected_num_chunks { + let chunk_start = chunk_idx * chunk_size; + let chunk_end = (chunk_start + chunk_size).min(num_tables); + + // bucket_seed: clone pre-fork shared state + canonical replay. + let mut bucket_seed = pre_fork_transcript.clone(); + for idx in chunk_start..chunk_end { + if let Some(ref bpi) = multi_proof.proofs[idx].bus_public_inputs { + bucket_seed.append_field_element(&bpi.table_contribution); + } + } + if let Some(ref root) = multi_proof.comp_mmcs_roots[chunk_idx] { + bucket_seed.append_bytes(root); + } + for idx in chunk_start..chunk_end { + let p = &multi_proof.proofs[idx]; + for col in p.trace_ood_evaluations.columns().iter() { + for elem in col.iter() { + bucket_seed.append_field_element(elem); + } + } + for elem in p.composition_poly_parts_ood_evaluation.iter() { + bucket_seed.append_field_element(elem); + } + } + + // Expected bucketing: first-encounter order by lde_size. 
+ let mut expected_bucket_indices: Vec> = Vec::new(); + let mut expected_bucket_lde_sizes: Vec = Vec::new(); + for j in 0..(chunk_end - chunk_start) { + let idx = chunk_start + j; + let lde_size = multi_proof.proofs[idx].trace_length + * airs[idx].options().blowup_factor as usize; + match expected_bucket_lde_sizes.iter().position(|&s| s == lde_size) { + Some(b) => expected_bucket_indices[b].push(j), + None => { + expected_bucket_lde_sizes.push(lde_size); + expected_bucket_indices.push(vec![j]); + } + } + } + + let chunk_buckets = &multi_proof.fri_chunk_buckets[chunk_idx]; + if chunk_buckets.len() != expected_bucket_indices.len() { error!( - "Table {} failed verify_rounds_2_to_4 (num_constraints={}, trace_cols={})", - idx, - air.context().num_transition_constraints, - air.context().trace_columns + "chunk {chunk_idx}: bucket count {} != expected {}", + chunk_buckets.len(), + expected_bucket_indices.len(), ); return false; } + + // map chunk-local-index → bucket index (for step 4 dispatch). + let mut member_bucket_idx: Vec = vec![0; chunk_end - chunk_start]; + // Cache bucket iotas: derived once during FRI verification, + // reused in step 4 without re-cloning the bucket transcript. 
+ let mut bucket_iotas_cache: Vec> = + Vec::with_capacity(chunk_buckets.len()); + + for (b, bucket) in chunk_buckets.iter().enumerate() { + let expected_members = &expected_bucket_indices[b]; + let expected_lde_size = expected_bucket_lde_sizes[b]; + if bucket.lde_size as usize != expected_lde_size { + error!( + "chunk {chunk_idx} bucket {b}: lde_size {} != expected {}", + bucket.lde_size, expected_lde_size, + ); + return false; + } + if bucket.members.len() != expected_members.len() { + error!( + "chunk {chunk_idx} bucket {b}: members.len {} != expected {}", + bucket.members.len(), + expected_members.len(), + ); + return false; + } + for (mi, &j) in expected_members.iter().enumerate() { + let expected_tag = main_tags[chunk_start + j]; + if bucket.members[mi] != expected_tag { + error!( + "chunk {chunk_idx} bucket {b} member {mi}: tag mismatch", + ); + return false; + } + member_bucket_idx[j] = b; + } + + // Verify the bucket FRI: replay layer-root absorbs, sample + // zetas, absorb last_value, grinding, sample iotas, and run + // per-iota combined-D fold check. 
+ let leader_idx = chunk_start + expected_members[0]; + let leader_air = airs[leader_idx]; + let leader_domain = + new_verifier_domain(leader_air, multi_proof.proofs[leader_idx].trace_length); + + let mut bt = bucket_seed.clone(); + bt.append_bytes(&(bucket.lde_size as u64).to_le_bytes()); + let delta_fri: FieldElement = bt.sample_field_element(); + + let mut zetas: Vec> = + Vec::with_capacity(bucket.layer_roots.len() + 1); + for root in &bucket.layer_roots { + let z = bt.sample_field_element(); + bt.append_bytes(root); + zetas.push(z); + } + zetas.push(bt.sample_field_element()); + bt.append_field_element(&bucket.last_value); + + let security_bits = leader_air.context().proof_options.grinding_factor; + if security_bits > 0 { + let nonce = match bucket.nonce { + Some(n) => n, + None => { + error!( + "chunk {chunk_idx} bucket {b}: grinding required but nonce missing", + ); + return false; + } + }; + let grinding_seed = bt.state(); + if !grinding::is_valid_nonce(&grinding_seed, nonce, security_bits) { + #[cfg(not(feature = "test_fiat_shamir"))] + error!("chunk {chunk_idx} bucket {b}: grinding factor not satisfied"); + return false; + } + bt.append_bytes(&nonce.to_be_bytes()); + } else if bucket.nonce.is_some() { + error!( + "chunk {chunk_idx} bucket {b}: nonce present but grinding disabled", + ); + return false; + } + + let number_of_queries = leader_air.options().fri_number_of_queries; + let iotas = + Self::sample_query_indexes(number_of_queries, &leader_domain, &mut bt); + + if bucket.decommitments.len() != iotas.len() { + error!( + "chunk {chunk_idx} bucket {b}: decommitments {} != iotas {}", + bucket.decommitments.len(), + iotas.len(), + ); + return false; + } + + // Reconstruct per-bucket-mate D_i(iota±) for every iota. 
+ let mut per_member_d: Vec> = + Vec::with_capacity(expected_members.len()); + for &j in expected_members.iter() { + let idx = chunk_start + j; + let chal = challenges_per_table[idx] + .as_ref() + .expect("step-2 succeeded → challenges populated"); + // Replace the challenge's empty iotas with bucket iotas. + let chal_with_iotas = Challenges { + z: chal.z.clone(), + boundary_coeffs: chal.boundary_coeffs.clone(), + transition_coeffs: chal.transition_coeffs.clone(), + trace_term_coeffs: chal.trace_term_coeffs.clone(), + gammas: chal.gammas.clone(), + zetas: zetas.clone(), + iotas: iotas.clone(), + rap_challenges: chal.rap_challenges.clone(), + grinding_seed: [0u8; 32], + }; + let member_domain = + new_verifier_domain(airs[idx], multi_proof.proofs[idx].trace_length); + let pair = match Self::reconstruct_d_evaluations_for_table( + &multi_proof.proofs[idx], + &member_domain, + &chal_with_iotas, + ) { + Some(pair) => pair, + None => { + error!( + "chunk {chunk_idx} bucket {b} member {j}: D reconstruction failed", + ); + return false; + } + }; + // chal_with_iotas only needed inside the call. + let _ = chal_with_iotas; + per_member_d.push(pair); + } + + // Per-iota: combine D_i with successive powers of δ_fri, + // verify FRI fold authenticates and reaches bucket.last_value. 
+ let mut evaluation_point_inv = iotas + .iter() + .map(|iota| { + Self::query_challenge_to_evaluation_point(*iota, false, &leader_domain) + }) + .collect::>>(); + if FieldElement::inplace_batch_inverse(&mut evaluation_point_inv).is_err() { + error!( + "chunk {chunk_idx} bucket {b}: query evaluation point not invertible", + ); + return false; + } + + for (q, &iota) in iotas.iter().enumerate() { + let mut d_iota = FieldElement::::zero(); + let mut d_iota_sym = FieldElement::::zero(); + let mut coeff = FieldElement::::one(); + for (i_local, member_d) in per_member_d.iter().enumerate() { + d_iota = d_iota + &coeff * &member_d.0[q]; + d_iota_sym = d_iota_sym + &coeff * &member_d.1[q]; + if i_local + 1 < per_member_d.len() { + coeff = coeff * &delta_fri; + } + } + + if !Self::verify_bucket_fri_query( + &bucket.layer_roots, + &bucket.last_value, + &zetas, + iota, + &bucket.decommitments[q], + evaluation_point_inv[q].clone(), + &d_iota, + &d_iota_sym, + ) { + #[cfg(not(feature = "test_fiat_shamir"))] + error!( + "chunk {chunk_idx} bucket {b} query {q}: FRI fold verification failed", + ); + return false; + } + } + bucket_iotas_cache.push(iotas); + } + + // Per chunk-mate: step 4 at its bucket's iotas (cached above, + // no transcript replay needed). 
+ for j in 0..(chunk_end - chunk_start) { + let idx = chunk_start + j; + let b = member_bucket_idx[j]; + let iotas = &bucket_iotas_cache[b]; + + let proof = &multi_proof.proofs[idx]; + let main_root = multi_proof.main_mmcs_roots[chunk_idx].as_ref(); + let main_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = + &multi_proof.main_mmcs_specs[chunk_idx]; + let aux_root = multi_proof.aux_mmcs_roots[chunk_idx].as_ref(); + let aux_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = + &multi_proof.aux_mmcs_specs[chunk_idx]; + let comp_root = multi_proof.comp_mmcs_roots[chunk_idx].as_ref(); + let comp_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)] = + &multi_proof.comp_mmcs_specs[chunk_idx]; + + if !Self::verify_step_4_at_iotas( + proof, + iotas, + main_tags[idx], + main_root, + main_spec, + aux_root, + aux_spec, + comp_root, + comp_spec, + ) { + #[cfg(not(feature = "test_fiat_shamir"))] + error!("Table {idx}: step 4 trace/comp openings failed at bucket iotas"); + return false; + } + } } // ===================================================================== @@ -1227,8 +1446,12 @@ pub trait IsStarkVerifier< } // =================================== - // ==========| Round 4 |========== + // ==========| Round 3.5 |========== // =================================== + // Sample γ from the per-fork transcript; build the per-table + // DEEP composition coefficient layout. The FRI commit + iotas + // happen at chunk-bucket level (verified separately) — this + // replay stops at γ. 
let num_terms_composition_poly = proof.composition_poly_parts_ood_evaluation.len(); let num_terms_trace = @@ -1251,50 +1474,19 @@ pub trait IsStarkVerifier< // <<<< Receive challenges: 𝛾ⱼ, 𝛾ⱼ' let gammas = deep_composition_coefficients; - // FRI commit phase - let merkle_roots = &proof.fri_layers_merkle_roots; - let mut zetas = merkle_roots - .iter() - .map(|root| { - // >>>> Send challenge 𝜁ₖ - let element = transcript.sample_field_element(); - // <<<< Receive commitment: [pₖ] (the first one is [p₀]) - transcript.append_bytes(root); - element - }) - .collect::>>(); - - // >>>> Send challenge 𝜁ₙ₋₁ - zetas.push(transcript.sample_field_element()); - - // <<<< Receive value: pₙ - transcript.append_field_element(&proof.fri_last_value); - - // Receive grinding value - let security_bits = air.context().proof_options.grinding_factor; - let mut grinding_seed = [0u8; 32]; - if security_bits > 0 - && let Some(nonce_value) = proof.nonce - { - grinding_seed = transcript.state(); - transcript.append_bytes(&nonce_value.to_be_bytes()); - } - - // FRI query phase - // <<<< Send challenges 𝜄ₛ (iota_s) - let number_of_queries = air.options().fri_number_of_queries; - let iotas = Self::sample_query_indexes(number_of_queries, domain, transcript); - + // zetas / iotas / grinding_seed are populated by the chunk-bucket + // FRI verification step in `multi_verify` (Phase D). The per-fork + // transcript ends here. Challenges { z, boundary_coeffs, transition_coeffs, trace_term_coeffs, gammas, - zetas, - iotas, + zetas: Vec::new(), + iotas: Vec::new(), rap_challenges, - grinding_seed, + grinding_seed: [0u8; 32], } } @@ -1302,31 +1494,29 @@ pub trait IsStarkVerifier< /// /// `main_*` / `aux_*` come from the shared multi-proof and authenticate /// the per-table trace openings in step 4. + /// Replays per-fork rounds 2 → 3.5 for one table and runs step 2 + /// (composition-polynomial OOD consistency). 
Returns the per-fork + /// Challenges populated up through γ — `zetas`, `iotas`, and + /// `grinding_seed` remain empty and are filled in by the chunk-bucket + /// FRI verification (Phase D). + /// + /// Step 4 (trace openings at iotas) is split into + /// [`verify_step_4_at_iotas`] driven by `multi_verify` after the + /// bucket FRI sets each chunk-mate's iota list. #[allow(clippy::too_many_arguments)] - fn verify_rounds_2_to_4( + fn replay_and_verify_step_2( air: &dyn AIR, proof: &StarkProof, transcript: &mut impl IsStarkTranscript, rap_challenges: Vec>, - main_tag: crypto::merkle_tree::mmcs::MatrixTag, - main_mmcs_root: Option<&Commitment>, - main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], - aux_mmcs_root: Option<&Commitment>, - aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], comp_mmcs_root: Option<&Commitment>, - comp_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], - ) -> bool + ) -> Option> where FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, { let domain = new_verifier_domain(air, proof.trace_length); - // Verify there are enough queries - if proof.query_list.len() < air.options().fri_number_of_queries { - return false; - } - #[cfg(feature = "instruments")] println!("- Started step 1: Recover challenges"); #[cfg(feature = "instruments")] @@ -1341,19 +1531,8 @@ pub trait IsStarkVerifier< comp_mmcs_root, ); - // verify grinding - let security_bits = air.context().proof_options.grinding_factor; - if security_bits > 0 { - let nonce_is_valid = proof.nonce.is_some_and(|nonce_value| { - grinding::is_valid_nonce(&challenges.grinding_seed, nonce_value, security_bits) - }); - - if !nonce_is_valid { - #[cfg(not(feature = "test_fiat_shamir"))] - error!("Grinding factor not satisfied"); - return false; - } - } + // Grinding + iotas + FRI verification moved to chunk-bucket level + // in `multi_verify` (Phase D batched FRI). 
#[cfg(feature = "instruments")] let elapsed1 = timer1.elapsed(); @@ -1368,7 +1547,7 @@ pub trait IsStarkVerifier< if !Self::step_2_verify_claimed_composition_polynomial(air, proof, &domain, &challenges) { #[cfg(not(feature = "test_fiat_shamir"))] error!("Composition Polynomial verification failed"); - return false; + return None; } #[cfg(feature = "instruments")] @@ -1380,11 +1559,8 @@ pub trait IsStarkVerifier< #[cfg(feature = "instruments")] let timer3 = Instant::now(); - if !Self::step_3_verify_fri(proof, &domain, &challenges) { - #[cfg(not(feature = "test_fiat_shamir"))] - error!("FRI verification failed"); - return false; - } + // FRI verification (Phase D) is driven from `multi_verify` per + // chunk-bucket. This per-table replay stops here. #[cfg(feature = "instruments")] let elapsed3 = timer3.elapsed(); @@ -1396,41 +1572,57 @@ pub trait IsStarkVerifier< #[cfg(feature = "instruments")] let timer4 = Instant::now(); - #[allow(clippy::let_and_return)] - if !Self::step_4_verify_trace_and_composition_openings( - proof, - &challenges, - main_tag, - main_mmcs_root, - main_mmcs_spec, - aux_mmcs_root, - aux_mmcs_spec, - comp_mmcs_root, - comp_mmcs_spec, - ) { - #[cfg(not(feature = "test_fiat_shamir"))] - error!("DEEP Composition Polynomial verification failed"); - return false; - } + // Step 4 (per-iota openings) runs at chunk-bucket level (Phase D). 
#[cfg(feature = "instruments")] - let elapsed4 = timer4.elapsed(); - #[cfg(feature = "instruments")] - println!(" Time spent: {:?}", elapsed4); + let _ = (elapsed1, timer2.elapsed(), timer3.elapsed(), timer4.elapsed()); - #[cfg(feature = "instruments")] - { - let total_time = elapsed1 + elapsed2 + elapsed3 + elapsed4; - println!( - " Fraction of verifying time per step: {:.4} {:.4} {:.4} {:.4}", - elapsed1.as_nanos() as f64 / total_time.as_nanos() as f64, - elapsed2.as_nanos() as f64 / total_time.as_nanos() as f64, - elapsed3.as_nanos() as f64 / total_time.as_nanos() as f64, - elapsed4.as_nanos() as f64 / total_time.as_nanos() as f64 - ); - } + Some(challenges) + } - true + /// Step 4 for one table at the bucket-shared iotas: authenticate + /// every per-query opening against the chunk's main / aux / + /// composition MMCS roots. + #[allow(clippy::too_many_arguments)] + fn verify_step_4_at_iotas( + proof: &StarkProof, + iotas: &[usize], + main_tag: crypto::merkle_tree::mmcs::MatrixTag, + main_mmcs_root: Option<&Commitment>, + main_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + aux_mmcs_root: Option<&Commitment>, + aux_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + comp_mmcs_root: Option<&Commitment>, + comp_mmcs_spec: &[(crypto::merkle_tree::mmcs::MatrixTag, usize)], + ) -> bool + where + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, + FieldElement: AsBytes + Sync + Send + math::traits::ByteConversion, + { + if proof.deep_poly_openings.len() < iotas.len() { + return false; + } + iotas + .iter() + .zip(proof.deep_poly_openings.iter()) + .all(|(iota_n, deep_poly_opening)| { + Self::verify_composition_poly_opening( + deep_poly_opening, + comp_mmcs_root, + comp_mmcs_spec, + main_tag, + *iota_n, + ) && Self::verify_trace_openings( + proof, + deep_poly_opening, + *iota_n, + main_tag, + main_mmcs_root, + main_mmcs_spec, + aux_mmcs_root, + aux_mmcs_spec, + ) + }) } }