From d2692bdf34bb6506850b6b7908fdabf82f0d5cca Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 15:58:32 +1000 Subject: [PATCH 1/7] feat: add standalone checksum/verify_checksum integrity primitive Extracts xxHash3-64 checksum and verify_checksum as a standalone public primitive in src/checksum.rs, gated on the 'checksum' feature alone. Usable without compression or messagepack. Includes 6 unit tests: 5 behavioral + 1 known-answer regression locking algorithm and big-endian byte order. Wire value is identical to StorageEnvelope's embedded checksum. --- src/checksum.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 6 +++++ 2 files changed, 78 insertions(+) create mode 100644 src/checksum.rs diff --git a/src/checksum.rs b/src/checksum.rs new file mode 100644 index 0000000..e40a491 --- /dev/null +++ b/src/checksum.rs @@ -0,0 +1,72 @@ +//! Standalone xxHash3-64 integrity primitive. +//! +//! Non-cryptographic corruption detection, decoupled from compression. The same +//! function backs `StorageEnvelope`'s embedded checksum, so the wire value is +//! identical whether you compute it directly or via `ByteStorage::store`. + +use xxhash_rust::xxh3::xxh3_64; + +/// Compute the xxHash3-64 checksum of `data`, big-endian (xxhash canonical +/// byte order — the value embedded in every `StorageEnvelope`). +/// +/// Non-cryptographic: detects corruption, not tampering. For tamper-resistance +/// use AES-256-GCM (the auth tag), not this checksum. Intentionally unbounded — +/// a single-pass, allocation-free O(n) hash over caller-materialized bytes; the +/// `MAX_UNCOMPRESSED_SIZE` cap is `StorageEnvelope`'s decompression-bomb concern. +pub fn checksum(data: &[u8]) -> [u8; 8] { + xxh3_64(data).to_be_bytes() +} + +/// Verify `data` against an expected xxHash3-64 checksum. 
+/// +/// Plain (non-constant-time) equality, consistent with the non-cryptographic +/// threat model — do NOT change to constant-time (would imply a security +/// property this primitive does not have). +pub fn verify_checksum(data: &[u8], expected: &[u8; 8]) -> bool { + &checksum(data) == expected +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn checksum_is_deterministic() { + let data = b"cachekit checksum determinism vector"; + assert_eq!(checksum(data), checksum(data)); + } + + #[test] + fn checksum_handles_empty_input() { + assert_eq!(checksum(b""), checksum(b"")); + } + + #[test] + fn verify_checksum_accepts_matching() { + let data = b"payload bytes"; + assert!(verify_checksum(data, &checksum(data))); + } + + #[test] + fn verify_checksum_rejects_single_bit_flip() { + let data = b"payload bytes"; + let mut corrupted = checksum(data); + corrupted[0] ^= 0x01; + assert!(!verify_checksum(data, &corrupted)); + } + + #[test] + fn verify_checksum_rejects_wrong_data() { + let expected = checksum(b"original"); + assert!(!verify_checksum(b"tampered", &expected)); + } + + #[test] + fn checksum_known_answer_locks_endianness() { + // Captured from checksum(b"cachekit-kat"); pins algorithm + big-endian order. 
+ assert_eq!( + checksum(b"cachekit-kat"), + [209u8, 35, 204, 155, 190, 157, 164, 177] + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 54f4de8..39ae82d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,6 +65,12 @@ pub mod metrics; pub use metrics::OperationMetrics; +// Standalone integrity primitive (usable without compression/messagepack) +#[cfg(feature = "checksum")] +pub mod checksum; +#[cfg(feature = "checksum")] +pub use checksum::{checksum, verify_checksum}; + // Core byte storage layer pub mod byte_storage; pub use byte_storage::{ByteStorage, StorageEnvelope}; From b2c1345a7058935f16ef5a99cc159d049e2eaf2d Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 15:59:28 +1000 Subject: [PATCH 2/7] refactor: StorageEnvelope::new uses the canonical checksum() primitive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRY: replace inline xxh3_64(data).to_be_bytes() with crate::checksum::checksum(data). The DRY-guard test (envelope_embeds_canonical_checksum) confirms byte-identical wire output before and after the refactor. xxh3_64 import retained — extract() still uses it. 
--- src/byte_storage.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/byte_storage.rs b/src/byte_storage.rs index e15dace..2e30f66 100644 --- a/src/byte_storage.rs +++ b/src/byte_storage.rs @@ -88,8 +88,8 @@ impl StorageEnvelope { return Err(ByteStorageError::InputTooLarge); } - // Generate xxHash3-64 checksum of original data (big-endian = xxhash canonical format) - let checksum = xxh3_64(data).to_be_bytes(); + // Single canonical xxHash3-64 definition (see crate::checksum) + let checksum = crate::checksum::checksum(data); Ok(StorageEnvelope { compressed_data, @@ -322,6 +322,13 @@ impl Default for ByteStorage { mod tests { use super::*; + #[test] + fn envelope_embeds_canonical_checksum() { + let data = b"DRY-guard payload"; + let envelope = StorageEnvelope::new(data, "test".to_string()).unwrap(); + assert_eq!(envelope.checksum, crate::checksum::checksum(data)); + } + #[test] fn test_storage_envelope_roundtrip() { let data = b"Hello, World! This is test data for compression.".to_vec(); From 729d5aeec1bc22ce3521f07b6c1cbed723f99d2a Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 16:00:46 +1000 Subject: [PATCH 3/7] refactor: StorageEnvelope::extract verifies via verify_checksum() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRY: replace inline xxh3_64(&decompressed).to_be_bytes() + manual compare with crate::checksum::verify_checksum(). ChecksumMismatch error variant is preserved on false return. Removes the now-dead xxhash_rust import from byte_storage.rs — single canonical xxHash3-64 definition lives in checksum.rs. 
--- src/byte_storage.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/byte_storage.rs b/src/byte_storage.rs index 2e30f66..9d8634b 100644 --- a/src/byte_storage.rs +++ b/src/byte_storage.rs @@ -13,8 +13,6 @@ use std::sync::{Arc, Mutex}; #[cfg(not(target_arch = "wasm32"))] use std::time::Instant; use thiserror::Error; -#[cfg(feature = "checksum")] -use xxhash_rust::xxh3::xxh3_64; /// Error types for ByteStorage operations #[derive(Debug, Error, Clone, PartialEq)] @@ -135,10 +133,8 @@ impl StorageEnvelope { .map_err(|_| ByteStorageError::DecompressionFailed)?; // Verify checksum (checksum validation happens AFTER decompression to prevent processing corrupted data) - // Note: xxHash3 is non-cryptographic, so we use simple equality (not constant-time) - // Security against tampering is provided by AES-GCM authentication tag, not the checksum - let computed_checksum = xxh3_64(&decompressed).to_be_bytes(); - if computed_checksum != self.checksum { + // false -> ChecksumMismatch (preserve the error variant; verify_checksum returns bool) + if !crate::checksum::verify_checksum(&decompressed, &self.checksum) { return Err(ByteStorageError::ChecksumMismatch); } From b79f29ed6b835d5398a15c228e8b3c095cada501 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 16:02:34 +1000 Subject: [PATCH 4/7] docs: document standalone checksum API in crate-level docs Updates the xxHash3-64 security property bullet to call out standalone availability via checksum/verify_checksum without requiring compression. --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 39ae82d..ba0dc8f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,7 +57,7 @@ //! //! - **AES-256-GCM**: Authenticated encryption via `ring` //! - **HKDF-SHA256**: Key derivation with tenant isolation (RFC 5869) -//! - **xxHash3-64**: Fast non-cryptographic checksums (corruption detection) +//! 
- **xxHash3-64**: Fast non-cryptographic checksums (corruption detection), available standalone via [`checksum()`]/[`verify_checksum()`] without compression. //! - **Nonce safety**: Counter-based + random IV prevents reuse //! - **Memory safety**: `zeroize` on drop for all key material From 467e56c75f1a5f60cbd916eb6720571dc1a29c4c Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 16:08:28 +1000 Subject: [PATCH 5/7] docs: list checksum.rs in README source tree --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index df43d06..fd75b68 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,7 @@ cachekit-core/ ├── src/ │ ├── lib.rs # Public API exports │ ├── byte_storage.rs # LZ4 + xxHash3 storage envelope +│ ├── checksum.rs # Standalone xxHash3 checksum/verify primitive │ ├── metrics.rs # Operation timing & statistics │ │ │ ├── encryption/ # (feature = "encryption") From e659c7703abf21497effd47c9dd94fa544dc0693 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 16:08:28 +1000 Subject: [PATCH 6/7] test: assert ChecksumMismatch variant in extract corruption guard --- src/byte_storage.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/byte_storage.rs b/src/byte_storage.rs index 9d8634b..8b377a6 100644 --- a/src/byte_storage.rs +++ b/src/byte_storage.rs @@ -345,7 +345,12 @@ mod tests { let mut envelope = StorageEnvelope::new(b"test", "test".to_string()).unwrap(); // Corrupt the checksum envelope.checksum[0] = !envelope.checksum[0]; - assert!(envelope.extract().is_err()); + // Fail-open guard: the DRY refactor must still surface the specific + // ChecksumMismatch variant (not silently succeed on a flipped checksum). 
+ assert!(matches!( + envelope.extract(), + Err(ByteStorageError::ChecksumMismatch) + )); } #[test] From 4003b0e5ea0050f0528243dbf3954a4454a4ff2a Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 16:12:35 +1000 Subject: [PATCH 7/7] test: pin canonical empty-input checksum value (de-tautologize) --- src/checksum.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/checksum.rs b/src/checksum.rs index e40a491..bae3bb9 100644 --- a/src/checksum.rs +++ b/src/checksum.rs @@ -38,7 +38,12 @@ mod tests { #[test] fn checksum_handles_empty_input() { - assert_eq!(checksum(b""), checksum(b"")); + // Pin the canonical xxHash3-64 of the empty input (big-endian); proves + // "handles empty" beyond just "doesn't panic", and locks the value. + assert_eq!( + checksum(b""), + [0x2D, 0x06, 0x80, 0x05, 0x38, 0xD3, 0x94, 0xC2] + ); } #[test]