Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ cachekit-core/
├── src/
│ ├── lib.rs # Public API exports
│ ├── byte_storage.rs # LZ4 + xxHash3 storage envelope
│ ├── checksum.rs # Standalone xxHash3 checksum/verify primitive
│ ├── metrics.rs # Operation timing & statistics
│ │
│ ├── encryption/ # (feature = "encryption")
Expand Down
26 changes: 17 additions & 9 deletions src/byte_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ use std::sync::{Arc, Mutex};
#[cfg(not(target_arch = "wasm32"))]
use std::time::Instant;
use thiserror::Error;
#[cfg(feature = "checksum")]
use xxhash_rust::xxh3::xxh3_64;

/// Error types for ByteStorage operations
#[derive(Debug, Error, Clone, PartialEq)]
Expand Down Expand Up @@ -88,8 +86,8 @@ impl StorageEnvelope {
return Err(ByteStorageError::InputTooLarge);
}

// Generate xxHash3-64 checksum of original data (big-endian = xxhash canonical format)
let checksum = xxh3_64(data).to_be_bytes();
// Single canonical xxHash3-64 definition (see crate::checksum)
let checksum = crate::checksum::checksum(data);

Ok(StorageEnvelope {
compressed_data,
Expand Down Expand Up @@ -135,10 +133,8 @@ impl StorageEnvelope {
.map_err(|_| ByteStorageError::DecompressionFailed)?;

// Verify checksum (checksum validation happens AFTER decompression to prevent processing corrupted data)
// Note: xxHash3 is non-cryptographic, so we use simple equality (not constant-time)
// Security against tampering is provided by AES-GCM authentication tag, not the checksum
let computed_checksum = xxh3_64(&decompressed).to_be_bytes();
if computed_checksum != self.checksum {
// false -> ChecksumMismatch (preserve the error variant; verify_checksum returns bool)
if !crate::checksum::verify_checksum(&decompressed, &self.checksum) {
return Err(ByteStorageError::ChecksumMismatch);
}

Expand Down Expand Up @@ -322,6 +318,13 @@ impl Default for ByteStorage {
mod tests {
use super::*;

#[test]
fn envelope_embeds_canonical_checksum() {
    // The checksum stored in a freshly built envelope must be exactly what
    // the standalone primitive returns for the same bytes.
    let payload = b"DRY-guard payload";
    let sealed = StorageEnvelope::new(payload, "test".to_string()).unwrap();
    let standalone = crate::checksum::checksum(payload);
    assert_eq!(sealed.checksum, standalone);
}

#[test]
fn test_storage_envelope_roundtrip() {
let data = b"Hello, World! This is test data for compression.".to_vec();
Expand All @@ -342,7 +345,12 @@ mod tests {
let mut envelope = StorageEnvelope::new(b"test", "test".to_string()).unwrap();
// Corrupt the checksum
envelope.checksum[0] = !envelope.checksum[0];
assert!(envelope.extract().is_err());
// Fail-open guard: the DRY refactor must still surface the specific
// ChecksumMismatch variant (not silently succeed on a flipped checksum).
assert!(matches!(
envelope.extract(),
Err(ByteStorageError::ChecksumMismatch)
));
}

#[test]
Expand Down
77 changes: 77 additions & 0 deletions src/checksum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//! Standalone xxHash3-64 integrity primitive.
//!
//! Non-cryptographic corruption detection, decoupled from compression. The same
//! function backs `StorageEnvelope`'s embedded checksum, so the wire value is
//! identical whether you compute it directly or via `ByteStorage::store`.

use xxhash_rust::xxh3::xxh3_64;

/// Hash `data` with xxHash3-64 and return the digest as eight big-endian bytes.
///
/// Big-endian is the xxhash canonical byte order, and it is the value that
/// `StorageEnvelope` embeds as its checksum.
///
/// This is a non-cryptographic integrity check: it detects corruption, not
/// tampering. Tamper-resistance comes from the AES-256-GCM auth tag, not from
/// this checksum.
///
/// No size limit is enforced here on purpose — the hash is a single O(n) pass
/// over bytes the caller already holds. The `MAX_UNCOMPRESSED_SIZE` cap is
/// `StorageEnvelope`'s decompression-bomb concern, not this function's.
pub fn checksum(data: &[u8]) -> [u8; 8] {
    let digest: u64 = xxh3_64(data);
    digest.to_be_bytes()
}

/// Check that `data` hashes to the `expected` xxHash3-64 checksum.
///
/// Returns `true` when the checksum computed from `data` equals `expected`
/// byte-for-byte, and `false` otherwise. A `false` result is the caller's
/// signal to reject the data, which is why the return value is `#[must_use]`:
/// dropping it would skip the integrity check without any diagnostic.
///
/// The comparison is plain (non-constant-time) equality, consistent with the
/// non-cryptographic threat model — do NOT change it to constant-time, as that
/// would imply a security property this primitive does not have.
#[must_use = "ignoring the result skips the integrity check"]
pub fn verify_checksum(data: &[u8], expected: &[u8; 8]) -> bool {
    // Compare the arrays by value rather than comparing two references.
    checksum(data) == *expected
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same bytes twice yields the same digest.
    #[test]
    fn checksum_is_deterministic() {
        let input = b"cachekit checksum determinism vector";
        let first = checksum(input);
        let second = checksum(input);
        assert_eq!(first, second);
    }

    /// Empty input is accepted and produces a fixed, pinned digest.
    #[test]
    fn checksum_handles_empty_input() {
        // Canonical xxHash3-64 of the empty input, big-endian. Pinning the
        // value proves more than "doesn't panic" and locks it against drift.
        let empty_digest: [u8; 8] = [0x2D, 0x06, 0x80, 0x05, 0x38, 0xD3, 0x94, 0xC2];
        assert_eq!(checksum(b""), empty_digest);
    }

    /// A checksum computed from the data verifies against that same data.
    #[test]
    fn verify_checksum_accepts_matching() {
        let payload = b"payload bytes";
        let expected = checksum(payload);
        assert!(verify_checksum(payload, &expected));
    }

    /// Flipping one bit of the expected checksum makes verification fail.
    #[test]
    fn verify_checksum_rejects_single_bit_flip() {
        let payload = b"payload bytes";
        let mut flipped = checksum(payload);
        flipped[0] ^= 0x01;
        assert!(!verify_checksum(payload, &flipped));
    }

    /// A checksum taken over one input does not verify a different input.
    #[test]
    fn verify_checksum_rejects_wrong_data() {
        let original_digest = checksum(b"original");
        assert!(!verify_checksum(b"tampered", &original_digest));
    }

    /// Known-answer test that pins both the algorithm and the byte order.
    #[test]
    fn checksum_known_answer_locks_endianness() {
        // Captured from checksum(b"cachekit-kat"); written in hex to match the
        // empty-input vector above (same byte values as the decimal form).
        let known_answer: [u8; 8] = [0xD1, 0x23, 0xCC, 0x9B, 0xBE, 0x9D, 0xA4, 0xB1];
        assert_eq!(checksum(b"cachekit-kat"), known_answer);
    }
}
8 changes: 7 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,20 @@
//!
//! - **AES-256-GCM**: Authenticated encryption via `ring`
//! - **HKDF-SHA256**: Key derivation with tenant isolation (RFC 5869)
//! - **xxHash3-64**: Fast non-cryptographic checksums (corruption detection)
//! - **xxHash3-64**: Fast non-cryptographic checksums (corruption detection), available standalone via [`checksum()`]/[`verify_checksum()`] without compression.
//! - **Nonce safety**: Counter-based + random IV prevents reuse
//! - **Memory safety**: `zeroize` on drop for all key material

// Metrics and observability
pub mod metrics;
pub use metrics::OperationMetrics;

// Standalone integrity primitive (usable without compression/messagepack)
#[cfg(feature = "checksum")]
pub mod checksum;
#[cfg(feature = "checksum")]
pub use checksum::{checksum, verify_checksum};

// Core byte storage layer
pub mod byte_storage;
pub use byte_storage::{ByteStorage, StorageEnvelope};
Expand Down
Loading