From 832e773d2f7c548213e7f133b8758687cc34de37 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Tue, 16 Jun 2026 08:02:00 +1000 Subject: [PATCH] perf: borrow input in ByteStorage write path instead of copying it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit StorageEnvelope::new took its payload as Vec<u8> by value, but only ever compresses (lz4_flex::compress(&data)) and hashes (xxh3_64(&data)) it — both borrow. The Vec was never retained, so ByteStorage::store was forced to call data.to_vec() on its &[u8] input first: a full-payload allocation + memcpy (up to MAX_UNCOMPRESSED_SIZE = 512 MB) on every write, for nothing. Change StorageEnvelope::new to take &[u8]; store() passes its slice straight through. One fewer copy per write, scaling with payload size — directly trims the large-object write-path peak RSS. Addresses the cachekit-core half of #45. The py.allow_threads / GIL-release half lives in the cachekit-py PyO3 bindings and is tracked there. API note: StorageEnvelope::new is pub but functions as an internal constructor; the load-bearing API ByteStorage::store(&[u8]) is unchanged. Greenfield, no back-compat. Internal tests + 3 fuzz targets updated to pass slices. 
--- .../byte_storage_checksum_collision.rs | 2 +- .../byte_storage_format_injection.rs | 4 ++-- .../integration_layered_security.rs | 2 +- src/byte_storage.rs | 22 +++++++++++-------- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/fuzz/fuzz_targets/byte_storage_checksum_collision.rs b/fuzz/fuzz_targets/byte_storage_checksum_collision.rs index f0d1654..00d3011 100644 --- a/fuzz/fuzz_targets/byte_storage_checksum_collision.rs +++ b/fuzz/fuzz_targets/byte_storage_checksum_collision.rs @@ -24,7 +24,7 @@ fuzz_target!(|test_case: ChecksumTestCase| { } // Create valid envelope first - let envelope = match StorageEnvelope::new(test_case.data.clone(), "msgpack".to_string()) { + let envelope = match StorageEnvelope::new(&test_case.data, "msgpack".to_string()) { Ok(env) => env, Err(_) => return, // Skip if data too large }; diff --git a/fuzz/fuzz_targets/byte_storage_format_injection.rs b/fuzz/fuzz_targets/byte_storage_format_injection.rs index b3fb1c4..6b61550 100644 --- a/fuzz/fuzz_targets/byte_storage_format_injection.rs +++ b/fuzz/fuzz_targets/byte_storage_format_injection.rs @@ -17,7 +17,7 @@ fuzz_target!(|data: &[u8]| { // Create envelope with potentially malicious format let test_data = vec![b'x'; 100]; - let envelope = match StorageEnvelope::new(test_data, format.clone()) { + let envelope = match StorageEnvelope::new(&test_data, format.clone()) { Ok(env) => env, Err(_) => return, // Skip if envelope creation fails (acceptable) }; @@ -50,7 +50,7 @@ fuzz_target!(|data: &[u8]| { for pattern in &injection_patterns { let pattern_data = vec![b'y'; 50]; - if let Ok(env) = StorageEnvelope::new(pattern_data, pattern.to_string()) { + if let Ok(env) = StorageEnvelope::new(&pattern_data, pattern.to_string()) { // Format stored as-is assert_eq!(env.format, *pattern); diff --git a/fuzz/fuzz_targets/integration_layered_security.rs b/fuzz/fuzz_targets/integration_layered_security.rs index 9418403..37fe98d 100644 --- a/fuzz/fuzz_targets/integration_layered_security.rs 
+++ b/fuzz/fuzz_targets/integration_layered_security.rs @@ -27,7 +27,7 @@ fuzz_target!(|data: &[u8]| { }; // Step 1: Create ByteStorage envelope (compression + checksum) - let envelope = match StorageEnvelope::new(plaintext.to_vec(), "msgpack".to_string()) { + let envelope = match StorageEnvelope::new(plaintext, "msgpack".to_string()) { Ok(env) => env, Err(_) => return, // Plaintext too large }; diff --git a/src/byte_storage.rs b/src/byte_storage.rs index 518c5af..e15dace 100644 --- a/src/byte_storage.rs +++ b/src/byte_storage.rs @@ -66,9 +66,13 @@ pub struct StorageEnvelope { } impl StorageEnvelope { - /// Create new envelope with data compression and checksum + /// Create new envelope with data compression and checksum. + /// + /// Takes the input by shared slice: it is only compressed and hashed (both + /// borrow), never retained, so there is no reason to own it. Avoids a + /// full-payload copy (up to `MAX_UNCOMPRESSED_SIZE`) on the write path. #[cfg(all(feature = "compression", feature = "checksum"))] - pub fn new(data: Vec, format: String) -> Result { + pub fn new(data: &[u8], format: String) -> Result { // Security: Check input size before compression if data.len() > MAX_UNCOMPRESSED_SIZE { return Err(ByteStorageError::InputTooLarge); @@ -77,7 +81,7 @@ impl StorageEnvelope { let original_size = data.len() as u32; // Compress with LZ4 - let compressed_data = lz4_flex::compress(&data); + let compressed_data = lz4_flex::compress(data); // Security: Check compressed size if compressed_data.len() > MAX_COMPRESSED_SIZE { @@ -85,7 +89,7 @@ impl StorageEnvelope { } // Generate xxHash3-64 checksum of original data (big-endian = xxhash canonical format) - let checksum = xxh3_64(&data).to_be_bytes(); + let checksum = xxh3_64(data).to_be_bytes(); Ok(StorageEnvelope { compressed_data, @@ -181,7 +185,7 @@ impl ByteStorage { let compression_start = Instant::now(); let original_size = data.len(); - let envelope = StorageEnvelope::new(data.to_vec(), format)?; + let envelope 
= StorageEnvelope::new(data, format)?; #[cfg(not(target_arch = "wasm32"))] let compression_micros = compression_start.elapsed().as_micros() as u64; @@ -321,7 +325,7 @@ mod tests { #[test] fn test_storage_envelope_roundtrip() { let data = b"Hello, World! This is test data for compression.".to_vec(); - let envelope = StorageEnvelope::new(data.clone(), "test".to_string()).unwrap(); + let envelope = StorageEnvelope::new(&data, "test".to_string()).unwrap(); let extracted = envelope.extract().unwrap(); assert_eq!(data, extracted); } @@ -329,13 +333,13 @@ mod tests { #[test] fn test_compression_works() { let data = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_vec(); // Highly compressible - let envelope = StorageEnvelope::new(data.clone(), "test".to_string()).unwrap(); + let envelope = StorageEnvelope::new(&data, "test".to_string()).unwrap(); assert!(envelope.compressed_data.len() < data.len()); } #[test] fn test_checksum_validation() { - let mut envelope = StorageEnvelope::new(b"test".to_vec(), "test".to_string()).unwrap(); + let mut envelope = StorageEnvelope::new(b"test", "test".to_string()).unwrap(); // Corrupt the checksum envelope.checksum[0] = !envelope.checksum[0]; assert!(envelope.extract().is_err()); @@ -367,7 +371,7 @@ mod tests { fn test_size_limits_envelope() { // Create data exactly at the limit let max_data = vec![0u8; MAX_UNCOMPRESSED_SIZE]; - let envelope_result = StorageEnvelope::new(max_data, "test".to_string()); + let envelope_result = StorageEnvelope::new(&max_data, "test".to_string()); // Should succeed at exactly the limit assert!(envelope_result.is_ok());