From f377d970aebda318f041836827e211211ca34317 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Sat, 13 Jun 2026 21:55:58 +0100 Subject: [PATCH] fix(storage): verify ClientPut receiver against over-query window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the paid-quote issuer fix (#141) on the receiver storage-responsibility check. After #140/#141 removed the reachability re-rank and widened the issuer check, uploads recovered substantially, but a residual few percent still fail on: ClientPut receiver is not among this node's local 9 closest peers (close group plus storage margin) most visibly as "Failed to store public DataMap" — the DataMap is a single critical chunk, so one receiver-check rejection fails the whole upload regardless of file size. Cause is the same divergence the issuer check had: the uploader queries 2 * CLOSE_GROUP_SIZE peers and PUTs each chunk to the CLOSE_GROUP_SIZE closest *successful responders* (ant-client get_store_quotes), so when closer peers are slow or NAT-stuck the storer it legitimately PUT to sits at XOR positions up to 2 * close_group_size. The receiver check verified only the bare close_group_size + storage margin (9) of the node's *local* routing table with exact self-membership, so it rejected honest PUTs. Bring it in line with the issuer check: - Widen to 2 * close_group_size, matching the uploader's over-query window. This does not amplify replica count — the uploader still PUTs to only its selected storers — it just lets a legitimately-selected storer at position 10..14 accept. - Keep the XOR-only lookup (find_closest_nodes_local_with_self reranks by reachability and would demote XOR-close relay-only / NAT'd storers). - Hybrid source: try the local routing-table view first, and only on a local miss fall back to an authoritative find_closest_nodes_network lookup (the same view the uploader used to choose the storers), wrapped in the shared CLOSENESS_LOOKUP_TIMEOUT. Reject only if absent from both. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/payment/verifier.rs | 3 +- src/storage/handler.rs | 70 ++++++++++++++++++++++++++++++++--------- 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/src/payment/verifier.rs b/src/payment/verifier.rs index 1eca1ca..f6504e6 100644 --- a/src/payment/verifier.rs +++ b/src/payment/verifier.rs @@ -1062,7 +1062,8 @@ impl PaymentVerifier { /// at which point a second leader can race for the same pool (see /// [`InflightGuard::drop`]). At steady state the pool cache and pool /// signature verification gate keep this rare in practice. - const CLOSENESS_LOOKUP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(240); + pub(crate) const CLOSENESS_LOOKUP_TIMEOUT: std::time::Duration = + std::time::Duration::from_secs(240); /// Width of the storer's authoritative network lookup, in peers. /// diff --git a/src/storage/handler.rs b/src/storage/handler.rs index dc7c690..71ec2fb 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -38,6 +38,10 @@ use crate::client::compute_address; use crate::error::{Error, Result}; use crate::logging::{debug, info, warn}; use crate::payment::{PaymentVerifier, QuoteGenerator, VerificationContext}; +// `storage_admission_width` is now only referenced by the test-only membership +// override below; the production path mirrors the uploader's over-query window +// (see `validate_store_membership`). +#[cfg(any(test, feature = "test-utils"))] use crate::replication::config::storage_admission_width; use crate::replication::fresh::FreshWriteEvent; use crate::storage::lmdb::LmdbStorage; @@ -412,26 +416,64 @@ impl AntProtocol { }; let self_id = *p2p_node.peer_id(); - let admission_width = storage_admission_width(self.payment_verifier.close_group_size()); - // Storage-responsibility *verification* must mirror the uploader's pure - // XOR-distance peer selection. `find_closest_nodes_local_with_self` - // reranks the local routing table by reachability (preferring - // directly-reachable peers, XOR only as a tiebreaker), which demotes - // this node out of the compared window when it is an XOR-close - // relay-only / NAT'd peer and falsely rejects an honest PUT it is - // legitimately responsible for. Use the XOR-only sibling so the local - // admission check matches how the client chose the close group. - let closest = p2p_node + // The uploader selects storers by querying `2 * CLOSE_GROUP_SIZE` peers + // and keeping the `CLOSE_GROUP_SIZE` closest *successful responders* + // (ant-client `get_store_quotes`), then PUTs each chunk to that set. + // When closer peers are slow or NAT-stuck the storer it legitimately + // PUT to can sit anywhere in the top `2 * close_group_size` by XOR + // distance, so a bare close-group + margin window rejects honest PUTs + // with no security benefit. Mirror the uploader's over-query window — + // the same width and rationale as the paid-quote issuer check + // (`PaymentVerifier::validate_paid_quote_issuer_close_group`). + let lookup_width = self.payment_verifier.close_group_size().saturating_mul(2); + + // Use the XOR-only lookup, not `find_closest_nodes_local_with_self` + // (which reranks by reachability and would demote this node when it is + // an XOR-close relay-only / NAT'd storer). Try the cheap local + // routing-table view first — it covers the common case with no network + // I/O — and only when this node is absent locally (our table may simply + // not know the closer peers yet) fall back to an authoritative network + // lookup, the same view the uploader used to choose the storers, before + // rejecting a PUT this node may legitimately be responsible for. + let local = p2p_node .dht_manager() - .find_closest_nodes_local_by_distance_with_self(address, admission_width) + .find_closest_nodes_local_by_distance_with_self(address, lookup_width) .await; - if closest.iter().any(|node| node.peer_id == self_id) { + if local.iter().any(|node| node.peer_id == self_id) { + return Ok(()); + } + + let network_lookup = p2p_node + .dht_manager() + .find_closest_nodes_network(address, lookup_width); + let network = + match tokio::time::timeout(PaymentVerifier::CLOSENESS_LOOKUP_TIMEOUT, network_lookup) + .await + { + Ok(Ok(peers)) => peers, + Ok(Err(e)) => { + return Err(ProtocolError::PaymentFailed(format!( + "ClientPut storage responsibility could not be verified against \ + the authoritative network view for {}: {e}", + hex::encode(address) + ))); + } + Err(_) => { + return Err(ProtocolError::PaymentFailed(format!( + "ClientPut storage responsibility network lookup timed out after \ + {:?} for {}", + PaymentVerifier::CLOSENESS_LOOKUP_TIMEOUT, + hex::encode(address) + ))); + } + }; + if network.iter().any(|node| node.peer_id == self_id) { return Ok(()); } Err(ProtocolError::PaymentFailed(format!( - "ClientPut receiver {} is not among this node's local {admission_width} closest peers for {} \ - (close group plus storage margin)", + "ClientPut receiver {} is not among the {lookup_width} closest peers for {} \ + (checked the local routing table and the authoritative network view)", self_id.to_hex(), hex::encode(address) )))