From 15750d759757ced2b349224039922d3c3532c2a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Wed, 3 Jun 2026 18:02:32 -0300 Subject: [PATCH 1/3] fix: run skipped tick intervals instead of silently dropping them handle_tick scheduled the next tick after on_tick returned, with a delay computed from the timestamp captured at entry. A duty overrunning the 800ms interval (e.g. inline block-building proofs taking ~1.3s) pushed the next tick past the following boundary, so that interval's duty never ran: a proposing node never attested on its own slot, which alone prevents 1- and 2-node devnets from ever justifying. Drive tick processing from store.time() (the last processed interval) instead of the wall clock at handling time: each tick loops over every interval boundary that has passed but was not processed yet, running missed intervals in place (with a warning) so their duties still execute. When more than a slot behind (e.g. a long stall), jump straight to the wall clock, matching store::on_tick's existing fast-forward. Closes #413 --- crates/blockchain/src/lib.rs | 157 ++++++++++++++++++++++++++++++----- 1 file changed, 138 insertions(+), 19 deletions(-) diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index 1a6fb766..472c72d0 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -57,13 +57,36 @@ pub const MAX_ATTESTATIONS_DATA: usize = 16; /// See: leanSpec PR #682. pub const GOSSIP_DISPARITY_INTERVALS: u64 = 1; -/// Milliseconds until the next interval boundary, measured relative to genesis. -fn ms_until_next_interval(now_ms: u64, genesis_time_ms: u64) -> u64 { - // Before genesis: wait until genesis itself. - let Some(ms_since_genesis) = now_ms.checked_sub(genesis_time_ms) else { - return genesis_time_ms - now_ms; - }; - MILLISECONDS_PER_INTERVAL - (ms_since_genesis % MILLISECONDS_PER_INTERVAL) +/// Milliseconds until the interval boundary for interval-count `time`. +fn ms_until_tick(time: u64, now_ms: u64, genesis_time_ms: u64) -> u64 { + let boundary_ms = genesis_time_ms + time * MILLISECONDS_PER_INTERVAL; + boundary_ms.saturating_sub(now_ms) +} + +/// Next interval whose duties should run, given the last processed interval +/// (`store.time()`) and the current wall-clock interval. Returns `None` when +/// caught up. +/// +/// Each missed interval is processed one at a time so its duties still run +/// when a previous duty overran its interval (issue #413). When more than a +/// slot behind (e.g. a long stall), jump straight to the wall clock instead; +/// `store::on_tick` fast-forwards through the gap the same way. +fn next_tick_time(store_time: u64, wall_time: u64) -> Option { + if store_time >= wall_time { + None + } else if wall_time - store_time > INTERVALS_PER_SLOT { + Some(wall_time) + } else { + Some(store_time + 1) + } +} + +/// Current UNIX timestamp in milliseconds. +fn unix_now_ms() -> u64 { + SystemTime::UNIX_EPOCH + .elapsed() + .expect("already past the unix epoch") + .as_millis() as u64 } impl BlockChain { @@ -81,10 +104,7 @@ impl BlockChain { // Catch XMSS keys up to the current slot before the first tick // store.time() doesn't work here: after an offline gap it lags wall-clock by // exactly the gap we need to catch up through - let now_ms = SystemTime::UNIX_EPOCH - .elapsed() - .expect("already past the unix epoch") - .as_millis() as u64; + let now_ms = unix_now_ms(); let current_slot = (now_ms.saturating_sub(genesis_time * 1000) / MILLISECONDS_PER_SLOT) as u32; key_manager.advance_keys_to(current_slot); @@ -692,16 +712,47 @@ pub(crate) trait BlockChainProtocol: Send + Sync { impl BlockChainServer { #[send_handler] async fn handle_tick(&mut self, _msg: block_chain_protocol::Tick, ctx: &Context) { - let timestamp = SystemTime::UNIX_EPOCH - .elapsed() - .expect("already past the unix epoch"); - let now_ms = timestamp.as_millis() as u64; - self.on_tick(now_ms, ctx).await; - // Schedule the next tick at the next interval boundary let genesis_time_ms = self.store.config().genesis_time * 1000; - let ms_to_next_interval = ms_until_next_interval(now_ms, genesis_time_ms); + + // Process every interval boundary that has passed but was not ticked + // yet (`store.time()` tracks the last processed interval), deriving + // each tick's timestamp from its boundary rather than the wall clock. + // A duty that overruns its interval (e.g. inline block-building + // proofs) then delays the following duties instead of silently + // skipping them (issue #413). + loop { + let wall_time = + unix_now_ms().saturating_sub(genesis_time_ms) / MILLISECONDS_PER_INTERVAL; + let Some(tick_time) = next_tick_time(self.store.time(), wall_time) else { + break; + }; + if tick_time < wall_time { + let slot = tick_time / INTERVALS_PER_SLOT; + let interval = tick_time % INTERVALS_PER_SLOT; + warn!( + %slot, + %interval, + "Tick duties overran their interval; running missed tick in place" + ); + } + let timestamp_ms = genesis_time_ms + tick_time * MILLISECONDS_PER_INTERVAL; + let store_time_before = self.store.time(); + self.on_tick(timestamp_ms, ctx).await; + if self.store.time() == store_time_before { + // on_tick bailed without processing (e.g. the no-validators + // guard); break instead of spinning, and retry next interval. + break; + } + } + + // Schedule the next tick at the next interval boundary. Computed from + // the wall clock rather than store.time() so a tick that made no + // progress retries at the next boundary instead of immediately. + let now_ms = unix_now_ms(); + let wall_time = now_ms.saturating_sub(genesis_time_ms) / MILLISECONDS_PER_INTERVAL; + let delay = ms_until_tick(wall_time + 1, now_ms, genesis_time_ms); send_after( - Duration::from_millis(ms_to_next_interval), + Duration::from_millis(delay), ctx.clone(), block_chain_protocol::Tick, ); @@ -826,3 +877,71 @@ impl Handler for BlockChainServer { } } } + +#[cfg(test)] +mod tests { + use super::*; + + const GENESIS_MS: u64 = 1_000_000; + + #[test] + fn ms_until_tick_before_boundary_returns_exact_delay() { + // Tick 3 fires at genesis + 2400ms; now is 100ms before that. + let now = GENESIS_MS + 3 * MILLISECONDS_PER_INTERVAL - 100; + assert_eq!(ms_until_tick(3, now, GENESIS_MS), 100); + } + + #[test] + fn ms_until_tick_past_boundary_returns_zero() { + let now = GENESIS_MS + 3 * MILLISECONDS_PER_INTERVAL + 500; + assert_eq!(ms_until_tick(3, now, GENESIS_MS), 0); + } + + #[test] + fn ms_until_tick_pre_genesis_waits_until_genesis() { + let now = GENESIS_MS - 5_000; + assert_eq!(ms_until_tick(0, now, GENESIS_MS), 5_000); + } + + #[test] + fn next_tick_time_caught_up_returns_none() { + assert_eq!(next_tick_time(7, 7), None); + // Clock skew (store ahead of wall) must not produce a tick either. + assert_eq!(next_tick_time(8, 7), None); + } + + #[test] + fn next_tick_time_steps_one_interval() { + assert_eq!(next_tick_time(6, 7), Some(7)); + } + + #[test] + fn next_tick_time_replays_each_missed_interval() { + // Scenario from issue #413: interval 0 of some slot processed on + // time, but block building overran into interval 2's window. Both + // missed intervals are still processed, one at a time, so interval + // 1's attestation duty runs instead of being silently skipped. + let store_time = 2 * INTERVALS_PER_SLOT; // slot 2, interval 0 + let wall_time = store_time + 2; + let next = next_tick_time(store_time, wall_time).unwrap(); + assert_eq!(next % INTERVALS_PER_SLOT, 1); // attestation interval + assert_eq!(next_tick_time(next, wall_time), Some(next + 1)); + assert_eq!(next_tick_time(next + 1, wall_time), None); + } + + #[test] + fn next_tick_time_replays_up_to_a_full_slot() { + let store_time = 7; + let wall_time = store_time + INTERVALS_PER_SLOT; + assert_eq!(next_tick_time(store_time, wall_time), Some(store_time + 1)); + } + + #[test] + fn next_tick_time_jumps_when_more_than_a_slot_behind() { + // Mirrors store::on_tick's fast-forward: don't replay every missed + // interval after a long stall, jump straight to the wall clock. + let store_time = 7; + let wall_time = store_time + INTERVALS_PER_SLOT + 1; + assert_eq!(next_tick_time(store_time, wall_time), Some(wall_time)); + } +} From baf8c6764eeba81f4bd311efeac15da0312c21b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Wed, 3 Jun 2026 18:43:27 -0300 Subject: [PATCH 2/3] Revert "fix: run skipped tick intervals instead of silently dropping them" This reverts commit 15750d759757ced2b349224039922d3c3532c2a7. --- crates/blockchain/src/lib.rs | 157 +++++------------------------------ 1 file changed, 19 insertions(+), 138 deletions(-) diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index 472c72d0..1a6fb766 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -57,36 +57,13 @@ pub const MAX_ATTESTATIONS_DATA: usize = 16; /// See: leanSpec PR #682. pub const GOSSIP_DISPARITY_INTERVALS: u64 = 1; -/// Milliseconds until the interval boundary for interval-count `time`. -fn ms_until_tick(time: u64, now_ms: u64, genesis_time_ms: u64) -> u64 { - let boundary_ms = genesis_time_ms + time * MILLISECONDS_PER_INTERVAL; - boundary_ms.saturating_sub(now_ms) -} - -/// Next interval whose duties should run, given the last processed interval -/// (`store.time()`) and the current wall-clock interval. Returns `None` when -/// caught up. -/// -/// Each missed interval is processed one at a time so its duties still run -/// when a previous duty overran its interval (issue #413). When more than a -/// slot behind (e.g. a long stall), jump straight to the wall clock instead; -/// `store::on_tick` fast-forwards through the gap the same way. -fn next_tick_time(store_time: u64, wall_time: u64) -> Option { - if store_time >= wall_time { - None - } else if wall_time - store_time > INTERVALS_PER_SLOT { - Some(wall_time) - } else { - Some(store_time + 1) - } -} - -/// Current UNIX timestamp in milliseconds. -fn unix_now_ms() -> u64 { - SystemTime::UNIX_EPOCH - .elapsed() - .expect("already past the unix epoch") - .as_millis() as u64 +/// Milliseconds until the next interval boundary, measured relative to genesis. +fn ms_until_next_interval(now_ms: u64, genesis_time_ms: u64) -> u64 { + // Before genesis: wait until genesis itself. + let Some(ms_since_genesis) = now_ms.checked_sub(genesis_time_ms) else { + return genesis_time_ms - now_ms; + }; + MILLISECONDS_PER_INTERVAL - (ms_since_genesis % MILLISECONDS_PER_INTERVAL) } impl BlockChain { @@ -104,7 +81,10 @@ impl BlockChain { // Catch XMSS keys up to the current slot before the first tick // store.time() doesn't work here: after an offline gap it lags wall-clock by // exactly the gap we need to catch up through - let now_ms = unix_now_ms(); + let now_ms = SystemTime::UNIX_EPOCH + .elapsed() + .expect("already past the unix epoch") + .as_millis() as u64; let current_slot = (now_ms.saturating_sub(genesis_time * 1000) / MILLISECONDS_PER_SLOT) as u32; key_manager.advance_keys_to(current_slot); @@ -712,47 +692,16 @@ pub(crate) trait BlockChainProtocol: Send + Sync { impl BlockChainServer { #[send_handler] async fn handle_tick(&mut self, _msg: block_chain_protocol::Tick, ctx: &Context) { + let timestamp = SystemTime::UNIX_EPOCH + .elapsed() + .expect("already past the unix epoch"); + let now_ms = timestamp.as_millis() as u64; + self.on_tick(now_ms, ctx).await; + // Schedule the next tick at the next interval boundary let genesis_time_ms = self.store.config().genesis_time * 1000; - - // Process every interval boundary that has passed but was not ticked - // yet (`store.time()` tracks the last processed interval), deriving - // each tick's timestamp from its boundary rather than the wall clock. - // A duty that overruns its interval (e.g. inline block-building - // proofs) then delays the following duties instead of silently - // skipping them (issue #413). - loop { - let wall_time = - unix_now_ms().saturating_sub(genesis_time_ms) / MILLISECONDS_PER_INTERVAL; - let Some(tick_time) = next_tick_time(self.store.time(), wall_time) else { - break; - }; - if tick_time < wall_time { - let slot = tick_time / INTERVALS_PER_SLOT; - let interval = tick_time % INTERVALS_PER_SLOT; - warn!( - %slot, - %interval, - "Tick duties overran their interval; running missed tick in place" - ); - } - let timestamp_ms = genesis_time_ms + tick_time * MILLISECONDS_PER_INTERVAL; - let store_time_before = self.store.time(); - self.on_tick(timestamp_ms, ctx).await; - if self.store.time() == store_time_before { - // on_tick bailed without processing (e.g. the no-validators - // guard); break instead of spinning, and retry next interval. - break; - } - } - - // Schedule the next tick at the next interval boundary. Computed from - // the wall clock rather than store.time() so a tick that made no - // progress retries at the next boundary instead of immediately. - let now_ms = unix_now_ms(); - let wall_time = now_ms.saturating_sub(genesis_time_ms) / MILLISECONDS_PER_INTERVAL; - let delay = ms_until_tick(wall_time + 1, now_ms, genesis_time_ms); + let ms_to_next_interval = ms_until_next_interval(now_ms, genesis_time_ms); send_after( - Duration::from_millis(delay), + Duration::from_millis(ms_to_next_interval), ctx.clone(), block_chain_protocol::Tick, ); @@ -877,71 +826,3 @@ impl Handler for BlockChainServer { } } } - -#[cfg(test)] -mod tests { - use super::*; - - const GENESIS_MS: u64 = 1_000_000; - - #[test] - fn ms_until_tick_before_boundary_returns_exact_delay() { - // Tick 3 fires at genesis + 2400ms; now is 100ms before that. - let now = GENESIS_MS + 3 * MILLISECONDS_PER_INTERVAL - 100; - assert_eq!(ms_until_tick(3, now, GENESIS_MS), 100); - } - - #[test] - fn ms_until_tick_past_boundary_returns_zero() { - let now = GENESIS_MS + 3 * MILLISECONDS_PER_INTERVAL + 500; - assert_eq!(ms_until_tick(3, now, GENESIS_MS), 0); - } - - #[test] - fn ms_until_tick_pre_genesis_waits_until_genesis() { - let now = GENESIS_MS - 5_000; - assert_eq!(ms_until_tick(0, now, GENESIS_MS), 5_000); - } - - #[test] - fn next_tick_time_caught_up_returns_none() { - assert_eq!(next_tick_time(7, 7), None); - // Clock skew (store ahead of wall) must not produce a tick either. - assert_eq!(next_tick_time(8, 7), None); - } - - #[test] - fn next_tick_time_steps_one_interval() { - assert_eq!(next_tick_time(6, 7), Some(7)); - } - - #[test] - fn next_tick_time_replays_each_missed_interval() { - // Scenario from issue #413: interval 0 of some slot processed on - // time, but block building overran into interval 2's window. Both - // missed intervals are still processed, one at a time, so interval - // 1's attestation duty runs instead of being silently skipped. - let store_time = 2 * INTERVALS_PER_SLOT; // slot 2, interval 0 - let wall_time = store_time + 2; - let next = next_tick_time(store_time, wall_time).unwrap(); - assert_eq!(next % INTERVALS_PER_SLOT, 1); // attestation interval - assert_eq!(next_tick_time(next, wall_time), Some(next + 1)); - assert_eq!(next_tick_time(next + 1, wall_time), None); - } - - #[test] - fn next_tick_time_replays_up_to_a_full_slot() { - let store_time = 7; - let wall_time = store_time + INTERVALS_PER_SLOT; - assert_eq!(next_tick_time(store_time, wall_time), Some(store_time + 1)); - } - - #[test] - fn next_tick_time_jumps_when_more_than_a_slot_behind() { - // Mirrors store::on_tick's fast-forward: don't replay every missed - // interval after a long stall, jump straight to the wall clock. - let store_time = 7; - let wall_time = store_time + INTERVALS_PER_SLOT + 1; - assert_eq!(next_tick_time(store_time, wall_time), Some(wall_time)); - } -} From d270f9ae8fd94ee03faad897a4f8731102758f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 4 Jun 2026 11:06:26 -0300 Subject: [PATCH 3/3] fix: re-tick immediately when on_tick overruns its interval boundary handle_tick scheduled the next tick from the timestamp captured at entry, so a duty overrunning the 800ms interval (e.g. inline block-building proofs ~1.3s) pushed the next tick past the following boundary and that interval's duty never ran: a proposing node never attested on its own slot, which alone prevents 1- and 2-node devnets from ever justifying. Re-read the clock after on_tick and compare elapsed time against the interval slack remaining at entry. If a boundary was crossed, tick again immediately so the missed interval's duty still runs; otherwise schedule for the next boundary from the post-tick timestamp. The comparison keys off the boundary actually being crossed rather than a fixed 800ms threshold, so it also handles scheduling jitter and exact-boundary entry. Closes #413 --- crates/blockchain/src/lib.rs | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index 1a6fb766..145f82d2 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -66,6 +66,14 @@ fn ms_until_next_interval(now_ms: u64, genesis_time_ms: u64) -> u64 { MILLISECONDS_PER_INTERVAL - (ms_since_genesis % MILLISECONDS_PER_INTERVAL) } +/// Current UNIX timestamp in milliseconds. +fn unix_now_ms() -> u64 { + SystemTime::UNIX_EPOCH + .elapsed() + .expect("already past the unix epoch") + .as_millis() as u64 +} + impl BlockChain { pub fn spawn( store: Store, @@ -81,10 +89,7 @@ impl BlockChain { // Catch XMSS keys up to the current slot before the first tick // store.time() doesn't work here: after an offline gap it lags wall-clock by // exactly the gap we need to catch up through - let now_ms = SystemTime::UNIX_EPOCH - .elapsed() - .expect("already past the unix epoch") - .as_millis() as u64; + let now_ms = unix_now_ms(); let current_slot = (now_ms.saturating_sub(genesis_time * 1000) / MILLISECONDS_PER_SLOT) as u32; key_manager.advance_keys_to(current_slot); @@ -692,14 +697,22 @@ pub(crate) trait BlockChainProtocol: Send + Sync { impl BlockChainServer { #[send_handler] async fn handle_tick(&mut self, _msg: block_chain_protocol::Tick, ctx: &Context) { - let timestamp = SystemTime::UNIX_EPOCH - .elapsed() - .expect("already past the unix epoch"); - let now_ms = timestamp.as_millis() as u64; + let now_ms = unix_now_ms(); self.on_tick(now_ms, ctx).await; - // Schedule the next tick at the next interval boundary + let genesis_time_ms = self.store.config().genesis_time * 1000; - let ms_to_next_interval = ms_until_next_interval(now_ms, genesis_time_ms); + let remaining_at_entry = ms_until_next_interval(now_ms, genesis_time_ms); + let now_after_tick = unix_now_ms(); + let elapsed = now_after_tick.saturating_sub(now_ms); + + // If on_tick ran past the next interval boundary, tick again + // immediately so that interval's duty still runs (issue #413). + let ms_to_next_interval = if elapsed >= remaining_at_entry { + 0 + } else { + // Schedule the next tick at the next interval boundary + ms_until_next_interval(now_after_tick, genesis_time_ms) + }; send_after( Duration::from_millis(ms_to_next_interval), ctx.clone(),