runtimeverification · ehildenb · Jun 15, 2026 · Jun 11, 2026 · Jun 12, 2026 · Jun 14, 2026
diff --git a/kevm-pyk/src/kevm_pyk/__main__.py b/kevm-pyk/src/kevm_pyk/__main__.py
@@ -254,6 +254,13 @@ def exec_prove(options: ProveOptions) -> None:
     else:
         kore_rpc_command = options.kore_rpc_command
 
+    # Both booster servers parse the flag (shared Booster.CLOptions); plain kore-rpc does not,
+    # so the budget is skipped (with a warning) rather than rejected now that it defaults on.
+    if Path(kore_rpc_command[0]).name in ('kore-rpc-booster', 'booster-dev'):
+        kore_rpc_command += ('--equation-max-local-steps', str(options.equation_max_local_steps))
+    else:
+        _LOGGER.warning(f'Ignoring --equation-max-local-steps for non-booster server: {kore_rpc_command[0]}')
+
     def is_functional(claim: KClaim) -> bool:
         claim_lhs = claim.body
         if type(claim_lhs) is KRewrite:

diff --git a/kevm-pyk/src/kevm_pyk/cli.py b/kevm-pyk/src/kevm_pyk/cli.py
@@ -192,6 +192,17 @@ def _create_argument_parser() -> ArgumentParser:
         action='store_true',
         help='Skip the Kore simplification pass after Booster; assume_defined still uses Kore for #Ceil evaluation.',
     )
+    prove_args.add_argument(
+        '--equation-max-local-steps',
+        dest='equation_max_local_steps',
+        type=int,
+        default=None,
+        help=(
+            'Booster equation budget for in-place evaluation at rewritten subterms before restarting '
+            'traversal from the top (default: 20; 0 = restart-only). Passed through to '
+            'kore-rpc-booster / booster-dev; requires a haskell-backend that supports the flag.'
+        ),
+    )
     prove_args.add_argument(
         '--max-frontier-parallel',
         type=int,
@@ -611,13 +622,15 @@ class ProveOptions(
 ):
     reinit: bool
     booster_only_simplify: bool
+    equation_max_local_steps: int
     max_frontier_parallel: int
 
     @staticmethod
     def default() -> dict[str, Any]:
         return {
             'reinit': False,
             'booster_only_simplify': False,
+            'equation_max_local_steps': 20,
             'max_frontier_parallel': 1,
         }
 

diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md
@@ -48,6 +48,13 @@ module BUF
     rule [powByteLen-lt-concrete]: CONST <Int #powByteLen(N) => true
         requires 0 <=Int N andBool CONST <Int 2 ^Int (8 *Int N)
         [simplification, concrete(CONST, N), preserves-definedness]
+    // Companion false-direction: a concrete CONST that does not fit into N bytes is not
+    // less than #powByteLen(N). Together with the rule above this makes the concrete
+    // comparison total. The two requires are mutually exclusive and neither RHS re-exposes
+    // `_ <Int #powByteLen(_)`, so the pair is loop-safe.
+    rule [powByteLen-lt-concrete-false]: CONST <Int #powByteLen(N) => false
+        requires 0 <=Int N andBool 2 ^Int (8 *Int N) <=Int CONST
+        [simplification, concrete(CONST, N), preserves-definedness]
     rule #write(WM, IDX, VAL) => WM [ IDX := #buf(1, VAL) ] [simplification]
 
     rule #bufStrict(SIZE, DATA) => #buf(SIZE, DATA)

diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/evm-int-simplification.k b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/evm-int-simplification.k
@@ -136,12 +136,12 @@ module EVM-INT-SIMPLIFICATION
   // neither rule fires on the other's requires.
   rule [asWord-lt-false]: #asWord ( B:Bytes )  <Int X:Int => false requires X <=Int #asWord ( B ) [simplification]
   rule [asWord-le-false]: #asWord ( B:Bytes ) <=Int X:Int => false requires X  <Int #asWord ( B ) [simplification]
-  // Comparison: `#asWord(B) == X` is false when the path condition says X =/= #asWord(B).
-  // The requires X =/=Int #asWord(B) expands to notBool(X ==Int #asWord(B)) via the K builtin;
-  // since ==Int is comm, this matches the path condition fact notBool(#asWord(B) ==Int X).
-  // No loop: our rule matches #asWord on the LEFT; after builtin expansion the recursive
-  // term X ==Int #asWord(B) has X on the LEFT, so this rule does not fire on it.
-  rule [asWord-eq-false]: #asWord ( B:Bytes ) ==Int X:Int => false requires X =/=Int #asWord ( B ) [simplification]
+  // Note: there is intentionally no `asWord-eq-false` analogue of the `<Int`/`<=Int` rules above.
+  // Such a rule (`#asWord(B) ==Int X => false requires X =/=Int #asWord(B)`) is not loop-safe: when
+  // both operands are `#asWord`, applying it rewrites the equality into its own `requires` with the
+  // two operands swapped — still `#asWord`-on-the-left — so it re-matches and recurses without bound
+  // in the Kore simplifier. `<Int`/`<=Int` are antisymmetric, so their swapped requires is a
+  // different relation and does not re-match. See `docs/asword-eq-false-loop.md`.
 
   // Comparison: `#asWord` of `+Bytes` when lower bytes match, with `<Int`
   rule [asWord-concat-lt]:

diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/int-simplification.k b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/int-simplification.k
@@ -287,7 +287,15 @@ module INT-SIMPLIFICATION-COMMON
     rule A ==Int B => false requires 0 <=Int A andBool B <Int 0 [simplification, concrete(B)]
 
     // A ==Int B is false when A < B is a known path-condition fact (B concrete).
-    rule [eq-false-lt]: A:Int ==Int B:Int => false requires A <Int B [simplification, concrete(B)]
+    // TEMPORARILY DISABLED (evm-semantics #2859 / kontrol #1153): this rule drives the CSE Kore-simplifier
+    // non-termination. During the Kore definedness check of `#Ceil(<accounts> AccountCellMap)`, the account
+    // distinctness predicates `ACCT_ID ==Int <concrete address> => false` invite this rule, whose side
+    // condition `A <Int B` is undetermined for a range-bounded symbolic account id and a large concrete
+    // address — so it cannot fire but spawns a ~500ms SMT round-trip per attempt, repeated as the constraint
+    // simplifier re-presents the predicate without reaching a fixpoint. Confirmed the sole rule driving those
+    // undetermined SMT calls (removing it took the rule-driven SMT count from 45 to 0 on the exemplar).
+    // Re-introduce once Booster performs definedness without falling back to the Kore simplifier.
+    // rule [eq-false-lt]: A:Int ==Int B:Int => false requires A <Int B [simplification, concrete(B)]
 
     rule 0 <Int X => true requires 0 <=Int X andBool notBool (X ==Int 0) [simplification(60)]
 

diff --git a/kevm-pyk/src/tests/conftest.py b/kevm-pyk/src/tests/conftest.py
@@ -91,6 +91,16 @@ def pytest_addoption(parser: Parser) -> None:
         default=False,
         help='Skip the Kore simplification pass after Booster for all simplify/execute/implies calls.',
     )
+    parser.addoption(
+        '--equation-max-local-steps',
+        type=int,
+        default=20,
+        help=(
+            'Booster equation budget for in-place evaluation at rewritten subterms '
+            '(default: 20; 0 = restart-only). Threaded to kore-rpc-booster / booster-dev '
+            'for all proof tests.'
+        ),
+    )
 
 
 @pytest.fixture
@@ -161,3 +171,8 @@ def booster_log_levels(request: FixtureRequest) -> list[str] | None:
 @pytest.fixture(scope='session')
 def booster_only_simplify(request: FixtureRequest) -> bool:
     return request.config.getoption('--booster-only-simplify')
+
+
+@pytest.fixture(scope='session')
+def equation_max_local_steps(request: FixtureRequest) -> int:
+    return request.config.getoption('--equation-max-local-steps')
diff --git a/kevm-pyk/src/tests/integration/test_prove.py b/kevm-pyk/src/tests/integration/test_prove.py
@@ -198,6 +198,7 @@ def _test_prove(
     workers: int | None = None,
     direct_subproof_rules: bool = False,
     booster_only_simplify: bool = False,
+    equation_max_local_steps: int = 20,
 ) -> None:
     caplog.set_level(logging.INFO)
 
@@ -254,6 +255,7 @@ def _test_prove(
             'direct_subproof_rules': direct_subproof_rules,
             'booster_only_simplify': booster_only_simplify,
             'claim_labels': claim_labels,
+            'equation_max_local_steps': equation_max_local_steps,
         }
         if haskell_logging:
             options_dict['haskell_log_entries'] = booster_log_levels or list(HASKELL_LOGGING_ENTRIES)
@@ -362,6 +364,7 @@ def test_prove_rules(
     booster_log_levels: list[str] | None,
     claim_labels: list[str] | None,
     booster_only_simplify: bool,
+    equation_max_local_steps: int,
 ) -> None:
     _test_prove(
         spec_file,
@@ -378,6 +381,7 @@ def test_prove_rules(
         booster_log_levels=booster_log_levels,
         claim_labels=claim_labels,
         booster_only_simplify=booster_only_simplify,
+        equation_max_local_steps=equation_max_local_steps,
     )
 
 
@@ -400,6 +404,7 @@ def test_prove_functional(
     booster_log_levels: list[str] | None,
     claim_labels: list[str] | None,
     booster_only_simplify: bool,
+    equation_max_local_steps: int,
 ) -> None:
     _test_prove(
         spec_file,
@@ -417,6 +422,7 @@ def test_prove_functional(
         claim_labels=claim_labels,
         workers=8,
         booster_only_simplify=booster_only_simplify,
+        equation_max_local_steps=equation_max_local_steps,
     )
 
 
@@ -430,6 +436,7 @@ def test_prove_dss(
     booster_log_levels: list[str] | None,
     claim_labels: list[str] | None,
     booster_only_simplify: bool,
+    equation_max_local_steps: int,
 ) -> None:
     for spec_file in [REPO_ROOT / 'tests/specs/mcd/vat-spec.k', REPO_ROOT / 'tests/specs/mcd-structured/vat-spec.k']:
         _test_prove(
@@ -449,6 +456,7 @@ def test_prove_dss(
             workers=8,
             direct_subproof_rules=True,
             booster_only_simplify=booster_only_simplify,
+            equation_max_local_steps=equation_max_local_steps,
         )
 
 

diff --git a/tests/specs/functional/asword-eq-false-loop-spec.k b/tests/specs/functional/asword-eq-false-loop-spec.k
@@ -0,0 +1,43 @@
+requires "verification.k"
+
+module ASWORD-EQ-FALSE-LOOP-SPEC
+    imports VERIFICATION
+
+    // Regression test for the non-terminating `asWord-eq-false` simplification loop
+    // (evm-semantics #2859 / kontrol #1153). The offending rule has been removed from
+    // `lemmas/evm-int-simplification.k`; these claims guard against it being reintroduced.
+    //
+    // The removed rule was:
+    //     rule [asWord-eq-false]: #asWord(B) ==Int X => false requires X =/=Int #asWord(B) [simplification]
+    // It is loop-safe ONLY when `X` is not itself an `#asWord(_)`. When BOTH sides of the
+    // `==Int` are `#asWord` terms, applying it rewrites the equality into its own `requires`
+    // with the two `#asWord` operands swapped — still `#asWord`-on-the-left — so it matches
+    // again, swaps back, and recurses without bound (observed: one request looping >21 min at
+    // 96% CPU, the rule nested 5,406 deep in the Kore simplifier).
+    //
+    // Each claim below asks the simplifier to evaluate an equality with `#asWord` on BOTH
+    // sides, known unequal from the path condition. Expected result: `false`, discharged
+    // against the path condition. If the looping rule is reintroduced, simplification of the
+    // left-hand `runLemma` term loops and the proof TIMES OUT. `asWord-lt-false` /
+    // `asWord-le-false` need no analogue: `<Int` / `<=Int` are antisymmetric, so their
+    // swapped requires is a different, non-matching relation.
+
+    // Minimal trigger: two symbolic byte-words, known unequal.
+    claim [asword-eq-false-symmetric]:
+      <k> runLemma ( #asWord ( B1:Bytes ) ==Int #asWord ( B2:Bytes ) ) => doneLemma ( false ) ... </k>
+      requires #asWord ( B1 ) =/=Int #asWord ( B2 )
+
+    // The shape actually observed in the CSE proofs: a plain word vs an `#asWord` over a
+    // buffer expression (a `+Bytes` of symbolic operands does not reduce, so the right side
+    // stays `#asWord`, keeping both operands `#asWord` exactly as in the captured loop term).
+    claim [asword-eq-false-buffer]:
+      <k> runLemma ( #asWord ( B1:Bytes ) ==Int #asWord ( B2:Bytes +Bytes B3:Bytes ) ) => doneLemma ( false ) ... </k>
+      requires #asWord ( B1 ) =/=Int #asWord ( B2 +Bytes B3 )
+
+    // Same hazard reached via `=/=Int` (which desugars to `notBool(_ ==Int_)`), the form in
+    // which the loop term appeared inside another rule's requires.
+    claim [asword-neq-true-symmetric]:
+      <k> runLemma ( #asWord ( B1:Bytes ) =/=Int #asWord ( B2:Bytes ) ) => doneLemma ( true ) ... </k>
+      requires #asWord ( B1 ) =/=Int #asWord ( B2 )
+
+endmodule
diff --git a/tests/specs/functional/eq-false-lt-loop-spec.k b/tests/specs/functional/eq-false-lt-loop-spec.k
@@ -0,0 +1,68 @@
+requires "verification.k"
+
+module EQ-FALSE-LT-LOOP-SPEC
+    imports VERIFICATION
+
+    // Regression exemplar for the SECOND CSE non-termination layer (evm-semantics #2859 / kontrol #1153),
+    // the one that remains after `asWord-eq-false` is removed. See `evm-semantics-cse-loop-change-request.md`.
+    //
+    // HOW THIS WAS FOUND: rebuilding KEVM with `asWord-eq-false` deleted (branch `asword-eq-false-loop-fix`,
+    // == kontrol-side "variant A") does NOT stop the CSE proofs hanging — proof #4
+    // `ArithmeticCallTest.test_double_add_sub_external` still spins >25 min in one `execute` request. The
+    // loop simply RELOCATES. Server-side context logging of the hanging request shows two rules dominating,
+    // ~16-17k attempts each, in two different engines under the same execute request:
+    //
+    //   * booster > execute > simplify > … > [eq-false-lt] > smt        (15 865 attempts)
+    //       rule [eq-false-lt]: A:Int ==Int B:Int => false requires A <Int B [simplification, concrete(B)]
+    //   * kore > execute > constraint > term:90151e4 > <AccountCellMap #Ceil> > success   (17 082 attempts,
+    //       re-firing on the SAME constraint term)
+    //
+    // MECHANISM (a two-engine fixpoint failure, NOT a single self-loop like asWord-eq-false):
+    //   1. `#Ceil(<accounts> AccountCellMap)` definedness requires the account ids be DISTINCT, i.e. it emits
+    //      predicates of the form `ACCOUNT_ID ==Int <concrete account address> => false`.
+    //   2. `eq-false-lt` (concrete RHS) is invited to discharge each one via its side condition
+    //      `ACCOUNT_ID <Int <concrete address>`. With ACCOUNT_ID a symbolic account id constrained only to a
+    //      range (e.g. `[0, pow160)`) that does NOT decide the comparison, SMT returns (Sat,Sat) —
+    //      UNDETERMINED (2 518 such verdicts captured). The rule cannot fire and the predicate cannot reduce.
+    //   3. Because the account-distinctness predicate never simplifies to a stable form, the Kore constraint
+    //      simplifier never reaches a fixpoint and re-evaluates the AccountCellMap `#Ceil` on the same term
+    //      thousands of times — each pass paying a fresh ~500 ms undetermined SMT round-trip per account id.
+    //
+    // The captured concrete address is `645326474426547203313410069153905908525362434349` (a 160-bit value);
+    // the symbolic ids were CALLER_ID, ORIGIN_ID, and the deployed-contract ids.
+    //
+    // SCOPE / CAVEAT — READ BEFORE RELYING ON THIS:
+    //   The faithful trigger is the INTERACTION of AccountCellMap definedness with `eq-false-lt`, driven by
+    //   the Kore constraint simplifier's lack of a fixpoint guard. A bare `runLemma(ACCT ==Int <concrete>)`
+    //   (claim 1 below) reproduces the EXPENSIVE-UNDETERMINED-SMT step but will most likely return the term
+    //   undetermined ONCE rather than spin, because the AccountCellMap `#Ceil` that re-presents the predicate
+    //   is absent. Claim 2 adds a two-account `<accounts>` cell so the definedness machinery is in play; this
+    //   is the closer reproduction, but whether it spins depends on the backend's constraint-simplification
+    //   fixpoint behaviour, which is the real defect. Please run BOTH on the buggy build and report which
+    //   actually loops; the primary recommendation to the team is a backend fixpoint/termination guard plus
+    //   making `eq-false-lt` cheap (or skipped) when `A <Int B` is undetermined — not a lemma deletion.
+
+    // Claim 1 — the eq-false-lt undetermined-SMT step in isolation (the per-pass cost of the loop).
+    // Exactly the captured comparison: symbolic account id vs the concrete address, range-bounded so that
+    // `ACCT_ID <Int 645…349` is undetermined.
+    claim [eq-false-lt-undetermined-acctid]:
+      <k> runLemma ( ACCT_ID:Int ==Int 645326474426547203313410069153905908525362434349 )
+       => doneLemma ( ?_RESULT:Bool ) ... </k>
+      requires 0 <=Int ACCT_ID andBool ACCT_ID <Int pow160
+
+    // Claim 2 — closer reproduction: two symbolic-id accounts in the <accounts> cell, so evaluating the
+    // configuration's definedness invokes the AccountCellMap `#Ceil` that emits the account-distinctness
+    // predicate `ACCT_ID_1 ==Int ACCT_ID_2 => false`, which `eq-false-lt` then tries (and fails) to discharge.
+    // Both ids are range-bounded but unordered, so the distinctness comparison is undetermined — the captured
+    // shape. (Adjust the cell context to match the project's VERIFICATION module if `<accounts>` is wrapped.)
+    claim [eq-false-lt-acctmap-definedness]:
+      <k> runLemma ( .Bytes ) => doneLemma ( .Bytes ) ... </k>
+      <accounts>
+        <account> <acctID> ACCT_ID_1:Int </acctID> ... </account>
+        <account> <acctID> ACCT_ID_2:Int </acctID> ... </account>
+        ...
+      </accounts>
+      requires 0 <=Int ACCT_ID_1 andBool ACCT_ID_1 <Int pow160
+       andBool 0 <=Int ACCT_ID_2 andBool ACCT_ID_2 <Int pow160
+
+endmodule
diff --git a/tests/specs/functional/lemmas-no-smt-spec.k b/tests/specs/functional/lemmas-no-smt-spec.k
@@ -85,4 +85,12 @@ module LEMMAS-NO-SMT-SPEC
     claim [powByteLen-lt-concrete]:
       <k> runLemma ( 31 <Int #powByteLen(32) ) => doneLemma ( true ) ... </k>
 
+    // powByteLen-lt-concrete-false: CONST <Int #powByteLen(N) => false when CONST does
+    // not fit into N bytes. The companion false-direction of the rule above; together they
+    // make the concrete comparison total. Regression for the `asWord-unif-03/04` lemmas-spec
+    // claims, which decompose an out-of-range `#asWord(B) ==Int CONST` into exactly this
+    // comparison and previously relied on the (removed, looping) `asWord-eq-false` to close.
+    claim [powByteLen-lt-concrete-false]:
+      <k> runLemma ( 2 ^Int 160 <Int #powByteLen(16) ) => doneLemma ( false ) ... </k>
+
 endmodule