diff --git a/kevm-pyk/src/kevm_pyk/__main__.py b/kevm-pyk/src/kevm_pyk/__main__.py index 72457cc9a4..d0cc9326f6 100644 --- a/kevm-pyk/src/kevm_pyk/__main__.py +++ b/kevm-pyk/src/kevm_pyk/__main__.py @@ -254,6 +254,13 @@ def exec_prove(options: ProveOptions) -> None: else: kore_rpc_command = options.kore_rpc_command + # Both booster servers parse the flag (shared Booster.CLOptions); plain kore-rpc does not, + # so the budget is skipped (with a warning) rather than rejected now that it defaults on. + if Path(kore_rpc_command[0]).name in ('kore-rpc-booster', 'booster-dev'): + kore_rpc_command += ('--equation-max-local-steps', str(options.equation_max_local_steps)) + else: + _LOGGER.warning(f'Ignoring --equation-max-local-steps for non-booster server: {kore_rpc_command[0]}') + def is_functional(claim: KClaim) -> bool: claim_lhs = claim.body if type(claim_lhs) is KRewrite: diff --git a/kevm-pyk/src/kevm_pyk/cli.py b/kevm-pyk/src/kevm_pyk/cli.py index cf38e22d2c..65fbd14a4d 100644 --- a/kevm-pyk/src/kevm_pyk/cli.py +++ b/kevm-pyk/src/kevm_pyk/cli.py @@ -192,6 +192,17 @@ def _create_argument_parser() -> ArgumentParser: action='store_true', help='Skip the Kore simplification pass after Booster; assume_defined still uses Kore for #Ceil evaluation.', ) + prove_args.add_argument( + '--equation-max-local-steps', + dest='equation_max_local_steps', + type=int, + default=None, + help=( + 'Booster equation budget for in-place evaluation at rewritten subterms before restarting ' + 'traversal from the top (default: 20; 0 = restart-only). Passed through to ' + 'kore-rpc-booster / booster-dev; requires a haskell-backend that supports the flag.' + ), + ) prove_args.add_argument( '--max-frontier-parallel', type=int, @@ -611,6 +622,7 @@ class ProveOptions( ): reinit: bool booster_only_simplify: bool + equation_max_local_steps: int max_frontier_parallel: int @staticmethod @@ -618,6 +630,7 @@ def default() -> dict[str, Any]: return { 'reinit': False, 'booster_only_simplify': False, + 'equation_max_local_steps': 20, 'max_frontier_parallel': 1, } diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md index 1ea46865b8..3a8912d385 100644 --- a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md +++ b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md @@ -48,6 +48,13 @@ module BUF rule [powByteLen-lt-concrete]: CONST true requires 0 <=Int N andBool CONST false + requires 0 <=Int N andBool 2 ^Int (8 *Int N) <=Int CONST + [simplification, concrete(CONST, N), preserves-definedness] rule #write(WM, IDX, VAL) => WM [ IDX := #buf(1, VAL) ] [simplification] rule #bufStrict(SIZE, DATA) => #buf(SIZE, DATA) diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/evm-int-simplification.k b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/evm-int-simplification.k index f1735b4664..f04d4ccdd9 100644 --- a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/evm-int-simplification.k +++ b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/lemmas/evm-int-simplification.k @@ -136,12 +136,12 @@ module EVM-INT-SIMPLIFICATION // neither rule fires on the other's requires. rule [asWord-lt-false]: #asWord ( B:Bytes ) false requires X <=Int #asWord ( B ) [simplification] rule [asWord-le-false]: #asWord ( B:Bytes ) <=Int X:Int => false requires X false requires X =/=Int #asWord ( B ) [simplification] + // Note: there is intentionally no `asWord-eq-false` analogue of the ` false requires X =/=Int #asWord(B)`) is not loop-safe: when + // both operands are `#asWord`, applying it rewrites the equality into its own `requires` with the + // two operands swapped — still `#asWord`-on-the-left — so it re-matches and recurses without bound + // in the Kore simplifier. ` false requires 0 <=Int A andBool B false requires A AccountCellMap)`, the account + // distinctness predicates `ACCT_ID ==Int => false` invite this rule, whose side + // condition `A false requires A true requires 0 <=Int X andBool notBool (X ==Int 0) [simplification(60)] diff --git a/kevm-pyk/src/tests/conftest.py b/kevm-pyk/src/tests/conftest.py index f1c6787e2d..2a88ee08ed 100644 --- a/kevm-pyk/src/tests/conftest.py +++ b/kevm-pyk/src/tests/conftest.py @@ -91,6 +91,16 @@ def pytest_addoption(parser: Parser) -> None: default=False, help='Skip the Kore simplification pass after Booster for all simplify/execute/implies calls.', ) + parser.addoption( + '--equation-max-local-steps', + type=int, + default=20, + help=( + 'Booster equation budget for in-place evaluation at rewritten subterms ' + '(default: 20; 0 = restart-only). Threaded to kore-rpc-booster / booster-dev ' + 'for all proof tests.' + ), + ) @pytest.fixture @@ -161,3 +171,8 @@ def booster_log_levels(request: FixtureRequest) -> list[str] | None: @pytest.fixture(scope='session') def booster_only_simplify(request: FixtureRequest) -> bool: return request.config.getoption('--booster-only-simplify') + + +@pytest.fixture(scope='session') +def equation_max_local_steps(request: FixtureRequest) -> int: + return request.config.getoption('--equation-max-local-steps') diff --git a/kevm-pyk/src/tests/integration/test_prove.py b/kevm-pyk/src/tests/integration/test_prove.py index 0ee7b51922..ab628db272 100644 --- a/kevm-pyk/src/tests/integration/test_prove.py +++ b/kevm-pyk/src/tests/integration/test_prove.py @@ -198,6 +198,7 @@ def _test_prove( workers: int | None = None, direct_subproof_rules: bool = False, booster_only_simplify: bool = False, + equation_max_local_steps: int = 20, ) -> None: caplog.set_level(logging.INFO) @@ -254,6 +255,7 @@ def _test_prove( 'direct_subproof_rules': direct_subproof_rules, 'booster_only_simplify': booster_only_simplify, 'claim_labels': claim_labels, + 'equation_max_local_steps': equation_max_local_steps, } if haskell_logging: options_dict['haskell_log_entries'] = booster_log_levels or list(HASKELL_LOGGING_ENTRIES) @@ -362,6 +364,7 @@ def test_prove_rules( booster_log_levels: list[str] | None, claim_labels: list[str] | None, booster_only_simplify: bool, + equation_max_local_steps: int, ) -> None: _test_prove( spec_file, @@ -378,6 +381,7 @@ def test_prove_rules( booster_log_levels=booster_log_levels, claim_labels=claim_labels, booster_only_simplify=booster_only_simplify, + equation_max_local_steps=equation_max_local_steps, ) @@ -400,6 +404,7 @@ def test_prove_functional( booster_log_levels: list[str] | None, claim_labels: list[str] | None, booster_only_simplify: bool, + equation_max_local_steps: int, ) -> None: _test_prove( spec_file, @@ -417,6 +422,7 @@ def test_prove_functional( claim_labels=claim_labels, workers=8, booster_only_simplify=booster_only_simplify, + equation_max_local_steps=equation_max_local_steps, ) @@ -430,6 +436,7 @@ def test_prove_dss( booster_log_levels: list[str] | None, claim_labels: list[str] | None, booster_only_simplify: bool, + equation_max_local_steps: int, ) -> None: for spec_file in [REPO_ROOT / 'tests/specs/mcd/vat-spec.k', REPO_ROOT / 'tests/specs/mcd-structured/vat-spec.k']: _test_prove( @@ -449,6 +456,7 @@ def test_prove_dss( workers=8, direct_subproof_rules=True, booster_only_simplify=booster_only_simplify, + equation_max_local_steps=equation_max_local_steps, ) diff --git a/tests/specs/functional/asword-eq-false-loop-spec.k b/tests/specs/functional/asword-eq-false-loop-spec.k new file mode 100644 index 0000000000..8b2c361b45 --- /dev/null +++ b/tests/specs/functional/asword-eq-false-loop-spec.k @@ -0,0 +1,43 @@ +requires "verification.k" + +module ASWORD-EQ-FALSE-LOOP-SPEC + imports VERIFICATION + + // Regression test for the non-terminating `asWord-eq-false` simplification loop + // (evm-semantics #2859 / kontrol #1153). The offending rule has been removed from + // `lemmas/evm-int-simplification.k`; these claims guard against it being reintroduced. + // + // The removed rule was: + // rule [asWord-eq-false]: #asWord(B) ==Int X => false requires X =/=Int #asWord(B) [simplification] + // It is loop-safe ONLY when `X` is not itself an `#asWord(_)`. When BOTH sides of the + // `==Int` are `#asWord` terms, applying it rewrites the equality into its own `requires` + // with the two `#asWord` operands swapped — still `#asWord`-on-the-left — so it matches + // again, swaps back, and recurses without bound (observed: one request looping >21 min at + // 96% CPU, the rule nested 5,406 deep in the Kore simplifier). + // + // Each claim below asks the simplifier to evaluate an equality with `#asWord` on BOTH + // sides, known unequal from the path condition. Expected result: `false`, discharged + // against the path condition. If the looping rule is reintroduced, simplification of the + // left-hand `runLemma` term loops and the proof TIMES OUT. `asWord-lt-false` / + // `asWord-le-false` need no analogue: ` runLemma ( #asWord ( B1:Bytes ) ==Int #asWord ( B2:Bytes ) ) => doneLemma ( false ) ... + requires #asWord ( B1 ) =/=Int #asWord ( B2 ) + + // The shape actually observed in the CSE proofs: a plain word vs an `#asWord` over a + // buffer expression (a `+Bytes` of symbolic operands does not reduce, so the right side + // stays `#asWord`, keeping both operands `#asWord` exactly as in the captured loop term). + claim [asword-eq-false-buffer]: + runLemma ( #asWord ( B1:Bytes ) ==Int #asWord ( B2:Bytes +Bytes B3:Bytes ) ) => doneLemma ( false ) ... + requires #asWord ( B1 ) =/=Int #asWord ( B2 +Bytes B3 ) + + // Same hazard reached via `=/=Int` (which desugars to `notBool(_ ==Int_)`), the form in + // which the loop term appeared inside another rule's requires. + claim [asword-neq-true-symmetric]: + runLemma ( #asWord ( B1:Bytes ) =/=Int #asWord ( B2:Bytes ) ) => doneLemma ( true ) ... + requires #asWord ( B1 ) =/=Int #asWord ( B2 ) + +endmodule diff --git a/tests/specs/functional/eq-false-lt-loop-spec.k b/tests/specs/functional/eq-false-lt-loop-spec.k new file mode 100644 index 0000000000..b0875c256e --- /dev/null +++ b/tests/specs/functional/eq-false-lt-loop-spec.k @@ -0,0 +1,68 @@ +requires "verification.k" + +module EQ-FALSE-LT-LOOP-SPEC + imports VERIFICATION + + // Regression exemplar for the SECOND CSE non-termination layer (evm-semantics #2859 / kontrol #1153), + // the one that remains after `asWord-eq-false` is removed. See `evm-semantics-cse-loop-change-request.md`. + // + // HOW THIS WAS FOUND: rebuilding KEVM with `asWord-eq-false` deleted (branch `asword-eq-false-loop-fix`, + // == kontrol-side "variant A") does NOT stop the CSE proofs hanging — proof #4 + // `ArithmeticCallTest.test_double_add_sub_external` still spins >25 min in one `execute` request. The + // loop simply RELOCATES. Server-side context logging of the hanging request shows two rules dominating, + // ~16-17k attempts each, in two different engines under the same execute request: + // + // * booster > execute > simplify > … > [eq-false-lt] > smt (15 865 attempts) + // rule [eq-false-lt]: A:Int ==Int B:Int => false requires A execute > constraint > term:90151e4 > > success (17 082 attempts, + // re-firing on the SAME constraint term) + // + // MECHANISM (a two-engine fixpoint failure, NOT a single self-loop like asWord-eq-false): + // 1. `#Ceil( AccountCellMap)` definedness requires the account ids be DISTINCT, i.e. it emits + // predicates of the form `ACCOUNT_ID ==Int => false`. + // 2. `eq-false-lt` (concrete RHS) is invited to discharge each one via its side condition + // `ACCOUNT_ID `. With ACCOUNT_ID a symbolic account id constrained only to a + // range (e.g. `[0, pow160)`) that does NOT decide the comparison, SMT returns (Sat,Sat) — + // UNDETERMINED (2 518 such verdicts captured). The rule cannot fire and the predicate cannot reduce. + // 3. Because the account-distinctness predicate never simplifies to a stable form, the Kore constraint + // simplifier never reaches a fixpoint and re-evaluates the AccountCellMap `#Ceil` on the same term + // thousands of times — each pass paying a fresh ~500 ms undetermined SMT round-trip per account id. + // + // The captured concrete address is `645326474426547203313410069153905908525362434349` (a 160-bit value); + // the symbolic ids were CALLER_ID, ORIGIN_ID, and the deployed-contract ids. + // + // SCOPE / CAVEAT — READ BEFORE RELYING ON THIS: + // The faithful trigger is the INTERACTION of AccountCellMap definedness with `eq-false-lt`, driven by + // the Kore constraint simplifier's lack of a fixpoint guard. A bare `runLemma(ACCT ==Int )` + // (claim 1 below) reproduces the EXPENSIVE-UNDETERMINED-SMT step but will most likely return the term + // undetermined ONCE rather than spin, because the AccountCellMap `#Ceil` that re-presents the predicate + // is absent. Claim 2 adds a two-account `` cell so the definedness machinery is in play; this + // is the closer reproduction, but whether it spins depends on the backend's constraint-simplification + // fixpoint behaviour, which is the real defect. Please run BOTH on the buggy build and report which + // actually loops; the primary recommendation to the team is a backend fixpoint/termination guard plus + // making `eq-false-lt` cheap (or skipped) when `A runLemma ( ACCT_ID:Int ==Int 645326474426547203313410069153905908525362434349 ) + => doneLemma ( ?_RESULT:Bool ) ... + requires 0 <=Int ACCT_ID andBool ACCT_ID cell, so evaluating the + // configuration's definedness invokes the AccountCellMap `#Ceil` that emits the account-distinctness + // predicate `ACCT_ID_1 ==Int ACCT_ID_2 => false`, which `eq-false-lt` then tries (and fails) to discharge. + // Both ids are range-bounded but unordered, so the distinctness comparison is undetermined — the captured + // shape. (Adjust the cell context to match the project's VERIFICATION module if `` is wrapped.) + claim [eq-false-lt-acctmap-definedness]: + runLemma ( .Bytes ) => doneLemma ( .Bytes ) ... + + ACCT_ID_1:Int ... + ACCT_ID_2:Int ... + ... + + requires 0 <=Int ACCT_ID_1 andBool ACCT_ID_1 runLemma ( 31 doneLemma ( true ) ... + // powByteLen-lt-concrete-false: CONST false when CONST does + // not fit into N bytes. The companion false-direction of the rule above; together they + // make the concrete comparison total. Regression for the `asWord-unif-03/04` lemmas-spec + // claims, which decompose an out-of-range `#asWord(B) ==Int CONST` into exactly this + // comparison and previously relied on the (removed, looping) `asWord-eq-false` to close. + claim [powByteLen-lt-concrete-false]: + runLemma ( 2 ^Int 160 doneLemma ( false ) ... + endmodule