Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions kevm-pyk/src/kevm_pyk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ def exec_prove(options: ProveOptions) -> None:
else:
kore_rpc_command = options.kore_rpc_command

# Both booster servers parse the flag (shared Booster.CLOptions); plain kore-rpc does not,
# so the budget is skipped (with a warning) rather than rejected now that it defaults on.
if Path(kore_rpc_command[0]).name in ('kore-rpc-booster', 'booster-dev'):
kore_rpc_command += ('--equation-max-local-steps', str(options.equation_max_local_steps))
else:
_LOGGER.warning(f'Ignoring --equation-max-local-steps for non-booster server: {kore_rpc_command[0]}')

def is_functional(claim: KClaim) -> bool:
claim_lhs = claim.body
if type(claim_lhs) is KRewrite:
Expand Down
13 changes: 13 additions & 0 deletions kevm-pyk/src/kevm_pyk/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,17 @@ def _create_argument_parser() -> ArgumentParser:
action='store_true',
help='Skip the Kore simplification pass after Booster; assume_defined still uses Kore for #Ceil evaluation.',
)
prove_args.add_argument(
'--equation-max-local-steps',
dest='equation_max_local_steps',
type=int,
default=None,
help=(
'Booster equation budget for in-place evaluation at rewritten subterms before restarting '
'traversal from the top (default: 20; 0 = restart-only). Passed through to '
'kore-rpc-booster / booster-dev; requires a haskell-backend that supports the flag.'
),
)
prove_args.add_argument(
'--max-frontier-parallel',
type=int,
Expand Down Expand Up @@ -611,13 +622,15 @@ class ProveOptions(
):
reinit: bool
booster_only_simplify: bool
equation_max_local_steps: int
max_frontier_parallel: int

@staticmethod
def default() -> dict[str, Any]:
return {
'reinit': False,
'booster_only_simplify': False,
'equation_max_local_steps': 20,
'max_frontier_parallel': 1,
}

Expand Down
7 changes: 7 additions & 0 deletions kevm-pyk/src/kevm_pyk/kproj/evm-semantics/buf.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ module BUF
rule [powByteLen-lt-concrete]: CONST <Int #powByteLen(N) => true
requires 0 <=Int N andBool CONST <Int 2 ^Int (8 *Int N)
[simplification, concrete(CONST, N), preserves-definedness]
// Companion false-direction: a concrete CONST that does not fit into N bytes is not
// less than #powByteLen(N). Together with the rule above this makes the concrete
// comparison total. The two requires are mutually exclusive and neither RHS re-exposes
// `_ <Int #powByteLen(_)`, so the pair is loop-safe.
rule [powByteLen-lt-concrete-false]: CONST <Int #powByteLen(N) => false
requires 0 <=Int N andBool 2 ^Int (8 *Int N) <=Int CONST
[simplification, concrete(CONST, N), preserves-definedness]
rule #write(WM, IDX, VAL) => WM [ IDX := #buf(1, VAL) ] [simplification]

rule #bufStrict(SIZE, DATA) => #buf(SIZE, DATA)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,12 @@ module EVM-INT-SIMPLIFICATION
// neither rule fires on the other's requires.
rule [asWord-lt-false]: #asWord ( B:Bytes ) <Int X:Int => false requires X <=Int #asWord ( B ) [simplification]
rule [asWord-le-false]: #asWord ( B:Bytes ) <=Int X:Int => false requires X <Int #asWord ( B ) [simplification]
// Comparison: `#asWord(B) == X` is false when the path condition says X =/= #asWord(B).
// The requires X =/=Int #asWord(B) expands to notBool(X ==Int #asWord(B)) via the K builtin;
// since ==Int is comm, this matches the path condition fact notBool(#asWord(B) ==Int X).
// No loop: our rule matches #asWord on the LEFT; after builtin expansion the recursive
// term X ==Int #asWord(B) has X on the LEFT, so this rule does not fire on it.
rule [asWord-eq-false]: #asWord ( B:Bytes ) ==Int X:Int => false requires X =/=Int #asWord ( B ) [simplification]
// Note: there is intentionally no `asWord-eq-false` analogue of the `<Int`/`<=Int` rules above.
// Such a rule (`#asWord(B) ==Int X => false requires X =/=Int #asWord(B)`) is not loop-safe: when
// both operands are `#asWord`, applying it rewrites the equality into its own `requires` with the
// two operands swapped — still `#asWord`-on-the-left — so it re-matches and recurses without bound
// in the Kore simplifier. `<Int`/`<=Int` are antisymmetric, so their swapped requires is a
// different relation and does not re-match. See `docs/asword-eq-false-loop.md`.

// Comparison: `#asWord` of `+Bytes` when lower bytes match, with `<Int`
rule [asWord-concat-lt]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,15 @@ module INT-SIMPLIFICATION-COMMON
rule A ==Int B => false requires 0 <=Int A andBool B <Int 0 [simplification, concrete(B)]

// A ==Int B is false when A < B is a known path-condition fact (B concrete).
rule [eq-false-lt]: A:Int ==Int B:Int => false requires A <Int B [simplification, concrete(B)]
// TEMPORARILY DISABLED (evm-semantics #2859 / kontrol #1153): this rule drives the CSE Kore-simplifier
// non-termination. During the Kore definedness check of `#Ceil(<accounts> AccountCellMap)`, the account
// distinctness predicates `ACCT_ID ==Int <concrete address> => false` invite this rule, whose side
// condition `A <Int B` is undetermined for a range-bounded symbolic account id and a large concrete
// address — so it cannot fire but spawns a ~500ms SMT round-trip per attempt, repeated as the constraint
// simplifier re-presents the predicate without reaching a fixpoint. Confirmed the sole rule driving those
// undetermined SMT calls (removing it took the rule-driven SMT count from 45 to 0 on the exemplar).
// Re-introduce once Booster performs definedness without falling back to the Kore simplifier.
// rule [eq-false-lt]: A:Int ==Int B:Int => false requires A <Int B [simplification, concrete(B)]

rule 0 <Int X => true requires 0 <=Int X andBool notBool (X ==Int 0) [simplification(60)]

Expand Down
15 changes: 15 additions & 0 deletions kevm-pyk/src/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ def pytest_addoption(parser: Parser) -> None:
default=False,
help='Skip the Kore simplification pass after Booster for all simplify/execute/implies calls.',
)
parser.addoption(
'--equation-max-local-steps',
type=int,
default=20,
help=(
'Booster equation budget for in-place evaluation at rewritten subterms '
'(default: 20; 0 = restart-only). Threaded to kore-rpc-booster / booster-dev '
'for all proof tests.'
),
)


@pytest.fixture
Expand Down Expand Up @@ -161,3 +171,8 @@ def booster_log_levels(request: FixtureRequest) -> list[str] | None:
@pytest.fixture(scope='session')
def booster_only_simplify(request: FixtureRequest) -> bool:
return request.config.getoption('--booster-only-simplify')


@pytest.fixture(scope='session')
def equation_max_local_steps(request: FixtureRequest) -> int:
return request.config.getoption('--equation-max-local-steps')
8 changes: 8 additions & 0 deletions kevm-pyk/src/tests/integration/test_prove.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def _test_prove(
workers: int | None = None,
direct_subproof_rules: bool = False,
booster_only_simplify: bool = False,
equation_max_local_steps: int = 20,
) -> None:
caplog.set_level(logging.INFO)

Expand Down Expand Up @@ -254,6 +255,7 @@ def _test_prove(
'direct_subproof_rules': direct_subproof_rules,
'booster_only_simplify': booster_only_simplify,
'claim_labels': claim_labels,
'equation_max_local_steps': equation_max_local_steps,
}
if haskell_logging:
options_dict['haskell_log_entries'] = booster_log_levels or list(HASKELL_LOGGING_ENTRIES)
Expand Down Expand Up @@ -362,6 +364,7 @@ def test_prove_rules(
booster_log_levels: list[str] | None,
claim_labels: list[str] | None,
booster_only_simplify: bool,
equation_max_local_steps: int,
) -> None:
_test_prove(
spec_file,
Expand All @@ -378,6 +381,7 @@ def test_prove_rules(
booster_log_levels=booster_log_levels,
claim_labels=claim_labels,
booster_only_simplify=booster_only_simplify,
equation_max_local_steps=equation_max_local_steps,
)


Expand All @@ -400,6 +404,7 @@ def test_prove_functional(
booster_log_levels: list[str] | None,
claim_labels: list[str] | None,
booster_only_simplify: bool,
equation_max_local_steps: int,
) -> None:
_test_prove(
spec_file,
Expand All @@ -417,6 +422,7 @@ def test_prove_functional(
claim_labels=claim_labels,
workers=8,
booster_only_simplify=booster_only_simplify,
equation_max_local_steps=equation_max_local_steps,
)


Expand All @@ -430,6 +436,7 @@ def test_prove_dss(
booster_log_levels: list[str] | None,
claim_labels: list[str] | None,
booster_only_simplify: bool,
equation_max_local_steps: int,
) -> None:
for spec_file in [REPO_ROOT / 'tests/specs/mcd/vat-spec.k', REPO_ROOT / 'tests/specs/mcd-structured/vat-spec.k']:
_test_prove(
Expand All @@ -449,6 +456,7 @@ def test_prove_dss(
workers=8,
direct_subproof_rules=True,
booster_only_simplify=booster_only_simplify,
equation_max_local_steps=equation_max_local_steps,
)


Expand Down
43 changes: 43 additions & 0 deletions tests/specs/functional/asword-eq-false-loop-spec.k
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
requires "verification.k"

module ASWORD-EQ-FALSE-LOOP-SPEC
imports VERIFICATION

// Regression test for the non-terminating `asWord-eq-false` simplification loop
// (evm-semantics #2859 / kontrol #1153). The offending rule has been removed from
// `lemmas/evm-int-simplification.k`; these claims guard against it being reintroduced.
//
// The removed rule was:
// rule [asWord-eq-false]: #asWord(B) ==Int X => false requires X =/=Int #asWord(B) [simplification]
// It is loop-safe ONLY when `X` is not itself an `#asWord(_)`. When BOTH sides of the
// `==Int` are `#asWord` terms, applying it rewrites the equality into its own `requires`
// with the two `#asWord` operands swapped — still `#asWord`-on-the-left — so it matches
// again, swaps back, and recurses without bound (observed: one request looping >21 min at
// 96% CPU, the rule nested 5,406 deep in the Kore simplifier).
//
// Each claim below asks the simplifier to evaluate an equality with `#asWord` on BOTH
// sides, known unequal from the path condition. Expected result: `false`, discharged
// against the path condition. If the looping rule is reintroduced, simplification of the
// left-hand `runLemma` term loops and the proof TIMES OUT. `asWord-lt-false` /
// `asWord-le-false` need no analogue: `<Int` / `<=Int` are antisymmetric, so their
// swapped requires is a different, non-matching relation.

// Minimal trigger: two symbolic byte-words, known unequal.
claim [asword-eq-false-symmetric]:
<k> runLemma ( #asWord ( B1:Bytes ) ==Int #asWord ( B2:Bytes ) ) => doneLemma ( false ) ... </k>
requires #asWord ( B1 ) =/=Int #asWord ( B2 )

// The shape actually observed in the CSE proofs: a plain word vs an `#asWord` over a
// buffer expression (a `+Bytes` of symbolic operands does not reduce, so the right side
// stays `#asWord`, keeping both operands `#asWord` exactly as in the captured loop term).
claim [asword-eq-false-buffer]:
<k> runLemma ( #asWord ( B1:Bytes ) ==Int #asWord ( B2:Bytes +Bytes B3:Bytes ) ) => doneLemma ( false ) ... </k>
requires #asWord ( B1 ) =/=Int #asWord ( B2 +Bytes B3 )

// Same hazard reached via `=/=Int` (which desugars to `notBool(_ ==Int_)`), the form in
// which the loop term appeared inside another rule's requires.
claim [asword-neq-true-symmetric]:
<k> runLemma ( #asWord ( B1:Bytes ) =/=Int #asWord ( B2:Bytes ) ) => doneLemma ( true ) ... </k>
requires #asWord ( B1 ) =/=Int #asWord ( B2 )

endmodule
68 changes: 68 additions & 0 deletions tests/specs/functional/eq-false-lt-loop-spec.k
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
requires "verification.k"

module EQ-FALSE-LT-LOOP-SPEC
imports VERIFICATION

// Regression exemplar for the SECOND CSE non-termination layer (evm-semantics #2859 / kontrol #1153),
// the one that remains after `asWord-eq-false` is removed. See `evm-semantics-cse-loop-change-request.md`.
//
// HOW THIS WAS FOUND: rebuilding KEVM with `asWord-eq-false` deleted (branch `asword-eq-false-loop-fix`,
// == kontrol-side "variant A") does NOT stop the CSE proofs hanging — proof #4
// `ArithmeticCallTest.test_double_add_sub_external` still spins >25 min in one `execute` request. The
// loop simply RELOCATES. Server-side context logging of the hanging request shows two rules dominating,
// ~16-17k attempts each, in two different engines under the same execute request:
//
// * booster > execute > simplify > … > [eq-false-lt] > smt (15 865 attempts)
// rule [eq-false-lt]: A:Int ==Int B:Int => false requires A <Int B [simplification, concrete(B)]
// * kore > execute > constraint > term:90151e4 > <AccountCellMap #Ceil> > success (17 082 attempts,
// re-firing on the SAME constraint term)
//
// MECHANISM (a two-engine fixpoint failure, NOT a single self-loop like asWord-eq-false):
// 1. `#Ceil(<accounts> AccountCellMap)` definedness requires the account ids be DISTINCT, i.e. it emits
// predicates of the form `ACCOUNT_ID ==Int <concrete account address> => false`.
// 2. `eq-false-lt` (concrete RHS) is invited to discharge each one via its side condition
// `ACCOUNT_ID <Int <concrete address>`. With ACCOUNT_ID a symbolic account id constrained only to a
// range (e.g. `[0, pow160)`) that does NOT decide the comparison, SMT returns (Sat,Sat) —
// UNDETERMINED (2 518 such verdicts captured). The rule cannot fire and the predicate cannot reduce.
// 3. Because the account-distinctness predicate never simplifies to a stable form, the Kore constraint
// simplifier never reaches a fixpoint and re-evaluates the AccountCellMap `#Ceil` on the same term
// thousands of times — each pass paying a fresh ~500 ms undetermined SMT round-trip per account id.
//
// The captured concrete address is `645326474426547203313410069153905908525362434349` (a 160-bit value);
// the symbolic ids were CALLER_ID, ORIGIN_ID, and the deployed-contract ids.
//
// SCOPE / CAVEAT — READ BEFORE RELYING ON THIS:
// The faithful trigger is the INTERACTION of AccountCellMap definedness with `eq-false-lt`, driven by
// the Kore constraint simplifier's lack of a fixpoint guard. A bare `runLemma(ACCT ==Int <concrete>)`
// (claim 1 below) reproduces the EXPENSIVE-UNDETERMINED-SMT step but will most likely return the term
// undetermined ONCE rather than spin, because the AccountCellMap `#Ceil` that re-presents the predicate
// is absent. Claim 2 adds a two-account `<accounts>` cell so the definedness machinery is in play; this
// is the closer reproduction, but whether it spins depends on the backend's constraint-simplification
// fixpoint behaviour, which is the real defect. Please run BOTH on the buggy build and report which
// actually loops; the primary recommendation to the team is a backend fixpoint/termination guard plus
// making `eq-false-lt` cheap (or skipped) when `A <Int B` is undetermined — not a lemma deletion.

// Claim 1 — the eq-false-lt undetermined-SMT step in isolation (the per-pass cost of the loop).
// Exactly the captured comparison: symbolic account id vs the concrete address, range-bounded so that
// `ACCT_ID <Int 645…349` is undetermined.
claim [eq-false-lt-undetermined-acctid]:
<k> runLemma ( ACCT_ID:Int ==Int 645326474426547203313410069153905908525362434349 )
=> doneLemma ( ?_RESULT:Bool ) ... </k>
requires 0 <=Int ACCT_ID andBool ACCT_ID <Int pow160

// Claim 2 — closer reproduction: two symbolic-id accounts in the <accounts> cell, so evaluating the
// configuration's definedness invokes the AccountCellMap `#Ceil` that emits the account-distinctness
// predicate `ACCT_ID_1 ==Int ACCT_ID_2 => false`, which `eq-false-lt` then tries (and fails) to discharge.
// Both ids are range-bounded but unordered, so the distinctness comparison is undetermined — the captured
// shape. (Adjust the cell context to match the project's VERIFICATION module if `<accounts>` is wrapped.)
claim [eq-false-lt-acctmap-definedness]:
<k> runLemma ( .Bytes ) => doneLemma ( .Bytes ) ... </k>
<accounts>
<account> <acctID> ACCT_ID_1:Int </acctID> ... </account>
<account> <acctID> ACCT_ID_2:Int </acctID> ... </account>
...
</accounts>
requires 0 <=Int ACCT_ID_1 andBool ACCT_ID_1 <Int pow160
andBool 0 <=Int ACCT_ID_2 andBool ACCT_ID_2 <Int pow160

endmodule
8 changes: 8 additions & 0 deletions tests/specs/functional/lemmas-no-smt-spec.k
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,12 @@ module LEMMAS-NO-SMT-SPEC
claim [powByteLen-lt-concrete]:
<k> runLemma ( 31 <Int #powByteLen(32) ) => doneLemma ( true ) ... </k>

// powByteLen-lt-concrete-false: CONST <Int #powByteLen(N) => false when CONST does
// not fit into N bytes. The companion false-direction of the rule above; together they
// make the concrete comparison total. Regression for the `asWord-unif-03/04` lemmas-spec
// claims, which decompose an out-of-range `#asWord(B) ==Int CONST` into exactly this
// comparison and previously relied on the (removed, looping) `asWord-eq-false` to close.
claim [powByteLen-lt-concrete-false]:
<k> runLemma ( 2 ^Int 160 <Int #powByteLen(16) ) => doneLemma ( false ) ... </k>

endmodule
Loading