From 1a01d830f4b7947a32302a8ade25cc7c61d8b08c Mon Sep 17 00:00:00 2001 From: StreamDemon Date: Thu, 2 Jul 2026 14:17:49 +0800 Subject: [PATCH 1/7] Open parser enhancements integration branch Empty seed commit for the enhancement-wave base PR; the roadmap lives in the PR description. Rebased onto main after the parser correctness wave (PR #69) merged. From 9a8eab2a008ab20fe0f789c2816698601117d1d1 Mon Sep 17 00:00:00 2001 From: RevenantPulse <68097584+StreamDemon@users.noreply.github.com> Date: Thu, 2 Jul 2026 19:06:35 +0800 Subject: [PATCH 2/7] Dedupe the lexer numeric-suffix list (#75) `numeric_suffix` and `validate_numeric_body` each carried their own copy of the 13-entry suffix table, so any future suffix change had to be made twice or the two paths would drift apart. Hoist the table into a shared `NUMERIC_SUFFIXES` const and return the matched `&'static str` instead of allocating a fresh `String` on every suffix scan. A new test iterates the shared table and lexes `1` for every entry, so additions to the list are covered automatically. --- crates/sploosh-lexer/src/lib.rs | 40 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/crates/sploosh-lexer/src/lib.rs b/crates/sploosh-lexer/src/lib.rs index e21e234..1ad0525 100644 --- a/crates/sploosh-lexer/src/lib.rs +++ b/crates/sploosh-lexer/src/lib.rs @@ -148,6 +148,13 @@ pub fn is_contextual_keyword(text: &str) -> bool { ) } +/// §16.1 numeric suffixes, longest-first so prefix scanning never matches a +/// shorter suffix inside a longer one. Shared by suffix scanning and +/// separator validation. 
+const NUMERIC_SUFFIXES: [&str; 13] = [ + "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", "u8", +]; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct LexError { pub message: String, @@ -339,16 +346,12 @@ impl Lexer<'_> { } } - fn numeric_suffix(&mut self) -> Option { + fn numeric_suffix(&mut self) -> Option<&'static str> { let rest = &self.source[self.pos..]; - let suffixes = [ - "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", - "u8", - ]; - for suffix in suffixes { + for suffix in NUMERIC_SUFFIXES { if rest.starts_with(suffix) { self.pos += suffix.len(); - return Some(suffix.to_string()); + return Some(suffix); } } None @@ -356,11 +359,7 @@ impl Lexer<'_> { fn validate_numeric_body(&mut self, start: usize, base: u8) { let text = &self.source[start..self.pos]; - let suffixes = [ - "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", - "u8", - ]; - let body = suffixes + let body = NUMERIC_SUFFIXES .iter() .find_map(|suffix| text.strip_suffix(suffix)) .unwrap_or(text); @@ -628,6 +627,23 @@ mod tests { assert_eq!(tokens[4].kind, TokenKind::IntLit); } + #[test] + fn every_numeric_suffix_lexes() { + for suffix in NUMERIC_SUFFIXES { + let source = format!("1{suffix}"); + let tokens = lex(&source).unwrap(); + assert_eq!(tokens.len(), 1, "{source}"); + let expected = if suffix.starts_with('f') { + TokenKind::FloatLit + } else { + TokenKind::IntLit + }; + assert_eq!(tokens[0].kind, expected, "{source}"); + assert_eq!(tokens[0].lexeme, source); + assert_eq!(tokens[0].span, Span::new(0, source.len())); + } + } + #[test] fn rejects_bad_numeric_separators() { let err = lex("1__2").unwrap_err(); From 3e65f7f0c1b321260aeee22de7d72ac505cdc911 Mon Sep 17 00:00:00 2001 From: RevenantPulse <68097584+StreamDemon@users.noreply.github.com> Date: Thu, 2 Jul 2026 19:16:46 +0800 Subject: [PATCH 3/7] Store spans instead of lexeme strings on tokens (#76) Every token carried an owned 
copy of its source text, so lexing a file allocated one String per token even though the source buffer already holds the same bytes. Token is now just a kind and a span; the new `Token::text(source)` slices the original buffer on demand. The parser threads the source string through and derives text only at the few places that need it (identifiers, literals, the `vec` head, the extern target). Unary and binary operator text now comes from the token kind rather than the lexeme, since the kind already determines it. --- crates/sploosh-lexer/src/lib.rs | 12 +++++-- crates/sploosh-parser/src/lib.rs | 59 +++++++++++++++++++++----------- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/crates/sploosh-lexer/src/lib.rs b/crates/sploosh-lexer/src/lib.rs index 1ad0525..b32f1ae 100644 --- a/crates/sploosh-lexer/src/lib.rs +++ b/crates/sploosh-lexer/src/lib.rs @@ -5,10 +5,17 @@ use sploosh_ast::Span; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token { pub kind: TokenKind, - pub lexeme: String, pub span: Span, } +impl Token { + /// The token's source text, sliced from the file it was lexed from. + /// Tokens carry only spans; slicing on demand keeps lexing allocation-free. 
+ pub fn text<'src>(&self, source: &'src str) -> &'src str { + &source[self.span.start..self.span.end] + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum TokenKind { Ident, @@ -559,7 +566,6 @@ impl Lexer<'_> { fn push(&mut self, kind: TokenKind, start: usize, end: usize) { self.tokens.push(Token { kind, - lexeme: self.source[start..end].to_string(), span: Span::new(start, end), }); } @@ -639,7 +645,7 @@ mod tests { TokenKind::IntLit }; assert_eq!(tokens[0].kind, expected, "{source}"); - assert_eq!(tokens[0].lexeme, source); + assert_eq!(tokens[0].text(&source), source); assert_eq!(tokens[0].span, Span::new(0, source.len())); } } diff --git a/crates/sploosh-parser/src/lib.rs b/crates/sploosh-parser/src/lib.rs index 3c3b9ca..bf53557 100644 --- a/crates/sploosh-parser/src/lib.rs +++ b/crates/sploosh-parser/src/lib.rs @@ -11,7 +11,7 @@ pub struct ParseError { pub fn parse_program(source: &str) -> Result> { let tokens = lex(source).map_err(lex_errors)?; - Parser::new(tokens).parse_program() + Parser::new(tokens, source).parse_program() } fn lex_errors(errors: Vec) -> Vec { @@ -24,8 +24,9 @@ fn lex_errors(errors: Vec) -> Vec { .collect() } -struct Parser { +struct Parser<'src> { tokens: Vec, + source: &'src str, pos: usize, errors: Vec, /// When set, a `struct_literal` may not be the outermost expression — the @@ -33,16 +34,22 @@ struct Parser { no_struct_literal: bool, } -impl Parser { - fn new(tokens: Vec) -> Self { +impl<'src> Parser<'src> { + fn new(tokens: Vec, source: &'src str) -> Self { Self { tokens, + source, pos: 0, errors: Vec::new(), no_struct_literal: false, } } + /// Source text for a token; tokens carry only spans (see `Token::text`). 
+ fn text(&self, token: &Token) -> &'src str { + token.text(self.source) + } + fn parse_program(mut self) -> Result> { let mut items = Vec::new(); while !self.eof() { @@ -477,7 +484,8 @@ impl Parser { fn extern_block(&mut self) -> Option { self.expect_keyword(Keyword::Extern)?; let target = if self.at(TokenKind::StringLit) { - self.bump().lexeme + let token = self.bump(); + self.text(&token).to_string() } else { self.expect_keyword(Keyword::Onchain)?; self.expect_keyword(Keyword::Mod)?; @@ -734,7 +742,7 @@ impl Parser { if left_bp < min_bp { break; } - let op_text = self.bump().lexeme; + self.bump(); if op == "|>" { // §16: the RHS of `|>` is a `pipe_stage`, not a precedence-climbed // expression. @@ -742,7 +750,7 @@ impl Parser { let span = lhs.span.join(stage.span); lhs = Expr { kind: ExprKind::Binary { - op: op_text, + op: op.to_string(), left: Box::new(lhs), right: Box::new(stage), }, @@ -776,7 +784,7 @@ impl Parser { } else { Expr { kind: ExprKind::Binary { - op: op_text, + op: op.to_string(), left: Box::new(lhs), right: Box::new(rhs), }, @@ -826,11 +834,12 @@ impl Parser { match token.kind { TokenKind::IntLit | TokenKind::FloatLit | TokenKind::StringLit | TokenKind::CharLit => { self.bump(); + let text = self.text(&token).to_string(); let lit = match token.kind { - TokenKind::IntLit => Literal::Int(token.lexeme), - TokenKind::FloatLit => Literal::Float(token.lexeme), - TokenKind::StringLit => Literal::String(token.lexeme), - TokenKind::CharLit => Literal::Char(token.lexeme), + TokenKind::IntLit => Literal::Int(text), + TokenKind::FloatLit => Literal::Float(text), + TokenKind::StringLit => Literal::String(text), + TokenKind::CharLit => Literal::Char(text), _ => unreachable!(), }; Some(Expr { @@ -841,11 +850,14 @@ impl Parser { TokenKind::Keyword(Keyword::True | Keyword::False) => { self.bump(); Some(Expr { - kind: ExprKind::Literal(Literal::Bool(token.lexeme == "true")), + kind: ExprKind::Literal(Literal::Bool(matches!( + token.kind, + 
TokenKind::Keyword(Keyword::True) + ))), span: token.span, }) } - TokenKind::Ident if token.lexeme == "vec" => { + TokenKind::Ident if self.text(&token) == "vec" => { self.bump(); if self.eat(TokenKind::Bang).is_some() { // §16 `vec_literal`: `vec` "!" only ever binds to square @@ -885,7 +897,7 @@ impl Parser { } Some(Expr { kind: ExprKind::Path(Path { - segments: vec![token.lexeme], + segments: vec![self.text(&token).to_string()], span: token.span, }), span: token.span, @@ -908,7 +920,14 @@ impl Parser { }) } TokenKind::Bang | TokenKind::Minus | TokenKind::Star | TokenKind::Amp => { - let op = self.bump().lexeme; + let op = match token.kind { + TokenKind::Bang => "!", + TokenKind::Minus => "-", + TokenKind::Star => "*", + TokenKind::Amp => "&", + _ => unreachable!(), + }; + self.bump(); if op == "&" { let _mutable = self.eat_ident_text("mut"); } @@ -916,7 +935,7 @@ impl Parser { let span = token.span.join(expr.span); Some(Expr { kind: ExprKind::Unary { - op, + op: op.to_string(), expr: Box::new(expr), }, span, @@ -1091,7 +1110,7 @@ impl Parser { | TokenKind::Keyword(Keyword::SelfType) | TokenKind::Keyword(Keyword::SelfValue) => { let token = self.bump(); - Some(Ident::new(token.lexeme, token.span)) + Some(Ident::new(self.text(&token), token.span)) } _ => { self.error_here("expected path segment"); @@ -1105,7 +1124,7 @@ impl Parser { match token.kind { TokenKind::Ident => { self.bump(); - Some(Ident::new(token.lexeme, token.span)) + Some(Ident::new(self.text(&token), token.span)) } _ => { self.error_here("expected identifier"); @@ -1276,7 +1295,7 @@ impl Parser { fn at_ident_text(&self, text: &str) -> bool { self.peek() - .is_some_and(|token| token.kind == TokenKind::Ident && token.lexeme == text) + .is_some_and(|token| token.kind == TokenKind::Ident && token.text(self.source) == text) } fn eat_ident_text(&mut self, text: &str) -> Option { From 018bf455ea87909784a11d9f99f5aeb9068cf666 Mon Sep 17 00:00:00 2001 From: RevenantPulse 
<68097584+StreamDemon@users.noreply.github.com> Date: Thu, 2 Jul 2026 19:28:38 +0800 Subject: [PATCH 4/7] Make Token and TokenKind Copy, drop parse-loop clones (#77) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the lexeme gone, a token is a payload-free kind plus a span — both trivially copyable. Deriving Copy lets every parse loop pass and return tokens by value, so the `.clone()` calls sprinkled through `at`, `eat`, `expect`, `bump`, `peek_kind`, and the recovery helpers all disappear. The PR #71 roadmap sketched this slot as "`at`/`eat`/`expect` take `&TokenKind`"; deriving Copy reaches the same goal (no clones in parse loops) with by-value call sites instead of reference threading, which only became possible after the span-slicing change landed. --- crates/sploosh-lexer/src/lib.rs | 4 ++-- crates/sploosh-parser/src/lib.rs | 33 ++++++++++++++------------------ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/crates/sploosh-lexer/src/lib.rs b/crates/sploosh-lexer/src/lib.rs index b32f1ae..b8c0389 100644 --- a/crates/sploosh-lexer/src/lib.rs +++ b/crates/sploosh-lexer/src/lib.rs @@ -2,7 +2,7 @@ use sploosh_ast::Span; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Token { pub kind: TokenKind, pub span: Span, @@ -16,7 +16,7 @@ impl Token { } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum TokenKind { Ident, Keyword(Keyword), diff --git a/crates/sploosh-parser/src/lib.rs b/crates/sploosh-parser/src/lib.rs index bf53557..ae87b39 100644 --- a/crates/sploosh-parser/src/lib.rs +++ b/crates/sploosh-parser/src/lib.rs @@ -613,10 +613,7 @@ impl<'src> Parser<'src> { } else if self.eat_keyword(Keyword::Continue).is_some() { self.expect(TokenKind::Semi)?; statements.push(Stmt::Continue); - } else if self.at_ident_text("send") - && self - .peek_kind_at(1) - .is_some_and(|kind| can_begin_expr(&kind)) + } else if 
self.at_ident_text("send") && self.peek_kind_at(1).is_some_and(can_begin_expr) { // §2.7: `send` at statement head followed by any token that can // begin an expression always opens a send-statement; the operand @@ -830,7 +827,7 @@ impl<'src> Parser<'src> { } fn prefix(&mut self) -> Option { - let token = self.peek()?.clone(); + let token = *self.peek()?; match token.kind { TokenKind::IntLit | TokenKind::FloatLit | TokenKind::StringLit | TokenKind::CharLit => { self.bump(); @@ -1061,7 +1058,7 @@ impl<'src> Parser<'src> { fn args(&mut self, close: TokenKind) -> Option> { let mut args = Vec::new(); - while !self.at(close.clone()) && !self.eof() { + while !self.at(close) && !self.eof() { args.push(self.delimited_expr()?); if self.eat(TokenKind::Comma).is_none() { break; @@ -1120,7 +1117,7 @@ impl<'src> Parser<'src> { } fn ident(&mut self) -> Option { - let token = self.peek()?.clone(); + let token = *self.peek()?; match token.kind { TokenKind::Ident => { self.bump(); @@ -1197,9 +1194,9 @@ impl<'src> Parser<'src> { fn skip_balanced(&mut self, open: TokenKind, close: TokenKind) { let mut depth = 0usize; while !self.eof() { - if self.at(open.clone()) { + if self.at(open) { depth += 1; - } else if self.at(close.clone()) { + } else if self.at(close) { if depth == 0 { self.bump(); break; @@ -1218,9 +1215,9 @@ impl<'src> Parser<'src> { fn skip_balanced_after_open(&mut self, open: TokenKind, close: TokenKind) { let mut depth = 1usize; while !self.eof() { - if self.at(open.clone()) { + if self.at(open) { depth += 1; - } else if self.at(close.clone()) { + } else if self.at(close) { depth -= 1; self.bump(); if depth == 0 { @@ -1273,7 +1270,7 @@ impl<'src> Parser<'src> { } fn recover_until(&mut self, kinds: &[TokenKind]) { - while !self.eof() && !kinds.iter().any(|kind| self.at(kind.clone())) { + while !self.eof() && !kinds.iter().any(|kind| self.at(*kind)) { self.bump(); } } @@ -1315,7 +1312,7 @@ impl<'src> Parser<'src> { } fn expect(&mut self, kind: TokenKind) -> Option { - 
self.eat(kind.clone()).or_else(|| { + self.eat(kind).or_else(|| { self.error_here(format!("expected `{kind:?}`")); None }) @@ -1330,13 +1327,11 @@ impl<'src> Parser<'src> { } fn peek_kind(&self) -> Option { - self.peek().map(|token| token.kind.clone()) + self.peek().map(|token| token.kind) } fn peek_kind_at(&self, offset: usize) -> Option { - self.tokens - .get(self.pos + offset) - .map(|token| token.kind.clone()) + self.tokens.get(self.pos + offset).map(|token| token.kind) } fn can_start_path_segment_at(&self, offset: usize) -> bool { @@ -1361,7 +1356,7 @@ impl<'src> Parser<'src> { } fn bump(&mut self) -> Token { - let token = self.tokens[self.pos].clone(); + let token = self.tokens[self.pos]; self.pos += 1; token } @@ -1409,7 +1404,7 @@ fn is_assign_target(expr: &Expr) -> bool { } /// Tokens that can begin an expression — must stay in sync with `prefix()`. -fn can_begin_expr(kind: &TokenKind) -> bool { +fn can_begin_expr(kind: TokenKind) -> bool { matches!( kind, TokenKind::IntLit From dea4d82365fd925695bee1ce2a3c13964fc8a643 Mon Sep 17 00:00:00 2001 From: RevenantPulse <68097584+StreamDemon@users.noreply.github.com> Date: Thu, 2 Jul 2026 19:33:44 +0800 Subject: [PATCH 5/7] Replace operator strings with UnaryOp and BinaryOp enums (#79) `ExprKind::Unary`/`Binary` stored their operator as an owned String, which allocated per node and let any string masquerade as an operator. Dedicated enums make illegal operators unrepresentable, shrink the nodes, and give match exhaustiveness checking to every consumer. `=` never reaches the AST (it builds `ExprKind::Assign`), so the parser classifies infix tokens with a private `Infix { Assign, Op(BinaryOp) }` wrapper instead of widening the public enum with a variant no AST node can carry. `as_str()`/`Display` on both enums recover the source spelling for diagnostics and the future pretty-printer (#67). 
--- crates/sploosh-ast/src/lib.rs | 98 ++++++++++++++++++++++++++- crates/sploosh-parser/src/lib.rs | 113 +++++++++++++++++-------------- 2 files changed, 157 insertions(+), 54 deletions(-) diff --git a/crates/sploosh-ast/src/lib.rs b/crates/sploosh-ast/src/lib.rs index cbbba7c..5da1828 100644 --- a/crates/sploosh-ast/src/lib.rs +++ b/crates/sploosh-ast/src/lib.rs @@ -239,6 +239,100 @@ pub enum Stmt { Continue, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UnaryOp { + /// `!` + Not, + /// `-` + Neg, + /// `*` + Deref, + /// `&` + Ref, +} + +impl UnaryOp { + pub fn as_str(self) -> &'static str { + match self { + Self::Not => "!", + Self::Neg => "-", + Self::Deref => "*", + Self::Ref => "&", + } + } +} + +impl std::fmt::Display for UnaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOp { + /// `|>` + Pipe, + /// `+` + Add, + /// `-` + Sub, + /// `*` + Mul, + /// `/` + Div, + /// `%` + Rem, + /// `==` + Eq, + /// `!=` + Ne, + /// `<` + Lt, + /// `>` + Gt, + /// `<=` + Le, + /// `>=` + Ge, + /// `&&` + And, + /// `||` + Or, + /// `..` + Range, + /// `..=` + RangeInclusive, +} + +impl BinaryOp { + pub fn as_str(self) -> &'static str { + match self { + Self::Pipe => "|>", + Self::Add => "+", + Self::Sub => "-", + Self::Mul => "*", + Self::Div => "/", + Self::Rem => "%", + Self::Eq => "==", + Self::Ne => "!=", + Self::Lt => "<", + Self::Gt => ">", + Self::Le => "<=", + Self::Ge => ">=", + Self::And => "&&", + Self::Or => "||", + Self::Range => "..", + Self::RangeInclusive => "..=", + } + } +} + +impl std::fmt::Display for BinaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + #[derive(Debug, Clone, PartialEq)] pub struct Expr { pub kind: ExprKind, @@ -263,11 +357,11 @@ pub enum ExprKind { index: Box, }, Unary { - op: String, + op: UnaryOp, expr: Box, }, Binary { - op: 
String, + op: BinaryOp, left: Box, right: Box, }, diff --git a/crates/sploosh-parser/src/lib.rs b/crates/sploosh-parser/src/lib.rs index ae87b39..bfa16ce 100644 --- a/crates/sploosh-parser/src/lib.rs +++ b/crates/sploosh-parser/src/lib.rs @@ -24,6 +24,14 @@ fn lex_errors(errors: Vec) -> Vec { .collect() } +/// Parser-internal classification of infix operators: `=` builds an +/// `ExprKind::Assign` node, everything else an `ExprKind::Binary`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Infix { + Assign, + Op(BinaryOp), +} + struct Parser<'src> { tokens: Vec, source: &'src str, @@ -733,21 +741,21 @@ impl<'src> Parser<'src> { }; continue; } - let Some((op, left_bp, right_bp)) = self.infix_binding_power() else { + let Some((infix, left_bp, right_bp)) = self.infix_binding_power() else { break; }; if left_bp < min_bp { break; } self.bump(); - if op == "|>" { + if infix == Infix::Op(BinaryOp::Pipe) { // §16: the RHS of `|>` is a `pipe_stage`, not a precedence-climbed // expression. let stage = self.pipe_stage()?; let span = lhs.span.join(stage.span); lhs = Expr { kind: ExprKind::Binary { - op: op.to_string(), + op: BinaryOp::Pipe, left: Box::new(lhs), right: Box::new(stage), }, @@ -766,31 +774,32 @@ impl<'src> Parser<'src> { } let rhs = self.expr(right_bp)?; let span = lhs.span.join(rhs.span); - lhs = if op == "=" { - // §16: only an `assign_target` may appear on the left side. - if !is_assign_target(&lhs) { - self.error_at(lhs.span, "invalid assignment target"); - } - Expr { - kind: ExprKind::Assign { - target: Box::new(lhs), - value: Box::new(rhs), - }, - span, + lhs = match infix { + Infix::Assign => { + // §16: only an `assign_target` may appear on the left side. 
+ if !is_assign_target(&lhs) { + self.error_at(lhs.span, "invalid assignment target"); + } + Expr { + kind: ExprKind::Assign { + target: Box::new(lhs), + value: Box::new(rhs), + }, + span, + } } - } else { - Expr { + Infix::Op(op) => Expr { kind: ExprKind::Binary { - op: op.to_string(), + op, left: Box::new(lhs), right: Box::new(rhs), }, span, - } + }, }; // The precedence table marks `..`/`..=` non-associative: a range // operand may not itself be an unparenthesized range. - if matches!(op, ".." | "..=") + if matches!(infix, Infix::Op(BinaryOp::Range | BinaryOp::RangeInclusive)) && matches!( self.peek_kind(), Some(TokenKind::DotDot | TokenKind::DotDotEq) @@ -918,21 +927,21 @@ impl<'src> Parser<'src> { } TokenKind::Bang | TokenKind::Minus | TokenKind::Star | TokenKind::Amp => { let op = match token.kind { - TokenKind::Bang => "!", - TokenKind::Minus => "-", - TokenKind::Star => "*", - TokenKind::Amp => "&", + TokenKind::Bang => UnaryOp::Not, + TokenKind::Minus => UnaryOp::Neg, + TokenKind::Star => UnaryOp::Deref, + TokenKind::Amp => UnaryOp::Ref, _ => unreachable!(), }; self.bump(); - if op == "&" { + if op == UnaryOp::Ref { let _mutable = self.eat_ident_text("mut"); } let expr = self.expr(11)?; let span = token.span.join(expr.span); Some(Expr { kind: ExprKind::Unary { - op: op.to_string(), + op, expr: Box::new(expr), }, span, @@ -1235,25 +1244,25 @@ impl<'src> Parser<'src> { } } - fn infix_binding_power(&self) -> Option<(&'static str, u8, u8)> { + fn infix_binding_power(&self) -> Option<(Infix, u8, u8)> { Some(match self.peek_kind()? 
{ - TokenKind::Eq => ("=", 2, 1), - TokenKind::PipeGt => ("|>", 8, 9), - TokenKind::Plus => ("+", 9, 10), - TokenKind::Minus => ("-", 9, 10), - TokenKind::Star => ("*", 10, 11), - TokenKind::Slash => ("/", 10, 11), - TokenKind::Percent => ("%", 10, 11), - TokenKind::EqEq => ("==", 6, 7), - TokenKind::Ne => ("!=", 6, 7), - TokenKind::Lt => ("<", 7, 8), - TokenKind::Gt => (">", 7, 8), - TokenKind::Le => ("<=", 7, 8), - TokenKind::Ge => (">=", 7, 8), - TokenKind::AmpAmp => ("&&", 5, 6), - TokenKind::PipePipe => ("||", 4, 5), - TokenKind::DotDot => ("..", 3, 4), - TokenKind::DotDotEq => ("..=", 3, 4), + TokenKind::Eq => (Infix::Assign, 2, 1), + TokenKind::PipeGt => (Infix::Op(BinaryOp::Pipe), 8, 9), + TokenKind::Plus => (Infix::Op(BinaryOp::Add), 9, 10), + TokenKind::Minus => (Infix::Op(BinaryOp::Sub), 9, 10), + TokenKind::Star => (Infix::Op(BinaryOp::Mul), 10, 11), + TokenKind::Slash => (Infix::Op(BinaryOp::Div), 10, 11), + TokenKind::Percent => (Infix::Op(BinaryOp::Rem), 10, 11), + TokenKind::EqEq => (Infix::Op(BinaryOp::Eq), 6, 7), + TokenKind::Ne => (Infix::Op(BinaryOp::Ne), 6, 7), + TokenKind::Lt => (Infix::Op(BinaryOp::Lt), 7, 8), + TokenKind::Gt => (Infix::Op(BinaryOp::Gt), 7, 8), + TokenKind::Le => (Infix::Op(BinaryOp::Le), 7, 8), + TokenKind::Ge => (Infix::Op(BinaryOp::Ge), 7, 8), + TokenKind::AmpAmp => (Infix::Op(BinaryOp::And), 5, 6), + TokenKind::PipePipe => (Infix::Op(BinaryOp::Or), 4, 5), + TokenKind::DotDot => (Infix::Op(BinaryOp::Range), 3, 4), + TokenKind::DotDotEq => (Infix::Op(BinaryOp::RangeInclusive), 3, 4), _ => return None, }) } @@ -1398,7 +1407,7 @@ fn is_assign_target(expr: &Expr) -> bool { path.segments.len() == 1 && path.segments[0] != "self" && path.segments[0] != "Self" } ExprKind::Field { .. } | ExprKind::Index { .. } => true, - ExprKind::Unary { op, .. } => op == "*", + ExprKind::Unary { op, .. } => *op == UnaryOp::Deref, _ => false, } } @@ -1571,7 +1580,7 @@ mod tests { let ExprKind::Binary { op, .. 
} = &tail.kind else { panic!("expected binary expression"); }; - assert_eq!(op, "/"); + assert_eq!(*op, BinaryOp::Div); } #[test] @@ -1794,7 +1803,7 @@ mod tests { let ExprKind::Binary { op, left, right } = &inner.kind else { panic!("expected pipe binary inside ErrorProp"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(left, "input")); assert!(path_named(right, "parse")); } @@ -1809,7 +1818,7 @@ mod tests { let ExprKind::Binary { op, left, right } = &outer.kind else { panic!("expected outer pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(right, "g")); let ExprKind::ErrorProp(mid) = &left.kind else { panic!("expected inner ErrorProp"); @@ -1817,7 +1826,7 @@ mod tests { let ExprKind::Binary { op, left, right } = &mid.kind else { panic!("expected inner pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(left, "a")); assert!(path_named(right, "f")); } @@ -1831,7 +1840,7 @@ mod tests { let ExprKind::Binary { op, right, .. } = &inner.kind else { panic!("expected pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); let ExprKind::Call { callee, args, .. } = &right.kind else { panic!("expected call stage"); }; @@ -1849,7 +1858,7 @@ mod tests { let ExprKind::Binary { op, right, .. 
} = &inner.kind else { panic!("expected pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); let ExprKind::Field { base, name } = &right.kind else { panic!("expected field-chain stage"); }; @@ -1887,12 +1896,12 @@ mod tests { let ExprKind::Binary { op, left, right } = &value.kind else { panic!("expected `+` at the top"); }; - assert_eq!(op, "+"); + assert_eq!(*op, BinaryOp::Add); assert!(path_named(right, "b")); let ExprKind::Binary { op, left, right } = &left.kind else { panic!("expected pipe binary on the left"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(left, "x")); assert!(path_named(right, "a")); } From f3b173a0be55f08134210f789bab87c4ad4a1f52 Mon Sep 17 00:00:00 2001 From: RevenantPulse <68097584+StreamDemon@users.noreply.github.com> Date: Thu, 2 Jul 2026 19:39:21 +0800 Subject: [PATCH 6/7] Preserve attribute arguments and actor-handler attrs in the AST (#80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `@mailbox(capacity: 2048)` and `@supervisor(strategy: "one_for_one")` lost their arguments at parse time — the parser skipped everything inside the parens — and attributes on actor handlers were dropped outright. Nothing downstream (semantic analysis, diagnostics) could ever see them. `Attribute` now carries `args: Vec<AttrArg>` plus a span covering `@` through the closing paren, with `AttrArg` mirroring the §16 grammar (`attr_arg = IDENT [ ":" expr | "=" expr | "(" expr ")" ] | expr`). Only the `IDENT ":"` form needs lookahead; the `=` and call forms are valid expressions, so they parse as expressions and canonicalize to the most specific attr shape afterwards. Actor handlers become `Handler { attrs, function }`, since a handler is a `fn_def` and §16 puts attrs on `fn_def` itself. The now-unused `skip_balanced_after_open` helper is removed. 
--- crates/sploosh-ast/src/lib.rs | 31 +++- crates/sploosh-parser/src/lib.rs | 249 +++++++++++++++++++++++++++---- 2 files changed, 253 insertions(+), 27 deletions(-) diff --git a/crates/sploosh-ast/src/lib.rs b/crates/sploosh-ast/src/lib.rs index 5da1828..6607d2b 100644 --- a/crates/sploosh-ast/src/lib.rs +++ b/crates/sploosh-ast/src/lib.rs @@ -73,6 +73,26 @@ pub enum ItemKind { #[derive(Debug, Clone, PartialEq)] pub struct Attribute { pub name: Ident, + pub args: Vec, + /// Covers `@` through the closing `)` (or the name, when there are no args). + pub span: Span, +} + +/// `attr_arg = IDENT [ ":" expr | "=" expr | "(" expr ")" ] | expr` (§16). +/// The `IDENT`-headed alternatives overlap with `expr`; the parser stores the +/// most specific form that matches. +#[derive(Debug, Clone, PartialEq)] +pub enum AttrArg { + /// Bare `IDENT` — `@derive(Debug)`, `@overflow(wrapping)`. + Ident(Ident), + /// `IDENT ":" expr` — `@mailbox(capacity: 2048)`. + Named { name: Ident, value: Expr }, + /// `IDENT "=" expr`. + Assigned { name: Ident, value: Expr }, + /// `IDENT "(" expr ")"`. + Call { name: Ident, arg: Expr }, + /// Any other bare expression. + Expr(Expr), } #[derive(Debug, Clone, PartialEq)] @@ -136,7 +156,16 @@ pub enum VariantKind { pub struct Actor { pub name: Ident, pub fields: Vec, - pub handlers: Vec, + pub handlers: Vec, +} + +/// A handler is a `fn_def` inside an `actor` body (§16), so it carries its +/// own attributes (`@mailbox(capacity: N)`, ...). Item-position `fn` attrs +/// stay hoisted on `Item.attrs` during bootstrap. 
+#[derive(Debug, Clone, PartialEq)] +pub struct Handler { + pub attrs: Vec, + pub function: Function, } #[derive(Debug, Clone, PartialEq)] diff --git a/crates/sploosh-parser/src/lib.rs b/crates/sploosh-parser/src/lib.rs index bfa16ce..50fc1f6 100644 --- a/crates/sploosh-parser/src/lib.rs +++ b/crates/sploosh-parser/src/lib.rs @@ -147,20 +147,56 @@ impl<'src> Parser<'src> { fn attrs(&mut self) -> Vec { let mut attrs = Vec::new(); - loop { - if self.eat(TokenKind::At).is_none() { - break; - } + while let Some(at) = self.eat(TokenKind::At) { if let Some(name) = self.ident() { + let mut args = Vec::new(); + let mut end = name.span.end; if self.eat(TokenKind::LParen).is_some() { - self.skip_balanced_after_open(TokenKind::LParen, TokenKind::RParen); + args = self.attr_args(); + end = match self.expect(TokenKind::RParen) { + Some(close) => close.span.end, + None => self.prev_span().end, + }; } - attrs.push(Attribute { name }); + attrs.push(Attribute { + name, + args, + span: Span::new(at.span.start, end), + }); } } attrs } + /// `attr_args = attr_arg { "," attr_arg }` (§16). + fn attr_args(&mut self) -> Vec { + let mut args = Vec::new(); + while !self.at(TokenKind::RParen) && !self.eof() { + match self.attr_arg() { + Some(arg) => args.push(arg), + None => self.recover_until(&[TokenKind::Comma, TokenKind::RParen]), + } + if self.eat(TokenKind::Comma).is_none() { + break; + } + } + args + } + + /// `attr_arg = IDENT [ ":" expr | "=" expr | "(" expr ")" ] | expr` (§16). + /// Only `IDENT ":"` needs lookahead — `:` cannot continue an expression. + /// The `=` and `(...)` alternatives are canonicalized out of the parsed + /// expression, since both are valid expression shapes themselves. 
+ fn attr_arg(&mut self) -> Option { + if self.at(TokenKind::Ident) && self.peek_kind_at(1) == Some(TokenKind::Colon) { + let name = self.ident()?; + self.bump(); + let value = self.delimited_expr()?; + return Some(AttrArg::Named { name, value }); + } + Some(classify_attr_expr(self.delimited_expr()?)) + } + fn function_after_mods( &mut self, visibility: Visibility, @@ -319,7 +355,11 @@ impl<'src> Parser<'src> { let mut handlers = Vec::new(); while !self.at(TokenKind::RBrace) && !self.eof() { self.skip_doc_comments(); - let _attrs = self.attrs(); + // A handler is a `fn_def`, so its attrs (`@mailbox(...)`, ...) are + // preserved. Fields take no attrs in §16; anything parsed before a + // field is currently discarded, matching the item-position + // tolerance for attrs on kinds the grammar leaves bare. + let attrs = self.attrs(); let visibility = if self.eat_keyword(Keyword::Pub).is_some() { Visibility::Public } else { @@ -327,7 +367,8 @@ impl<'src> Parser<'src> { }; let is_async = self.eat_keyword(Keyword::Async).is_some(); if self.at_keyword(Keyword::Fn) { - handlers.push(self.function_after_mods(visibility, is_async, false, true)?); + let function = self.function_after_mods(visibility, is_async, false, true)?; + handlers.push(Handler { attrs, function }); } else { let name = self.ident()?; self.expect(TokenKind::Colon)?; @@ -1221,23 +1262,6 @@ impl<'src> Parser<'src> { } } - fn skip_balanced_after_open(&mut self, open: TokenKind, close: TokenKind) { - let mut depth = 1usize; - while !self.eof() { - if self.at(open) { - depth += 1; - } else if self.at(close) { - depth -= 1; - self.bump(); - if depth == 0 { - break; - } - continue; - } - self.bump(); - } - } - fn skip_doc_comments(&mut self) { while self.at(TokenKind::DocComment) { self.bump(); @@ -1389,6 +1413,63 @@ impl<'src> Parser<'src> { } } +/// Canonicalizes the overlapping `attr_arg` alternatives (§16): a bare +/// `IDENT`, `IDENT "=" expr`, and `IDENT "(" expr ")"` are all valid +/// expressions too, so 
they parse as expressions and the most specific attr +/// form is recovered from the shape afterwards. +fn classify_attr_expr(expr: Expr) -> AttrArg { + let span = expr.span; + match expr.kind { + ExprKind::Path(path) if is_attr_ident(&path) => { + let name = path.segments.into_iter().next().unwrap(); + AttrArg::Ident(Ident::new(name, path.span)) + } + ExprKind::Assign { target, value } => match target.kind { + ExprKind::Path(path) if is_attr_ident(&path) => { + let name = path.segments.into_iter().next().unwrap(); + AttrArg::Assigned { + name: Ident::new(name, path.span), + value: *value, + } + } + kind => AttrArg::Expr(Expr { + kind: ExprKind::Assign { + target: Box::new(Expr { + kind, + span: target.span, + }), + value, + }, + span, + }), + }, + ExprKind::Call { + callee, + type_args, + mut args, + } if type_args.is_empty() && args.len() == 1 && is_attr_ident_expr(&callee) => { + let ExprKind::Path(path) = callee.kind else { + unreachable!(); + }; + let name = path.segments.into_iter().next().unwrap(); + AttrArg::Call { + name: Ident::new(name, path.span), + arg: args.pop().unwrap(), + } + } + kind => AttrArg::Expr(Expr { kind, span }), + } +} + +/// A single plain identifier — `self`/`Self` are keywords, not `IDENT` (§16.1). +fn is_attr_ident(path: &Path) -> bool { + path.segments.len() == 1 && path.segments[0] != "self" && path.segments[0] != "Self" +} + +fn is_attr_ident_expr(expr: &Expr) -> bool { + matches!(&expr.kind, ExprKind::Path(path) if is_attr_ident(path)) +} + /// The §2.7 send-statement operand shape: `handle.method(args)` — a call whose /// callee is a field access. 
fn is_method_call(expr: &Expr) -> bool { @@ -1497,6 +1578,122 @@ mod tests { assert!(parse_program(source).is_ok()); } + #[test] + fn attribute_arguments_are_preserved() { + let source = "@derive(Debug, Eq)\nstruct User { id: u64 }"; + let program = parse_program(source).unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.name.name, "derive"); + // Attribute span covers `@derive(Debug, Eq)`. + assert_eq!(attr.span, Span::new(0, source.find('\n').unwrap())); + let names: Vec<_> = attr + .args + .iter() + .map(|arg| { + let AttrArg::Ident(ident) = arg else { + panic!("expected bare ident arg, got {arg:?}"); + }; + ident.name.as_str() + }) + .collect(); + assert_eq!(names, ["Debug", "Eq"]); + } + + #[test] + fn attribute_named_args_are_preserved() { + let source = r#" + @supervisor(strategy: "one_for_one", max_restarts: 5) + struct Sup { x: i64 } + "#; + let program = parse_program(source).unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.name.name, "supervisor"); + assert_eq!(attr.args.len(), 2); + let AttrArg::Named { name, value } = &attr.args[0] else { + panic!("expected named arg, got {:?}", attr.args[0]); + }; + assert_eq!(name.name, "strategy"); + assert!(matches!( + &value.kind, + ExprKind::Literal(Literal::String(text)) if text == "\"one_for_one\"" + )); + let AttrArg::Named { name, value } = &attr.args[1] else { + panic!("expected named arg, got {:?}", attr.args[1]); + }; + assert_eq!(name.name, "max_restarts"); + assert!(matches!( + &value.kind, + ExprKind::Literal(Literal::Int(text)) if text == "5" + )); + } + + #[test] + fn attribute_assigned_call_and_expr_args_are_preserved() { + let source = "@cfg(target = evm, feature(fast), CAP + 1)\nstruct S { x: i64 }"; + let program = parse_program(source).unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.args.len(), 3); + let AttrArg::Assigned { name, value } = &attr.args[0] else { + panic!("expected assigned arg, got {:?}", attr.args[0]); + }; + 
assert_eq!(name.name, "target"); + assert!(matches!(&value.kind, ExprKind::Path(path) if path.segments == ["evm"])); + let AttrArg::Call { name, arg } = &attr.args[1] else { + panic!("expected call arg, got {:?}", attr.args[1]); + }; + assert_eq!(name.name, "feature"); + assert!(matches!(&arg.kind, ExprKind::Path(path) if path.segments == ["fast"])); + let AttrArg::Expr(expr) = &attr.args[2] else { + panic!("expected expr arg, got {:?}", attr.args[2]); + }; + assert!(matches!( + &expr.kind, + ExprKind::Binary { + op: BinaryOp::Add, + .. + } + )); + } + + #[test] + fn bare_attribute_has_no_args_and_name_span() { + let program = parse_program("@test\nfn t() {}").unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.name.name, "test"); + assert!(attr.args.is_empty()); + assert_eq!(attr.span, Span::new(0, 5)); + } + + #[test] + fn actor_handler_attributes_are_preserved() { + let source = r#" + actor Worker { + state: i64, + @mailbox(capacity: 2048) + pub fn run(&mut self, n: i64) {} + } + "#; + let program = parse_program(source).unwrap(); + let ItemKind::Actor(actor) = &program.items[0].kind else { + panic!("expected actor"); + }; + let handler = &actor.handlers[0]; + assert_eq!(handler.function.name.name, "run"); + assert_eq!(handler.attrs.len(), 1); + let attr = &handler.attrs[0]; + assert_eq!(attr.name.name, "mailbox"); + // Span anchors to the `@` in the original source. 
+ assert_eq!(attr.span.start, source.find('@').unwrap()); + let AttrArg::Named { name, value } = &attr.args[0] else { + panic!("expected named arg, got {:?}", attr.args[0]); + }; + assert_eq!(name.name, "capacity"); + assert!(matches!( + &value.kind, + ExprKind::Literal(Literal::Int(text)) if text == "2048" + )); + } + #[test] fn declaration_names_cannot_be_reserved_keywords() { let errors = parse_program("fn self() {}").unwrap_err(); @@ -1532,7 +1729,7 @@ mod tests { let ItemKind::Actor(actor) = &program.items[0].kind else { panic!("expected actor"); }; - assert_eq!(actor.handlers[0].visibility, Visibility::Public); + assert_eq!(actor.handlers[0].function.visibility, Visibility::Public); let ItemKind::ExternBlock(extern_block) = &program.items[1].kind else { panic!("expected extern block"); }; From 4f25344fb641200943a1aea32e0699a633bdb526 Mon Sep 17 00:00:00 2001 From: RevenantPulse <68097584+StreamDemon@users.noreply.github.com> Date: Thu, 2 Jul 2026 19:46:06 +0800 Subject: [PATCH 7/7] Expand the corpus to cover every accepted grammar shape (#81) The crates/AGENTS.md rule is "add corpus tests for every grammar shape accepted", but extern blocks, onchain modules, use trees, casts, the literal zoo, async/.await, `?` outside pipes, attribute arguments, and struct-literal shapes had no fixtures. Six new fixtures close those gaps. The harness now discovers `tests/corpus/*.sp` instead of maintaining a hard-coded list, so a new fixture cannot be silently skipped; an is-empty guard catches a moved or emptied corpus directory. 
--- crates/sploosh-parser/tests/corpus.rs | 33 +++++++++++---------- tests/corpus/async_await.sp | 18 ++++++++++++ tests/corpus/attributes.sp | 41 +++++++++++++++++++++++++++ tests/corpus/casts_literals.sp | 35 +++++++++++++++++++++++ tests/corpus/expressions.sp | 34 ++++++++++++++++++++++ tests/corpus/extern_onchain.sp | 34 ++++++++++++++++++++++ tests/corpus/modules_use.sp | 23 +++++++++++++++ 7 files changed, 203 insertions(+), 15 deletions(-) create mode 100644 tests/corpus/async_await.sp create mode 100644 tests/corpus/attributes.sp create mode 100644 tests/corpus/casts_literals.sp create mode 100644 tests/corpus/expressions.sp create mode 100644 tests/corpus/extern_onchain.sp create mode 100644 tests/corpus/modules_use.sp diff --git a/crates/sploosh-parser/tests/corpus.rs b/crates/sploosh-parser/tests/corpus.rs index 78ecba6..95508ea 100644 --- a/crates/sploosh-parser/tests/corpus.rs +++ b/crates/sploosh-parser/tests/corpus.rs @@ -1,22 +1,25 @@ use sploosh_parser::parse_program; +/// Parses every `.sp` fixture in `tests/corpus/` at the repo root. Fixtures +/// are discovered, not listed, so a new file cannot be silently skipped +/// (crates/AGENTS.md: corpus tests for every accepted grammar shape). 
#[test] fn parses_corpus_files() { - for path in [ - "tests/corpus/basic.sp", - "tests/corpus/actor.sp", - "tests/corpus/control_flow.sp", - "tests/corpus/traits_impls.sp", - "tests/corpus/pipes.sp", - "tests/corpus/send_assign.sp", - "tests/corpus/ranges_modifiers.sp", - ] { - let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("../..") - .join(path); - let source = std::fs::read_to_string(&path).unwrap_or_else(|err| { - panic!("{}: {err}", path.display()); - }); + let corpus_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../tests/corpus"); + let mut paths: Vec<_> = std::fs::read_dir(&corpus_dir) + .unwrap_or_else(|err| panic!("{}: {err}", corpus_dir.display())) + .map(|entry| entry.expect("corpus dir entry").path()) + .filter(|path| path.extension().is_some_and(|ext| ext == "sp")) + .collect(); + paths.sort(); + assert!( + !paths.is_empty(), + "no .sp fixtures found in {}", + corpus_dir.display() + ); + for path in paths { + let source = std::fs::read_to_string(&path) + .unwrap_or_else(|err| panic!("{}: {err}", path.display())); parse_program(&source).unwrap_or_else(|errors| panic!("{}: {errors:#?}", path.display())); } } diff --git a/tests/corpus/async_await.sp b/tests/corpus/async_await.sp new file mode 100644 index 0000000..838a243 --- /dev/null +++ b/tests/corpus/async_await.sp @@ -0,0 +1,18 @@ +/// Async functions, `.await`, and `?` error propagation outside pipes +/// (§6 errors, §8.9 async). `.context(...)` is an ordinary method call. 
+async fn fetch(url: &str) -> Result { + let response = net::get(url).await?; + Ok(response) +} + +pub async fn retry(url: &str) -> Result { + let first = fetch(url).await; + let second = fetch(url).await?; + Ok(second) +} + +fn propagate(input: &str) -> Result { + let n = parse::(input)?; + let checked = validate(n).context("validating input")?; + Ok(checked) +} diff --git a/tests/corpus/attributes.sp b/tests/corpus/attributes.sp new file mode 100644 index 0000000..11c38ad --- /dev/null +++ b/tests/corpus/attributes.sp @@ -0,0 +1,41 @@ +/// Attribute shapes (§12, §16 attrs/attr_args): bare markers, derive lists, +/// named arguments, and actor-handler attributes. +@derive(Serialize, Clone, Debug) +struct Payload { + pub body: String, +} + +@error +enum AppError { + NotFound, + Denied, +} + +@overflow(wrapping) +fn wrap_add(a: u8, b: u8) -> u8 { + a + b +} + +@fast_math(contract, afn) +fn mix(a: f64, b: f64) -> f64 { + a * b + a +} + +@supervisor(strategy: "one_for_one", max_restarts: 5, window_secs: 60) +actor Sup { + children: i64, +} + +actor Mailer { + queued: i64, + + @mailbox(capacity: 2048) + pub fn enqueue(&mut self, n: i64) { + self.queued = self.queued + n; + } +} + +@test +fn smoke() { + let ok = true; +} diff --git a/tests/corpus/casts_literals.sp b/tests/corpus/casts_literals.sp new file mode 100644 index 0000000..d4569dd --- /dev/null +++ b/tests/corpus/casts_literals.sp @@ -0,0 +1,35 @@ +/// Numeric casts (§3.2 `as`, numeric-only) and the §16.1 literal zoo: based +/// integers, separators, suffixes, float exponents, string escapes, and +/// character literals. Lifetimes appear in generic params and reference types. 
+fn convert(n: i64, ratio: f64) -> u32 { + let small = n as i32; + let wide = small as i64; + let scaled = ratio as f32; + let back = scaled as f64; + let total = wide + n; + total as u32 +} + +fn literals() -> f64 { + let hex = 0xFF_FFu32; + let oct = 0o777; + let bin = 0b1010_1010u8; + let million = 1_000_000; + let big = 340_282_366_920_938u128; + let chain_cap = 115_792u256; + let pi = 3.14159f64; + let tiny = 2.5e-3; + let large = 1e10f64; + let greeting = "hi\n\t\\\"\x41\u{1F600}"; + let letter = 'a'; + let newline = '\n'; + let quote = '\''; + let escaped = '\u{41}'; + let yes = true; + let no = false; + 3.0 +} + +fn longest<'a>(a: &'a str, b: &'a str) -> &'a str { + a +} diff --git a/tests/corpus/expressions.sp b/tests/corpus/expressions.sp new file mode 100644 index 0000000..9485099 --- /dev/null +++ b/tests/corpus/expressions.sp @@ -0,0 +1,34 @@ +/// Struct literals (§5.1 incl. the parenthesized block-head escape and +/// shorthand field init), nesting, and the boolean/comparison operator set. +struct Point { + pub x: i64, + pub y: i64, +} + +struct Cfg { + pub on: bool, +} + +fn build(x: i64, y: i64) -> Point { + let origin = Point { x: 0, y: 0 }; + let shorthand = Point { x, y }; + let nested = Wrap { inner: Point { x: 1, y: 2 } }; + origin +} + +fn guarded() -> i64 { + if (Cfg { on: true }).on { + 1 + } else { + 2 + } +} + +fn logic(a: bool, b: bool, lo: i64, hi: i64) -> bool { + let both = a && b; + let either = a || b; + let cmp = lo <= hi; + let ne = lo != hi; + let modulo = hi % 7 == 0; + both && either && cmp && ne && modulo +} diff --git a/tests/corpus/extern_onchain.sp b/tests/corpus/extern_onchain.sp new file mode 100644 index 0000000..ebd3a3d --- /dev/null +++ b/tests/corpus/extern_onchain.sp @@ -0,0 +1,34 @@ +/// FFI and on-chain surfaces: extern "C" blocks (§4.9), string-target async +/// extern blocks, extern onchain mod declarations (§11.4a), a top-level +/// onchain enum, and an onchain mod with a storage block (§11.1a). 
+extern "C" { + pub fn puts(s: &str) -> i32; + fn c_open(path: &str, flags: i32) -> Result; +} + +extern "C" async { + fn poll_events(mask: u32) -> u64; +} + +extern onchain mod token { + pub fn balance_of(account: Address) -> Result; +} + +onchain enum TokenError { + InsufficientBalance, + Unauthorized, +} + +onchain mod vault { + storage { + balances: Map, + supply: u256, + } + + pub fn deposit(amount: u256) -> Result { + let caller = ctx::caller(); + let current = storage::get(&self.balances, caller)?; + storage::set(&mut self.balances, caller, current + amount); + Ok(current + amount) + } +} diff --git a/tests/corpus/modules_use.sp b/tests/corpus/modules_use.sp new file mode 100644 index 0000000..894d63c --- /dev/null +++ b/tests/corpus/modules_use.sp @@ -0,0 +1,23 @@ +/// Module trees and use declarations (§10): inline modules, file modules, +/// nested paths, brace imports, contextual `crate`/`super` heads, and +/// re-exports. +mod auth { + pub mod login; + pub mod token; + + pub fn is_enabled() -> bool { + true + } +} + +mod models; + +use std::collections::Map; +use crate::models::{User, Role}; +pub use crate::models::User; +use super::shared; +use crate::api; + +fn wire() -> bool { + true +}