diff --git a/crates/sploosh-ast/src/lib.rs b/crates/sploosh-ast/src/lib.rs index cbbba7c..6607d2b 100644 --- a/crates/sploosh-ast/src/lib.rs +++ b/crates/sploosh-ast/src/lib.rs @@ -73,6 +73,26 @@ pub enum ItemKind { #[derive(Debug, Clone, PartialEq)] pub struct Attribute { pub name: Ident, + pub args: Vec, + /// Covers `@` through the closing `)` (or the name, when there are no args). + pub span: Span, +} + +/// `attr_arg = IDENT [ ":" expr | "=" expr | "(" expr ")" ] | expr` (§16). +/// The `IDENT`-headed alternatives overlap with `expr`; the parser stores the +/// most specific form that matches. +#[derive(Debug, Clone, PartialEq)] +pub enum AttrArg { + /// Bare `IDENT` — `@derive(Debug)`, `@overflow(wrapping)`. + Ident(Ident), + /// `IDENT ":" expr` — `@mailbox(capacity: 2048)`. + Named { name: Ident, value: Expr }, + /// `IDENT "=" expr`. + Assigned { name: Ident, value: Expr }, + /// `IDENT "(" expr ")"`. + Call { name: Ident, arg: Expr }, + /// Any other bare expression. + Expr(Expr), } #[derive(Debug, Clone, PartialEq)] @@ -136,7 +156,16 @@ pub enum VariantKind { pub struct Actor { pub name: Ident, pub fields: Vec, - pub handlers: Vec, + pub handlers: Vec, +} + +/// A handler is a `fn_def` inside an `actor` body (§16), so it carries its +/// own attributes (`@mailbox(capacity: N)`, ...). Item-position `fn` attrs +/// stay hoisted on `Item.attrs` during bootstrap. +#[derive(Debug, Clone, PartialEq)] +pub struct Handler { + pub attrs: Vec, + pub function: Function, } #[derive(Debug, Clone, PartialEq)] @@ -239,6 +268,100 @@ pub enum Stmt { Continue, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UnaryOp { + /// `!` + Not, + /// `-` + Neg, + /// `*` + Deref, + /// `&` + Ref, +} + +impl UnaryOp { + pub fn as_str(self) -> &'static str { + match self { + Self::Not => "!", + Self::Neg => "-", + Self::Deref => "*", + Self::Ref => "&", + } + } +} + +impl std::fmt::Display for UnaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOp { + /// `|>` + Pipe, + /// `+` + Add, + /// `-` + Sub, + /// `*` + Mul, + /// `/` + Div, + /// `%` + Rem, + /// `==` + Eq, + /// `!=` + Ne, + /// `<` + Lt, + /// `>` + Gt, + /// `<=` + Le, + /// `>=` + Ge, + /// `&&` + And, + /// `||` + Or, + /// `..` + Range, + /// `..=` + RangeInclusive, +} + +impl BinaryOp { + pub fn as_str(self) -> &'static str { + match self { + Self::Pipe => "|>", + Self::Add => "+", + Self::Sub => "-", + Self::Mul => "*", + Self::Div => "/", + Self::Rem => "%", + Self::Eq => "==", + Self::Ne => "!=", + Self::Lt => "<", + Self::Gt => ">", + Self::Le => "<=", + Self::Ge => ">=", + Self::And => "&&", + Self::Or => "||", + Self::Range => "..", + Self::RangeInclusive => "..=", + } + } +} + +impl std::fmt::Display for BinaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + #[derive(Debug, Clone, PartialEq)] pub struct Expr { pub kind: ExprKind, @@ -263,11 +386,11 @@ pub enum ExprKind { index: Box, }, Unary { - op: String, + op: UnaryOp, expr: Box, }, Binary { - op: String, + op: BinaryOp, left: Box, right: Box, }, diff --git a/crates/sploosh-lexer/src/lib.rs b/crates/sploosh-lexer/src/lib.rs index e21e234..b8c0389 100644 --- a/crates/sploosh-lexer/src/lib.rs +++ b/crates/sploosh-lexer/src/lib.rs @@ -2,14 +2,21 @@ use sploosh_ast::Span; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Token { pub kind: TokenKind, - pub lexeme: String, pub span: Span, } -#[derive(Debug, Clone, PartialEq, Eq)] +impl Token { + /// The token's source text, sliced from the file it was lexed from. + /// Tokens carry only spans; slicing on demand keeps lexing allocation-free. + pub fn text<'src>(&self, source: &'src str) -> &'src str { + &source[self.span.start..self.span.end] + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum TokenKind { Ident, Keyword(Keyword), @@ -148,6 +155,13 @@ pub fn is_contextual_keyword(text: &str) -> bool { ) } +/// §16.1 numeric suffixes, longest-first so prefix scanning never matches a +/// shorter suffix inside a longer one. Shared by suffix scanning and +/// separator validation. +const NUMERIC_SUFFIXES: [&str; 13] = [ + "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", "u8", +]; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct LexError { pub message: String, @@ -339,16 +353,12 @@ impl Lexer<'_> { } } - fn numeric_suffix(&mut self) -> Option { + fn numeric_suffix(&mut self) -> Option<&'static str> { let rest = &self.source[self.pos..]; - let suffixes = [ - "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", - "u8", - ]; - for suffix in suffixes { + for suffix in NUMERIC_SUFFIXES { if rest.starts_with(suffix) { self.pos += suffix.len(); - return Some(suffix.to_string()); + return Some(suffix); } } None @@ -356,11 +366,7 @@ impl Lexer<'_> { fn validate_numeric_body(&mut self, start: usize, base: u8) { let text = &self.source[start..self.pos]; - let suffixes = [ - "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", - "u8", - ]; - let body = suffixes + let body = NUMERIC_SUFFIXES .iter() .find_map(|suffix| text.strip_suffix(suffix)) .unwrap_or(text); @@ -560,7 +566,6 @@ impl Lexer<'_> { fn push(&mut self, kind: TokenKind, start: usize, end: usize) { self.tokens.push(Token { kind, - lexeme: self.source[start..end].to_string(), span: Span::new(start, end), }); } @@ -628,6 +633,23 @@ mod tests { assert_eq!(tokens[4].kind, TokenKind::IntLit); } + #[test] + fn every_numeric_suffix_lexes() { + for suffix in NUMERIC_SUFFIXES { + let source = format!("1{suffix}"); + let tokens = lex(&source).unwrap(); + assert_eq!(tokens.len(), 1, "{source}"); + let expected = if suffix.starts_with('f') { + TokenKind::FloatLit + } else { + TokenKind::IntLit + }; + assert_eq!(tokens[0].kind, expected, "{source}"); + assert_eq!(tokens[0].text(&source), source); + assert_eq!(tokens[0].span, Span::new(0, source.len())); + } + } + #[test] fn rejects_bad_numeric_separators() { let err = lex("1__2").unwrap_err(); diff --git a/crates/sploosh-parser/src/lib.rs b/crates/sploosh-parser/src/lib.rs index 3c3b9ca..50fc1f6 100644 --- a/crates/sploosh-parser/src/lib.rs +++ b/crates/sploosh-parser/src/lib.rs @@ -11,7 +11,7 @@ pub struct ParseError { pub fn parse_program(source: &str) -> Result> { let tokens = lex(source).map_err(lex_errors)?; - Parser::new(tokens).parse_program() + Parser::new(tokens, source).parse_program() } fn lex_errors(errors: Vec) -> Vec { @@ -24,8 +24,17 @@ fn lex_errors(errors: Vec) -> Vec { .collect() } -struct Parser { +/// Parser-internal classification of infix operators: `=` builds an +/// `ExprKind::Assign` node, everything else an `ExprKind::Binary`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Infix { + Assign, + Op(BinaryOp), +} + +struct Parser<'src> { tokens: Vec, + source: &'src str, pos: usize, errors: Vec, /// When set, a `struct_literal` may not be the outermost expression — the @@ -33,16 +42,22 @@ struct Parser { no_struct_literal: bool, } -impl Parser { - fn new(tokens: Vec) -> Self { +impl<'src> Parser<'src> { + fn new(tokens: Vec, source: &'src str) -> Self { Self { tokens, + source, pos: 0, errors: Vec::new(), no_struct_literal: false, } } + /// Source text for a token; tokens carry only spans (see `Token::text`). + fn text(&self, token: &Token) -> &'src str { + token.text(self.source) + } + fn parse_program(mut self) -> Result> { let mut items = Vec::new(); while !self.eof() { @@ -132,20 +147,56 @@ impl Parser { fn attrs(&mut self) -> Vec { let mut attrs = Vec::new(); - loop { - if self.eat(TokenKind::At).is_none() { - break; - } + while let Some(at) = self.eat(TokenKind::At) { if let Some(name) = self.ident() { + let mut args = Vec::new(); + let mut end = name.span.end; if self.eat(TokenKind::LParen).is_some() { - self.skip_balanced_after_open(TokenKind::LParen, TokenKind::RParen); + args = self.attr_args(); + end = match self.expect(TokenKind::RParen) { + Some(close) => close.span.end, + None => self.prev_span().end, + }; } - attrs.push(Attribute { name }); + attrs.push(Attribute { + name, + args, + span: Span::new(at.span.start, end), + }); } } attrs } + /// `attr_args = attr_arg { "," attr_arg }` (§16). + fn attr_args(&mut self) -> Vec { + let mut args = Vec::new(); + while !self.at(TokenKind::RParen) && !self.eof() { + match self.attr_arg() { + Some(arg) => args.push(arg), + None => self.recover_until(&[TokenKind::Comma, TokenKind::RParen]), + } + if self.eat(TokenKind::Comma).is_none() { + break; + } + } + args + } + + /// `attr_arg = IDENT [ ":" expr | "=" expr | "(" expr ")" ] | expr` (§16). + /// Only `IDENT ":"` needs lookahead — `:` cannot continue an expression. + /// The `=` and `(...)` alternatives are canonicalized out of the parsed + /// expression, since both are valid expression shapes themselves. + fn attr_arg(&mut self) -> Option { + if self.at(TokenKind::Ident) && self.peek_kind_at(1) == Some(TokenKind::Colon) { + let name = self.ident()?; + self.bump(); + let value = self.delimited_expr()?; + return Some(AttrArg::Named { name, value }); + } + Some(classify_attr_expr(self.delimited_expr()?)) + } + fn function_after_mods( &mut self, visibility: Visibility, @@ -304,7 +355,11 @@ impl Parser { let mut handlers = Vec::new(); while !self.at(TokenKind::RBrace) && !self.eof() { self.skip_doc_comments(); - let _attrs = self.attrs(); + // A handler is a `fn_def`, so its attrs (`@mailbox(...)`, ...) are + // preserved. Fields take no attrs in §16; anything parsed before a + // field is currently discarded, matching the item-position + // tolerance for attrs on kinds the grammar leaves bare. + let attrs = self.attrs(); let visibility = if self.eat_keyword(Keyword::Pub).is_some() { Visibility::Public } else { @@ -312,7 +367,8 @@ impl Parser { }; let is_async = self.eat_keyword(Keyword::Async).is_some(); if self.at_keyword(Keyword::Fn) { - handlers.push(self.function_after_mods(visibility, is_async, false, true)?); + let function = self.function_after_mods(visibility, is_async, false, true)?; + handlers.push(Handler { attrs, function }); } else { let name = self.ident()?; self.expect(TokenKind::Colon)?; @@ -477,7 +533,8 @@ impl Parser { fn extern_block(&mut self) -> Option { self.expect_keyword(Keyword::Extern)?; let target = if self.at(TokenKind::StringLit) { - self.bump().lexeme + let token = self.bump(); + self.text(&token).to_string() } else { self.expect_keyword(Keyword::Onchain)?; self.expect_keyword(Keyword::Mod)?; @@ -605,10 +662,7 @@ impl Parser { } else if self.eat_keyword(Keyword::Continue).is_some() { self.expect(TokenKind::Semi)?; statements.push(Stmt::Continue); - } else if self.at_ident_text("send") - && self - .peek_kind_at(1) - .is_some_and(|kind| can_begin_expr(&kind)) + } else if self.at_ident_text("send") && self.peek_kind_at(1).is_some_and(can_begin_expr) { // §2.7: `send` at statement head followed by any token that can // begin an expression always opens a send-statement; the operand @@ -728,21 +782,21 @@ impl Parser { }; continue; } - let Some((op, left_bp, right_bp)) = self.infix_binding_power() else { + let Some((infix, left_bp, right_bp)) = self.infix_binding_power() else { break; }; if left_bp < min_bp { break; } - let op_text = self.bump().lexeme; - if op == "|>" { + self.bump(); + if infix == Infix::Op(BinaryOp::Pipe) { // §16: the RHS of `|>` is a `pipe_stage`, not a precedence-climbed // expression. let stage = self.pipe_stage()?; let span = lhs.span.join(stage.span); lhs = Expr { kind: ExprKind::Binary { - op: op_text, + op: BinaryOp::Pipe, left: Box::new(lhs), right: Box::new(stage), }, @@ -761,31 +815,32 @@ impl Parser { } let rhs = self.expr(right_bp)?; let span = lhs.span.join(rhs.span); - lhs = if op == "=" { - // §16: only an `assign_target` may appear on the left side. - if !is_assign_target(&lhs) { - self.error_at(lhs.span, "invalid assignment target"); - } - Expr { - kind: ExprKind::Assign { - target: Box::new(lhs), - value: Box::new(rhs), - }, - span, + lhs = match infix { + Infix::Assign => { + // §16: only an `assign_target` may appear on the left side. + if !is_assign_target(&lhs) { + self.error_at(lhs.span, "invalid assignment target"); + } + Expr { + kind: ExprKind::Assign { + target: Box::new(lhs), + value: Box::new(rhs), + }, + span, + } } - } else { - Expr { + Infix::Op(op) => Expr { kind: ExprKind::Binary { - op: op_text, + op, left: Box::new(lhs), right: Box::new(rhs), }, span, - } + }, }; // The precedence table marks `..`/`..=` non-associative: a range // operand may not itself be an unparenthesized range. - if matches!(op, ".." | "..=") + if matches!(infix, Infix::Op(BinaryOp::Range | BinaryOp::RangeInclusive)) && matches!( self.peek_kind(), Some(TokenKind::DotDot | TokenKind::DotDotEq) @@ -822,15 +877,16 @@ impl Parser { } fn prefix(&mut self) -> Option { - let token = self.peek()?.clone(); + let token = *self.peek()?; match token.kind { TokenKind::IntLit | TokenKind::FloatLit | TokenKind::StringLit | TokenKind::CharLit => { self.bump(); + let text = self.text(&token).to_string(); let lit = match token.kind { - TokenKind::IntLit => Literal::Int(token.lexeme), - TokenKind::FloatLit => Literal::Float(token.lexeme), - TokenKind::StringLit => Literal::String(token.lexeme), - TokenKind::CharLit => Literal::Char(token.lexeme), + TokenKind::IntLit => Literal::Int(text), + TokenKind::FloatLit => Literal::Float(text), + TokenKind::StringLit => Literal::String(text), + TokenKind::CharLit => Literal::Char(text), _ => unreachable!(), }; Some(Expr { @@ -841,11 +897,14 @@ impl Parser { TokenKind::Keyword(Keyword::True | Keyword::False) => { self.bump(); Some(Expr { - kind: ExprKind::Literal(Literal::Bool(token.lexeme == "true")), + kind: ExprKind::Literal(Literal::Bool(matches!( + token.kind, + TokenKind::Keyword(Keyword::True) + ))), span: token.span, }) } - TokenKind::Ident if token.lexeme == "vec" => { + TokenKind::Ident if self.text(&token) == "vec" => { self.bump(); if self.eat(TokenKind::Bang).is_some() { // §16 `vec_literal`: `vec` "!" only ever binds to square @@ -885,7 +944,7 @@ impl Parser { } Some(Expr { kind: ExprKind::Path(Path { - segments: vec![token.lexeme], + segments: vec![self.text(&token).to_string()], span: token.span, }), span: token.span, @@ -908,8 +967,15 @@ impl Parser { }) } TokenKind::Bang | TokenKind::Minus | TokenKind::Star | TokenKind::Amp => { - let op = self.bump().lexeme; - if op == "&" { + let op = match token.kind { + TokenKind::Bang => UnaryOp::Not, + TokenKind::Minus => UnaryOp::Neg, + TokenKind::Star => UnaryOp::Deref, + TokenKind::Amp => UnaryOp::Ref, + _ => unreachable!(), + }; + self.bump(); + if op == UnaryOp::Ref { let _mutable = self.eat_ident_text("mut"); } let expr = self.expr(11)?; @@ -1042,7 +1108,7 @@ impl Parser { fn args(&mut self, close: TokenKind) -> Option> { let mut args = Vec::new(); - while !self.at(close.clone()) && !self.eof() { + while !self.at(close) && !self.eof() { args.push(self.delimited_expr()?); if self.eat(TokenKind::Comma).is_none() { break; @@ -1091,7 +1157,7 @@ impl Parser { | TokenKind::Keyword(Keyword::SelfType) | TokenKind::Keyword(Keyword::SelfValue) => { let token = self.bump(); - Some(Ident::new(token.lexeme, token.span)) + Some(Ident::new(self.text(&token), token.span)) } _ => { self.error_here("expected path segment"); @@ -1101,11 +1167,11 @@ impl Parser { } fn ident(&mut self) -> Option { - let token = self.peek()?.clone(); + let token = *self.peek()?; match token.kind { TokenKind::Ident => { self.bump(); - Some(Ident::new(token.lexeme, token.span)) + Some(Ident::new(self.text(&token), token.span)) } _ => { self.error_here("expected identifier"); @@ -1178,9 +1244,9 @@ impl Parser { fn skip_balanced(&mut self, open: TokenKind, close: TokenKind) { let mut depth = 0usize; while !self.eof() { - if self.at(open.clone()) { + if self.at(open) { depth += 1; - } else if self.at(close.clone()) { + } else if self.at(close) { if depth == 0 { self.bump(); break; @@ -1196,48 +1262,31 @@ impl Parser { } } - fn skip_balanced_after_open(&mut self, open: TokenKind, close: TokenKind) { - let mut depth = 1usize; - while !self.eof() { - if self.at(open.clone()) { - depth += 1; - } else if self.at(close.clone()) { - depth -= 1; - self.bump(); - if depth == 0 { - break; - } - continue; - } - self.bump(); - } - } - fn skip_doc_comments(&mut self) { while self.at(TokenKind::DocComment) { self.bump(); } } - fn infix_binding_power(&self) -> Option<(&'static str, u8, u8)> { + fn infix_binding_power(&self) -> Option<(Infix, u8, u8)> { Some(match self.peek_kind()? { - TokenKind::Eq => ("=", 2, 1), - TokenKind::PipeGt => ("|>", 8, 9), - TokenKind::Plus => ("+", 9, 10), - TokenKind::Minus => ("-", 9, 10), - TokenKind::Star => ("*", 10, 11), - TokenKind::Slash => ("/", 10, 11), - TokenKind::Percent => ("%", 10, 11), - TokenKind::EqEq => ("==", 6, 7), - TokenKind::Ne => ("!=", 6, 7), - TokenKind::Lt => ("<", 7, 8), - TokenKind::Gt => (">", 7, 8), - TokenKind::Le => ("<=", 7, 8), - TokenKind::Ge => (">=", 7, 8), - TokenKind::AmpAmp => ("&&", 5, 6), - TokenKind::PipePipe => ("||", 4, 5), - TokenKind::DotDot => ("..", 3, 4), - TokenKind::DotDotEq => ("..=", 3, 4), + TokenKind::Eq => (Infix::Assign, 2, 1), + TokenKind::PipeGt => (Infix::Op(BinaryOp::Pipe), 8, 9), + TokenKind::Plus => (Infix::Op(BinaryOp::Add), 9, 10), + TokenKind::Minus => (Infix::Op(BinaryOp::Sub), 9, 10), + TokenKind::Star => (Infix::Op(BinaryOp::Mul), 10, 11), + TokenKind::Slash => (Infix::Op(BinaryOp::Div), 10, 11), + TokenKind::Percent => (Infix::Op(BinaryOp::Rem), 10, 11), + TokenKind::EqEq => (Infix::Op(BinaryOp::Eq), 6, 7), + TokenKind::Ne => (Infix::Op(BinaryOp::Ne), 6, 7), + TokenKind::Lt => (Infix::Op(BinaryOp::Lt), 7, 8), + TokenKind::Gt => (Infix::Op(BinaryOp::Gt), 7, 8), + TokenKind::Le => (Infix::Op(BinaryOp::Le), 7, 8), + TokenKind::Ge => (Infix::Op(BinaryOp::Ge), 7, 8), + TokenKind::AmpAmp => (Infix::Op(BinaryOp::And), 5, 6), + TokenKind::PipePipe => (Infix::Op(BinaryOp::Or), 4, 5), + TokenKind::DotDot => (Infix::Op(BinaryOp::Range), 3, 4), + TokenKind::DotDotEq => (Infix::Op(BinaryOp::RangeInclusive), 3, 4), _ => return None, }) } @@ -1254,7 +1303,7 @@ impl Parser { } fn recover_until(&mut self, kinds: &[TokenKind]) { - while !self.eof() && !kinds.iter().any(|kind| self.at(kind.clone())) { + while !self.eof() && !kinds.iter().any(|kind| self.at(*kind)) { self.bump(); } } @@ -1276,7 +1325,7 @@ impl Parser { fn at_ident_text(&self, text: &str) -> bool { self.peek() - .is_some_and(|token| token.kind == TokenKind::Ident && token.lexeme == text) + .is_some_and(|token| token.kind == TokenKind::Ident && token.text(self.source) == text) } fn eat_ident_text(&mut self, text: &str) -> Option { @@ -1296,7 +1345,7 @@ impl Parser { } fn expect(&mut self, kind: TokenKind) -> Option { - self.eat(kind.clone()).or_else(|| { + self.eat(kind).or_else(|| { self.error_here(format!("expected `{kind:?}`")); None }) @@ -1311,13 +1360,11 @@ impl Parser { } fn peek_kind(&self) -> Option { - self.peek().map(|token| token.kind.clone()) + self.peek().map(|token| token.kind) } fn peek_kind_at(&self, offset: usize) -> Option { - self.tokens - .get(self.pos + offset) - .map(|token| token.kind.clone()) + self.tokens.get(self.pos + offset).map(|token| token.kind) } fn can_start_path_segment_at(&self, offset: usize) -> bool { @@ -1342,7 +1389,7 @@ impl Parser { } fn bump(&mut self) -> Token { - let token = self.tokens[self.pos].clone(); + let token = self.tokens[self.pos]; self.pos += 1; token } @@ -1366,6 +1413,63 @@ impl Parser { } } +/// Canonicalizes the overlapping `attr_arg` alternatives (§16): a bare +/// `IDENT`, `IDENT "=" expr`, and `IDENT "(" expr ")"` are all valid +/// expressions too, so they parse as expressions and the most specific attr +/// form is recovered from the shape afterwards. +fn classify_attr_expr(expr: Expr) -> AttrArg { + let span = expr.span; + match expr.kind { + ExprKind::Path(path) if is_attr_ident(&path) => { + let name = path.segments.into_iter().next().unwrap(); + AttrArg::Ident(Ident::new(name, path.span)) + } + ExprKind::Assign { target, value } => match target.kind { + ExprKind::Path(path) if is_attr_ident(&path) => { + let name = path.segments.into_iter().next().unwrap(); + AttrArg::Assigned { + name: Ident::new(name, path.span), + value: *value, + } + } + kind => AttrArg::Expr(Expr { + kind: ExprKind::Assign { + target: Box::new(Expr { + kind, + span: target.span, + }), + value, + }, + span, + }), + }, + ExprKind::Call { + callee, + type_args, + mut args, + } if type_args.is_empty() && args.len() == 1 && is_attr_ident_expr(&callee) => { + let ExprKind::Path(path) = callee.kind else { + unreachable!(); + }; + let name = path.segments.into_iter().next().unwrap(); + AttrArg::Call { + name: Ident::new(name, path.span), + arg: args.pop().unwrap(), + } + } + kind => AttrArg::Expr(Expr { kind, span }), + } +} + +/// A single plain identifier — `self`/`Self` are keywords, not `IDENT` (§16.1). +fn is_attr_ident(path: &Path) -> bool { + path.segments.len() == 1 && path.segments[0] != "self" && path.segments[0] != "Self" +} + +fn is_attr_ident_expr(expr: &Expr) -> bool { + matches!(&expr.kind, ExprKind::Path(path) if is_attr_ident(path)) +} + /// The §2.7 send-statement operand shape: `handle.method(args)` — a call whose /// callee is a field access. fn is_method_call(expr: &Expr) -> bool { @@ -1384,13 +1488,13 @@ fn is_assign_target(expr: &Expr) -> bool { path.segments.len() == 1 && path.segments[0] != "self" && path.segments[0] != "Self" } ExprKind::Field { .. } | ExprKind::Index { .. } => true, - ExprKind::Unary { op, .. } => op == "*", + ExprKind::Unary { op, .. } => *op == UnaryOp::Deref, _ => false, } } /// Tokens that can begin an expression — must stay in sync with `prefix()`. -fn can_begin_expr(kind: &TokenKind) -> bool { +fn can_begin_expr(kind: TokenKind) -> bool { matches!( kind, TokenKind::IntLit @@ -1474,6 +1578,122 @@ mod tests { assert!(parse_program(source).is_ok()); } + #[test] + fn attribute_arguments_are_preserved() { + let source = "@derive(Debug, Eq)\nstruct User { id: u64 }"; + let program = parse_program(source).unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.name.name, "derive"); + // Attribute span covers `@derive(Debug, Eq)`. + assert_eq!(attr.span, Span::new(0, source.find('\n').unwrap())); + let names: Vec<_> = attr + .args + .iter() + .map(|arg| { + let AttrArg::Ident(ident) = arg else { + panic!("expected bare ident arg, got {arg:?}"); + }; + ident.name.as_str() + }) + .collect(); + assert_eq!(names, ["Debug", "Eq"]); + } + + #[test] + fn attribute_named_args_are_preserved() { + let source = r#" + @supervisor(strategy: "one_for_one", max_restarts: 5) + struct Sup { x: i64 } + "#; + let program = parse_program(source).unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.name.name, "supervisor"); + assert_eq!(attr.args.len(), 2); + let AttrArg::Named { name, value } = &attr.args[0] else { + panic!("expected named arg, got {:?}", attr.args[0]); + }; + assert_eq!(name.name, "strategy"); + assert!(matches!( + &value.kind, + ExprKind::Literal(Literal::String(text)) if text == "\"one_for_one\"" + )); + let AttrArg::Named { name, value } = &attr.args[1] else { + panic!("expected named arg, got {:?}", attr.args[1]); + }; + assert_eq!(name.name, "max_restarts"); + assert!(matches!( + &value.kind, + ExprKind::Literal(Literal::Int(text)) if text == "5" + )); + } + + #[test] + fn attribute_assigned_call_and_expr_args_are_preserved() { + let source = "@cfg(target = evm, feature(fast), CAP + 1)\nstruct S { x: i64 }"; + let program = parse_program(source).unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.args.len(), 3); + let AttrArg::Assigned { name, value } = &attr.args[0] else { + panic!("expected assigned arg, got {:?}", attr.args[0]); + }; + assert_eq!(name.name, "target"); + assert!(matches!(&value.kind, ExprKind::Path(path) if path.segments == ["evm"])); + let AttrArg::Call { name, arg } = &attr.args[1] else { + panic!("expected call arg, got {:?}", attr.args[1]); + }; + assert_eq!(name.name, "feature"); + assert!(matches!(&arg.kind, ExprKind::Path(path) if path.segments == ["fast"])); + let AttrArg::Expr(expr) = &attr.args[2] else { + panic!("expected expr arg, got {:?}", attr.args[2]); + }; + assert!(matches!( + &expr.kind, + ExprKind::Binary { + op: BinaryOp::Add, + .. + } + )); + } + + #[test] + fn bare_attribute_has_no_args_and_name_span() { + let program = parse_program("@test\nfn t() {}").unwrap(); + let attr = &program.items[0].attrs[0]; + assert_eq!(attr.name.name, "test"); + assert!(attr.args.is_empty()); + assert_eq!(attr.span, Span::new(0, 5)); + } + + #[test] + fn actor_handler_attributes_are_preserved() { + let source = r#" + actor Worker { + state: i64, + @mailbox(capacity: 2048) + pub fn run(&mut self, n: i64) {} + } + "#; + let program = parse_program(source).unwrap(); + let ItemKind::Actor(actor) = &program.items[0].kind else { + panic!("expected actor"); + }; + let handler = &actor.handlers[0]; + assert_eq!(handler.function.name.name, "run"); + assert_eq!(handler.attrs.len(), 1); + let attr = &handler.attrs[0]; + assert_eq!(attr.name.name, "mailbox"); + // Span anchors to the `@` in the original source. + assert_eq!(attr.span.start, source.find('@').unwrap()); + let AttrArg::Named { name, value } = &attr.args[0] else { + panic!("expected named arg, got {:?}", attr.args[0]); + }; + assert_eq!(name.name, "capacity"); + assert!(matches!( + &value.kind, + ExprKind::Literal(Literal::Int(text)) if text == "2048" + )); + } + #[test] fn declaration_names_cannot_be_reserved_keywords() { let errors = parse_program("fn self() {}").unwrap_err(); @@ -1509,7 +1729,7 @@ mod tests { let ItemKind::Actor(actor) = &program.items[0].kind else { panic!("expected actor"); }; - assert_eq!(actor.handlers[0].visibility, Visibility::Public); + assert_eq!(actor.handlers[0].function.visibility, Visibility::Public); let ItemKind::ExternBlock(extern_block) = &program.items[1].kind else { panic!("expected extern block"); }; @@ -1557,7 +1777,7 @@ mod tests { let ExprKind::Binary { op, .. } = &tail.kind else { panic!("expected binary expression"); }; - assert_eq!(op, "/"); + assert_eq!(*op, BinaryOp::Div); } #[test] @@ -1780,7 +2000,7 @@ mod tests { let ExprKind::Binary { op, left, right } = &inner.kind else { panic!("expected pipe binary inside ErrorProp"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(left, "input")); assert!(path_named(right, "parse")); } @@ -1795,7 +2015,7 @@ mod tests { let ExprKind::Binary { op, left, right } = &outer.kind else { panic!("expected outer pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(right, "g")); let ExprKind::ErrorProp(mid) = &left.kind else { panic!("expected inner ErrorProp"); @@ -1803,7 +2023,7 @@ mod tests { let ExprKind::Binary { op, left, right } = &mid.kind else { panic!("expected inner pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(left, "a")); assert!(path_named(right, "f")); } @@ -1817,7 +2037,7 @@ mod tests { let ExprKind::Binary { op, right, .. } = &inner.kind else { panic!("expected pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); let ExprKind::Call { callee, args, .. } = &right.kind else { panic!("expected call stage"); }; @@ -1835,7 +2055,7 @@ mod tests { let ExprKind::Binary { op, right, .. } = &inner.kind else { panic!("expected pipe binary"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); let ExprKind::Field { base, name } = &right.kind else { panic!("expected field-chain stage"); }; @@ -1873,12 +2093,12 @@ mod tests { let ExprKind::Binary { op, left, right } = &value.kind else { panic!("expected `+` at the top"); }; - assert_eq!(op, "+"); + assert_eq!(*op, BinaryOp::Add); assert!(path_named(right, "b")); let ExprKind::Binary { op, left, right } = &left.kind else { panic!("expected pipe binary on the left"); }; - assert_eq!(op, "|>"); + assert_eq!(*op, BinaryOp::Pipe); assert!(path_named(left, "x")); assert!(path_named(right, "a")); } diff --git a/crates/sploosh-parser/tests/corpus.rs b/crates/sploosh-parser/tests/corpus.rs index 78ecba6..95508ea 100644 --- a/crates/sploosh-parser/tests/corpus.rs +++ b/crates/sploosh-parser/tests/corpus.rs @@ -1,22 +1,25 @@ use sploosh_parser::parse_program; +/// Parses every `.sp` fixture in `tests/corpus/` at the repo root. Fixtures +/// are discovered, not listed, so a new file cannot be silently skipped +/// (crates/AGENTS.md: corpus tests for every accepted grammar shape). #[test] fn parses_corpus_files() { - for path in [ - "tests/corpus/basic.sp", - "tests/corpus/actor.sp", - "tests/corpus/control_flow.sp", - "tests/corpus/traits_impls.sp", - "tests/corpus/pipes.sp", - "tests/corpus/send_assign.sp", - "tests/corpus/ranges_modifiers.sp", - ] { - let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("../..") - .join(path); - let source = std::fs::read_to_string(&path).unwrap_or_else(|err| { - panic!("{}: {err}", path.display()); - }); + let corpus_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../tests/corpus"); + let mut paths: Vec<_> = std::fs::read_dir(&corpus_dir) + .unwrap_or_else(|err| panic!("{}: {err}", corpus_dir.display())) + .map(|entry| entry.expect("corpus dir entry").path()) + .filter(|path| path.extension().is_some_and(|ext| ext == "sp")) + .collect(); + paths.sort(); + assert!( + !paths.is_empty(), + "no .sp fixtures found in {}", + corpus_dir.display() + ); + for path in paths { + let source = std::fs::read_to_string(&path) + .unwrap_or_else(|err| panic!("{}: {err}", path.display())); parse_program(&source).unwrap_or_else(|errors| panic!("{}: {errors:#?}", path.display())); } } diff --git a/tests/corpus/async_await.sp b/tests/corpus/async_await.sp new file mode 100644 index 0000000..838a243 --- /dev/null +++ b/tests/corpus/async_await.sp @@ -0,0 +1,18 @@ +/// Async functions, `.await`, and `?` error propagation outside pipes +/// (§6 errors, §8.9 async). `.context(...)` is an ordinary method call. +async fn fetch(url: &str) -> Result { + let response = net::get(url).await?; + Ok(response) +} + +pub async fn retry(url: &str) -> Result { + let first = fetch(url).await; + let second = fetch(url).await?; + Ok(second) +} + +fn propagate(input: &str) -> Result { + let n = parse::(input)?; + let checked = validate(n).context("validating input")?; + Ok(checked) +} diff --git a/tests/corpus/attributes.sp b/tests/corpus/attributes.sp new file mode 100644 index 0000000..11c38ad --- /dev/null +++ b/tests/corpus/attributes.sp @@ -0,0 +1,41 @@ +/// Attribute shapes (§12, §16 attrs/attr_args): bare markers, derive lists, +/// named arguments, and actor-handler attributes. +@derive(Serialize, Clone, Debug) +struct Payload { + pub body: String, +} + +@error +enum AppError { + NotFound, + Denied, +} + +@overflow(wrapping) +fn wrap_add(a: u8, b: u8) -> u8 { + a + b +} + +@fast_math(contract, afn) +fn mix(a: f64, b: f64) -> f64 { + a * b + a +} + +@supervisor(strategy: "one_for_one", max_restarts: 5, window_secs: 60) +actor Sup { + children: i64, +} + +actor Mailer { + queued: i64, + + @mailbox(capacity: 2048) + pub fn enqueue(&mut self, n: i64) { + self.queued = self.queued + n; + } +} + +@test +fn smoke() { + let ok = true; +} diff --git a/tests/corpus/casts_literals.sp b/tests/corpus/casts_literals.sp new file mode 100644 index 0000000..d4569dd --- /dev/null +++ b/tests/corpus/casts_literals.sp @@ -0,0 +1,35 @@ +/// Numeric casts (§3.2 `as`, numeric-only) and the §16.1 literal zoo: based +/// integers, separators, suffixes, float exponents, string escapes, and +/// character literals. Lifetimes appear in generic params and reference types. +fn convert(n: i64, ratio: f64) -> u32 { + let small = n as i32; + let wide = small as i64; + let scaled = ratio as f32; + let back = scaled as f64; + let total = wide + n; + total as u32 +} + +fn literals() -> f64 { + let hex = 0xFF_FFu32; + let oct = 0o777; + let bin = 0b1010_1010u8; + let million = 1_000_000; + let big = 340_282_366_920_938u128; + let chain_cap = 115_792u256; + let pi = 3.14159f64; + let tiny = 2.5e-3; + let large = 1e10f64; + let greeting = "hi\n\t\\\"\x41\u{1F600}"; + let letter = 'a'; + let newline = '\n'; + let quote = '\''; + let escaped = '\u{41}'; + let yes = true; + let no = false; + 3.0 +} + +fn longest<'a>(a: &'a str, b: &'a str) -> &'a str { + a +} diff --git a/tests/corpus/expressions.sp b/tests/corpus/expressions.sp new file mode 100644 index 0000000..9485099 --- /dev/null +++ b/tests/corpus/expressions.sp @@ -0,0 +1,34 @@ +/// Struct literals (§5.1 incl. the parenthesized block-head escape and +/// shorthand field init), nesting, and the boolean/comparison operator set. +struct Point { + pub x: i64, + pub y: i64, +} + +struct Cfg { + pub on: bool, +} + +fn build(x: i64, y: i64) -> Point { + let origin = Point { x: 0, y: 0 }; + let shorthand = Point { x, y }; + let nested = Wrap { inner: Point { x: 1, y: 2 } }; + origin +} + +fn guarded() -> i64 { + if (Cfg { on: true }).on { + 1 + } else { + 2 + } +} + +fn logic(a: bool, b: bool, lo: i64, hi: i64) -> bool { + let both = a && b; + let either = a || b; + let cmp = lo <= hi; + let ne = lo != hi; + let modulo = hi % 7 == 0; + both && either && cmp && ne && modulo +} diff --git a/tests/corpus/extern_onchain.sp b/tests/corpus/extern_onchain.sp new file mode 100644 index 0000000..ebd3a3d --- /dev/null +++ b/tests/corpus/extern_onchain.sp @@ -0,0 +1,34 @@ +/// FFI and on-chain surfaces: extern "C" blocks (§4.9), string-target async +/// extern blocks, extern onchain mod declarations (§11.4a), a top-level +/// onchain enum, and an onchain mod with a storage block (§11.1a). +extern "C" { + pub fn puts(s: &str) -> i32; + fn c_open(path: &str, flags: i32) -> Result; +} + +extern "C" async { + fn poll_events(mask: u32) -> u64; +} + +extern onchain mod token { + pub fn balance_of(account: Address) -> Result; +} + +onchain enum TokenError { + InsufficientBalance, + Unauthorized, +} + +onchain mod vault { + storage { + balances: Map, + supply: u256, + } + + pub fn deposit(amount: u256) -> Result { + let caller = ctx::caller(); + let current = storage::get(&self.balances, caller)?; + storage::set(&mut self.balances, caller, current + amount); + Ok(current + amount) + } +} diff --git a/tests/corpus/modules_use.sp b/tests/corpus/modules_use.sp new file mode 100644 index 0000000..894d63c --- /dev/null +++ b/tests/corpus/modules_use.sp @@ -0,0 +1,23 @@ +/// Module trees and use declarations (§10): inline modules, file modules, +/// nested paths, brace imports, contextual `crate`/`super` heads, and +/// re-exports. +mod auth { + pub mod login; + pub mod token; + + pub fn is_enabled() -> bool { + true + } +} + +mod models; + +use std::collections::Map; +use crate::models::{User, Role}; +pub use crate::models::User; +use super::shared; +use crate::api; + +fn wire() -> bool { + true +}