Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions crates/sploosh-lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@ use sploosh_ast::Span;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
pub kind: TokenKind,
pub lexeme: String,
pub span: Span,
}

impl Token {
    /// Returns the portion of `source` covered by this token's span.
    ///
    /// A token records only where it sits in the input, not a copy of its
    /// text; recovering the text by slicing on demand keeps lexing
    /// allocation-free.
    ///
    /// # Panics
    ///
    /// Panics, like any `str` range index, if the span is out of bounds for
    /// `source` or does not fall on character boundaries.
    /// NOTE(review): presumably `source` is always the text this token was
    /// lexed from — confirm at call sites.
    pub fn text<'src>(&self, source: &'src str) -> &'src str {
        let covered = self.span.start..self.span.end;
        &source[covered]
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
Ident,
Expand Down Expand Up @@ -559,7 +566,6 @@ impl Lexer<'_> {
fn push(&mut self, kind: TokenKind, start: usize, end: usize) {
self.tokens.push(Token {
kind,
lexeme: self.source[start..end].to_string(),
span: Span::new(start, end),
});
}
Expand Down Expand Up @@ -639,7 +645,7 @@ mod tests {
TokenKind::IntLit
};
assert_eq!(tokens[0].kind, expected, "{source}");
assert_eq!(tokens[0].lexeme, source);
assert_eq!(tokens[0].text(&source), source);
assert_eq!(tokens[0].span, Span::new(0, source.len()));
}
}
Expand Down
59 changes: 39 additions & 20 deletions crates/sploosh-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub struct ParseError {

pub fn parse_program(source: &str) -> Result<Program, Vec<ParseError>> {
let tokens = lex(source).map_err(lex_errors)?;
Parser::new(tokens).parse_program()
Parser::new(tokens, source).parse_program()
}

fn lex_errors(errors: Vec<LexError>) -> Vec<ParseError> {
Expand All @@ -24,25 +24,32 @@ fn lex_errors(errors: Vec<LexError>) -> Vec<ParseError> {
.collect()
}

struct Parser {
struct Parser<'src> {
tokens: Vec<Token>,
source: &'src str,
pos: usize,
errors: Vec<ParseError>,
/// When set, a `struct_literal` may not be the outermost expression — the
/// block-head restriction (§5.1, §5.2; §16 `struct_literal` side condition).
no_struct_literal: bool,
}

impl Parser {
fn new(tokens: Vec<Token>) -> Self {
impl<'src> Parser<'src> {
fn new(tokens: Vec<Token>, source: &'src str) -> Self {
Self {
tokens,
source,
pos: 0,
errors: Vec::new(),
no_struct_literal: false,
}
}

/// Source text for a token; tokens carry only spans (see `Token::text`).
fn text(&self, token: &Token) -> &'src str {
token.text(self.source)
}

fn parse_program(mut self) -> Result<Program, Vec<ParseError>> {
let mut items = Vec::new();
while !self.eof() {
Expand Down Expand Up @@ -477,7 +484,8 @@ impl Parser {
fn extern_block(&mut self) -> Option<ExternBlock> {
self.expect_keyword(Keyword::Extern)?;
let target = if self.at(TokenKind::StringLit) {
self.bump().lexeme
let token = self.bump();
self.text(&token).to_string()
} else {
self.expect_keyword(Keyword::Onchain)?;
self.expect_keyword(Keyword::Mod)?;
Expand Down Expand Up @@ -734,15 +742,15 @@ impl Parser {
if left_bp < min_bp {
break;
}
let op_text = self.bump().lexeme;
self.bump();
if op == "|>" {
// §16: the RHS of `|>` is a `pipe_stage`, not a precedence-climbed
// expression.
let stage = self.pipe_stage()?;
let span = lhs.span.join(stage.span);
lhs = Expr {
kind: ExprKind::Binary {
op: op_text,
op: op.to_string(),
left: Box::new(lhs),
right: Box::new(stage),
},
Expand Down Expand Up @@ -776,7 +784,7 @@ impl Parser {
} else {
Expr {
kind: ExprKind::Binary {
op: op_text,
op: op.to_string(),
left: Box::new(lhs),
right: Box::new(rhs),
},
Expand Down Expand Up @@ -826,11 +834,12 @@ impl Parser {
match token.kind {
TokenKind::IntLit | TokenKind::FloatLit | TokenKind::StringLit | TokenKind::CharLit => {
self.bump();
let text = self.text(&token).to_string();
let lit = match token.kind {
TokenKind::IntLit => Literal::Int(token.lexeme),
TokenKind::FloatLit => Literal::Float(token.lexeme),
TokenKind::StringLit => Literal::String(token.lexeme),
TokenKind::CharLit => Literal::Char(token.lexeme),
TokenKind::IntLit => Literal::Int(text),
TokenKind::FloatLit => Literal::Float(text),
TokenKind::StringLit => Literal::String(text),
TokenKind::CharLit => Literal::Char(text),
_ => unreachable!(),
};
Some(Expr {
Expand All @@ -841,11 +850,14 @@ impl Parser {
TokenKind::Keyword(Keyword::True | Keyword::False) => {
self.bump();
Some(Expr {
kind: ExprKind::Literal(Literal::Bool(token.lexeme == "true")),
kind: ExprKind::Literal(Literal::Bool(matches!(
token.kind,
TokenKind::Keyword(Keyword::True)
))),
span: token.span,
})
}
TokenKind::Ident if token.lexeme == "vec" => {
TokenKind::Ident if self.text(&token) == "vec" => {
self.bump();
if self.eat(TokenKind::Bang).is_some() {
// §16 `vec_literal`: `vec` "!" only ever binds to square
Expand Down Expand Up @@ -885,7 +897,7 @@ impl Parser {
}
Some(Expr {
kind: ExprKind::Path(Path {
segments: vec![token.lexeme],
segments: vec![self.text(&token).to_string()],
span: token.span,
}),
span: token.span,
Expand All @@ -908,15 +920,22 @@ impl Parser {
})
}
TokenKind::Bang | TokenKind::Minus | TokenKind::Star | TokenKind::Amp => {
let op = self.bump().lexeme;
let op = match token.kind {
TokenKind::Bang => "!",
TokenKind::Minus => "-",
TokenKind::Star => "*",
TokenKind::Amp => "&",
_ => unreachable!(),
};
self.bump();
if op == "&" {
let _mutable = self.eat_ident_text("mut");
}
let expr = self.expr(11)?;
let span = token.span.join(expr.span);
Some(Expr {
kind: ExprKind::Unary {
op,
op: op.to_string(),
expr: Box::new(expr),
},
span,
Expand Down Expand Up @@ -1091,7 +1110,7 @@ impl Parser {
| TokenKind::Keyword(Keyword::SelfType)
| TokenKind::Keyword(Keyword::SelfValue) => {
let token = self.bump();
Some(Ident::new(token.lexeme, token.span))
Some(Ident::new(self.text(&token), token.span))
}
_ => {
self.error_here("expected path segment");
Expand All @@ -1105,7 +1124,7 @@ impl Parser {
match token.kind {
TokenKind::Ident => {
self.bump();
Some(Ident::new(token.lexeme, token.span))
Some(Ident::new(self.text(&token), token.span))
}
_ => {
self.error_here("expected identifier");
Expand Down Expand Up @@ -1276,7 +1295,7 @@ impl Parser {

fn at_ident_text(&self, text: &str) -> bool {
self.peek()
.is_some_and(|token| token.kind == TokenKind::Ident && token.lexeme == text)
.is_some_and(|token| token.kind == TokenKind::Ident && token.text(self.source) == text)
}

fn eat_ident_text(&mut self, text: &str) -> Option<Token> {
Expand Down