From e31a214bec69d29f28e1d399fac3f17b919c8b95 Mon Sep 17 00:00:00 2001 From: StreamDemon Date: Thu, 2 Jul 2026 19:00:17 +0800 Subject: [PATCH] Dedupe the lexer numeric-suffix list `numeric_suffix` and `validate_numeric_body` each carried their own copy of the 13-entry suffix table, so any future suffix change had to be made twice or the two paths would drift apart. Hoist the table into a shared `NUMERIC_SUFFIXES` const and return the matched `&'static str` instead of allocating a fresh `String` on every suffix scan. A new test iterates the shared table and lexes `1` for every entry, so additions to the list are covered automatically. --- crates/sploosh-lexer/src/lib.rs | 40 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/crates/sploosh-lexer/src/lib.rs b/crates/sploosh-lexer/src/lib.rs index e21e234..1ad0525 100644 --- a/crates/sploosh-lexer/src/lib.rs +++ b/crates/sploosh-lexer/src/lib.rs @@ -148,6 +148,13 @@ pub fn is_contextual_keyword(text: &str) -> bool { ) } +/// §16.1 numeric suffixes, longest-first so prefix scanning never matches a +/// shorter suffix inside a longer one. Shared by suffix scanning and +/// separator validation. 
+const NUMERIC_SUFFIXES: [&str; 13] = [ + "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", "u8", +]; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct LexError { pub message: String, @@ -339,16 +346,12 @@ impl Lexer<'_> { } } - fn numeric_suffix(&mut self) -> Option<String> { + fn numeric_suffix(&mut self) -> Option<&'static str> { let rest = &self.source[self.pos..]; - let suffixes = [ - "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", - "u8", - ]; - for suffix in suffixes { + for suffix in NUMERIC_SUFFIXES { if rest.starts_with(suffix) { self.pos += suffix.len(); - return Some(suffix.to_string()); + return Some(suffix); } } None @@ -356,11 +359,7 @@ impl Lexer<'_> { fn validate_numeric_body(&mut self, start: usize, base: u8) { let text = &self.source[start..self.pos]; - let suffixes = [ - "i128", "u128", "u256", "i64", "u64", "f64", "i32", "u32", "f32", "i16", "u16", "i8", - "u8", - ]; - let body = suffixes + let body = NUMERIC_SUFFIXES .iter() .find_map(|suffix| text.strip_suffix(suffix)) .unwrap_or(text); @@ -628,6 +627,23 @@ mod tests { assert_eq!(tokens[4].kind, TokenKind::IntLit); } + #[test] + fn every_numeric_suffix_lexes() { + for suffix in NUMERIC_SUFFIXES { + let source = format!("1{suffix}"); + let tokens = lex(&source).unwrap(); + assert_eq!(tokens.len(), 1, "{source}"); + let expected = if suffix.starts_with('f') { + TokenKind::FloatLit + } else { + TokenKind::IntLit + }; + assert_eq!(tokens[0].kind, expected, "{source}"); + assert_eq!(tokens[0].lexeme, source); + assert_eq!(tokens[0].span, Span::new(0, source.len())); + } + } + #[test] fn rejects_bad_numeric_separators() { let err = lex("1__2").unwrap_err();