From c1c50a448ebf8fd2fcbdd4766905083501e7f47e Mon Sep 17 00:00:00 2001
From: LordBaryhobal
Date: Thu, 21 May 2026 13:54:19 +0200
Subject: [PATCH] fix(parser): allow underscores in identifier

modify the lexer to allow underscores in an identifier, but keep
scanning single underscores as a specific underscore token
---
 lexer/midas.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/lexer/midas.py b/lexer/midas.py
index be3bfdf..054f91d 100644
--- a/lexer/midas.py
+++ b/lexer/midas.py
@@ -43,7 +43,7 @@ class MidasLexer(Lexer):
                 self.add_token(TokenType.QMARK)
             # case ",":
             # self.add_token(TokenType.COMMA)
-            case "_":
+            case "_" if not self.is_identifier_char(self.peek_next(), start=False):
                 self.add_token(TokenType.UNDERSCORE)
             case "-" if self.match(">"):
                 self.add_token(TokenType.ARROW)
@@ -71,7 +71,7 @@ class MidasLexer(Lexer):
             case _:
                 if char.isdigit():
                     self.scan_number()
-                elif char.isalpha():
+                elif self.is_identifier_char(char, start=True):
                     self.scan_identifier()
                 else:
                     self.error("Unexpected character")
@@ -100,7 +100,7 @@ class MidasLexer(Lexer):
         An identifier starts with a letter, followed by any number of alphanumerical
         characters or underscores
         """
-        while self.peek().isalnum() or self.peek() == "_":
+        while self.is_identifier_char(self.peek(), start=False):
             self.advance()
 
         lexeme: str = self.source[self.start : self.idx]
@@ -131,3 +131,12 @@ class MidasLexer(Lexer):
         if not self.is_at_end():
             self.advance()
         self.add_token(TokenType.COMMENT)
+
+    def is_identifier_char(self, char: str, *, start: bool) -> bool:
+        if char == "_":
+            return True
+        if char.isalpha():
+            return True
+        if not start and char.isdigit():
+            return True
+        return False