fix(parser): allow underscores in identifier

Modify the lexer to allow underscores in identifiers, while still scanning a standalone underscore as a dedicated underscore token.
This commit is contained in:
2026-05-21 13:54:19 +02:00
parent 19229db0b1
commit c1c50a448e

View File

@@ -43,7 +43,7 @@ class MidasLexer(Lexer):
self.add_token(TokenType.QMARK) self.add_token(TokenType.QMARK)
# case ",": # case ",":
# self.add_token(TokenType.COMMA) # self.add_token(TokenType.COMMA)
case "_": case "_" if not self.is_identifier_char(self.peek_next(), start=False):
self.add_token(TokenType.UNDERSCORE) self.add_token(TokenType.UNDERSCORE)
case "-" if self.match(">"): case "-" if self.match(">"):
self.add_token(TokenType.ARROW) self.add_token(TokenType.ARROW)
@@ -71,7 +71,7 @@ class MidasLexer(Lexer):
case _: case _:
if char.isdigit(): if char.isdigit():
self.scan_number() self.scan_number()
elif char.isalpha(): elif self.is_identifier_char(char, start=True):
self.scan_identifier() self.scan_identifier()
else: else:
self.error("Unexpected character") self.error("Unexpected character")
@@ -100,7 +100,7 @@ class MidasLexer(Lexer):
An identifier starts with a letter, followed by any number of An identifier starts with a letter, followed by any number of
alphanumerical characters or underscores alphanumerical characters or underscores
""" """
while self.peek().isalnum() or self.peek() == "_": while self.is_identifier_char(self.peek(), start=False):
self.advance() self.advance()
lexeme: str = self.source[self.start : self.idx] lexeme: str = self.source[self.start : self.idx]
@@ -131,3 +131,12 @@ class MidasLexer(Lexer):
if not self.is_at_end(): if not self.is_at_end():
self.advance() self.advance()
self.add_token(TokenType.COMMENT) self.add_token(TokenType.COMMENT)
def is_identifier_char(self, char: str, *, start: bool) -> bool:
    """Return whether *char* is allowed in an identifier.

    Args:
        char: The character to classify. An empty string is rejected.
        start: ``True`` when *char* would be the first character of the
            identifier, ``False`` for any later position.

    Returns:
        ``True`` for an underscore or any character for which
        ``str.isalpha`` holds, regardless of position. Characters for
        which ``str.isdigit`` holds are accepted only when *start* is
        ``False``. Everything else yields ``False``.
    """
    # Digits may continue an identifier but never begin one.
    digit_allowed = not start
    return char == "_" or char.isalpha() or (digit_allowed and char.isdigit())