feat(parser): add base parser class

the parser was adapted from another project (see docstring on the Parser class)
2026-05-13 22:40:27 +02:00
parent cc4b5dabf2
commit 8252f452f2
2 changed files with 165 additions and 0 deletions
@@ -0,0 +1,163 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Generic, TypeVar
+
+from lexer.token import Token, TokenType
+from parser.errors import ParsingError
+
+
+@dataclass(frozen=True)
+class TokenError:
+    """An error message attached to the token at which it was detected"""
+
+    token: Token
+    message: str
+
+    def get_report(self) -> str:
+        """Build the full report for this error
+
+        Returns:
+            str: the token position, where the error occurred and the message
+        """
+        quoted: str = f"'{self.token.lexeme}'"
+        # the EOF token is reported as "end" rather than by its quoted lexeme
+        location: str = "end" if self.token.type == TokenType.EOF else quoted
+        return f"({self.token.position}) Error at {location}: {self.message}"
+
+
+T = TypeVar("T")
+
+
+class Parser(ABC, Generic[T]):
+    """An abstract parser which provides methods to easily extend it into a concrete one
+
+    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
+    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)
+
+    [1]: https://craftinginterpreters.com/
+    """
+
+    IGNORE: set[TokenType] = {
+        TokenType.WHITESPACE,
+        TokenType.COMMENT,
+        TokenType.NEWLINE,
+    }
+
+    def __init__(self, tokens: list[Token]) -> None:
+        """Create a new parser to parse the given tokens
+
+        Args:
+            tokens (list[Token]): the tokens to parse, minus those of an IGNORE type
+        """
+        self.tokens: list[Token] = [
+            token for token in tokens if token.type not in self.IGNORE
+        ]
+        self.current: int = 0
+        self.length: int = len(self.tokens)
+        self.errors: list[TokenError] = []
+
+    def error(self, token: Token, message: str) -> ParsingError:
+        """Record an error and build the exception for the caller to raise
+
+        Args:
+            token (Token): the token at which the error was detected
+            message (str): a message explaining the error
+
+        Returns:
+            ParsingError: the parsing error to raise
+        """
+        self.errors.append(TokenError(token=token, message=message))
+        return ParsingError()
+
+    @abstractmethod
+    def parse(self) -> T:
+        """Parse the tokens
+
+        Returns:
+            T: the parsed element(s)
+        """
+        pass
+
+    def is_at_end(self) -> bool:
+        """Whether the parser is at the end of the token list
+
+        Returns:
+            bool: True if the current token is the EOF token
+        """
+        return self.peek().type == TokenType.EOF
+
+    def peek(self) -> Token:
+        """Get the current token without advancing
+
+        Returns:
+            Token: the current token
+        """
+        return self.tokens[self.current]
+
+    def previous(self) -> Token:
+        """Get the most recently consumed token
+
+        This function is unsafe: at the beginning of the token list the index
+        is -1, which silently wraps around to the last token of the list
+
+        Returns:
+            Token: the previous token
+        """
+        return self.tokens[self.current - 1]
+
+    def check(self, token_type: TokenType) -> bool:
+        """Check whether the current token is of the given type
+
+        This function always returns False if the parser is at the EOF token
+
+        Args:
+            token_type (TokenType): the type of token to check
+
+        Returns:
+            bool: True if the current token is of the given type and not EOF
+        """
+        if self.is_at_end():
+            return False
+        return self.peek().type == token_type
+
+    def advance(self) -> Token:
+        """Consume and return the current token, if not at the EOF
+
+        Returns:
+            Token: the consumed token, or the result of previous() when already at EOF
+        """
+        if not self.is_at_end():
+            self.current += 1
+        return self.previous()
+
+    def match(self, *types: TokenType) -> bool:
+        """Consume the current token if it matches one of the given types
+
+        Returns:
+            bool: whether a token was matched and consumed
+        """
+        for token_type in types:
+            if self.check(token_type):
+                self.advance()
+                return True
+        return False
+
+    def consume(self, token_type: TokenType, error_msg: str) -> Token:
+        """Consume the current token if it matches the given type or raise an error
+
+        If the current token doesn't match the given type, the error is recorded
+        with the provided message and then raised
+
+        Args:
+            token_type (TokenType): the expected token type
+            error_msg (str): the error message if the token doesn't match
+
+        Raises:
+            ParsingError: if the current token doesn't match the given type
+
+        Returns:
+            Token: the current token which matched the given type
+        """
+        if self.check(token_type):
+            return self.advance()
+        raise self.error(self.peek(), error_msg)
@@ -0,0 +1,2 @@
+class ParsingError(RuntimeError):
+    """Exception signalling that parsing failed"""