feat(parser): add base parser class
the parser was adapted from another project (see docstring on the Parser class)
This commit is contained in:
163
parser/base.py
Normal file
163
parser/base.py
Normal file
@@ -0,0 +1,163 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Generic, TypeVar
|
||||
|
||||
from lexer.token import Token, TokenType
|
||||
from parser.errors import ParsingError
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TokenError:
    """A parsing error linked to a particular token

    Attributes:
        token (Token): the token at which the error was detected
        message (str): a message explaining the error
    """

    token: Token
    message: str

    def get_report(self) -> str:
        """Get a detailed error message

        The report is made of the token position, the place of the error
        (the quoted lexeme of the token, or "end" for the EOF token) and
        the error message

        Returns:
            str: the complete error message
        """
        # The lexeme is read first, even for the EOF token
        quoted_lexeme: str = f"'{self.token.lexeme}'"
        where: str = "end" if self.token.type == TokenType.EOF else quoted_lexeme
        return f"({self.token.position}) Error at {where}: {self.message}"
|
||||
|
||||
|
||||
# Type of the element(s) returned by `Parser.parse`
T = TypeVar("T")
|
||||
|
||||
|
||||
class Parser(ABC, Generic[T]):
    """An abstract parser which provides methods to easily extend it into a concrete one

    Tokens whose type is listed in `IGNORE` are dropped when the parser is
    created. Errors reported through `error` are collected in `errors`.

    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)

    [1]: https://craftinginterpreters.com/
    """

    # Token types which are filtered out of the token list before parsing
    IGNORE: set[TokenType] = {
        TokenType.WHITESPACE,
        TokenType.COMMENT,
        TokenType.NEWLINE,
    }

    def __init__(self, tokens: list[Token]) -> None:
        """Create a new parser to parse the given tokens

        Tokens whose type is in `IGNORE` are discarded

        Args:
            tokens (list[Token]): the tokens to parse
        """
        self.tokens: list[Token] = [
            token for token in tokens if token.type not in self.IGNORE
        ]
        self.current: int = 0
        self.length: int = len(self.tokens)
        # A bare annotation does not create the attribute: the list has to be
        # assigned here, otherwise `error` fails with an AttributeError
        self.errors: list[TokenError] = []

    def error(self, token: Token, message: str) -> ParsingError:
        """Record an error

        The error is appended to `errors`. The exception is returned, not
        raised, so that the caller decides whether to raise it

        Args:
            token (Token): the token at which the error was detected
            message (str): a message explaining the error

        Returns:
            ParsingError: the parsing error to raise
        """
        self.errors.append(TokenError(token=token, message=message))
        return ParsingError()

    @abstractmethod
    def parse(self) -> T:
        """Parse the tokens

        Returns:
            T: the parsed element(s)
        """
        pass

    def is_at_end(self) -> bool:
        """Whether the parser is at the end of the token list

        Returns:
            bool: True if the current token is the EOF token
        """
        return self.peek().type == TokenType.EOF

    def peek(self) -> Token:
        """Get the current token without advancing

        Raises:
            IndexError: if there is no token at the current index
                (e.g. the token list is empty)

        Returns:
            Token: the current token
        """
        return self.tokens[self.current]

    def previous(self) -> Token:
        """Get the previous token

        This function is unsafe: if called when the parser is at the
        beginning of the token list, the index wraps around and the last
        token of the list is returned instead (an IndexError is only raised
        if the token list is empty)

        Returns:
            Token: the previous token
        """
        return self.tokens[self.current - 1]

    def check(self, token_type: TokenType) -> bool:
        """Check whether the current token is of the given type

        This function always returns False if the parser is at the EOF token

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the current token is of the given type and not EOF
        """
        if self.is_at_end():
            return False
        return self.peek().type == token_type

    def advance(self) -> Token:
        """Consume and return the current token, if not at the EOF

        If the parser is already at the EOF token, the position is left
        unchanged and the result of `previous` is returned

        Returns:
            Token: the current token, before advancing
        """
        if not self.is_at_end():
            self.current += 1
        return self.previous()

    def match(self, *types: TokenType) -> bool:
        """Consume the next token if it matches one of the given types

        The types are tried in the given order and at most one token is
        consumed

        Args:
            *types (TokenType): the accepted token types

        Returns:
            bool: whether a token was matched and consumed
        """
        for token_type in types:
            if self.check(token_type):
                self.advance()
                return True
        return False

    def consume(self, token_type: TokenType, error_msg: str) -> Token:
        """Consume the current token if it matches the given type or raise an error

        If the current token doesn't match the given type, an error is
        recorded in `errors` with the provided message and raised

        Args:
            token_type (TokenType): the expected token type
            error_msg (str): the error message if the token doesn't match

        Raises:
            ParsingError: if the current token doesn't match the given type

        Returns:
            Token: the current token which matched the given type
        """
        if self.check(token_type):
            return self.advance()
        raise self.error(self.peek(), error_msg)
|
||||
2
parser/errors.py
Normal file
2
parser/errors.py
Normal file
@@ -0,0 +1,2 @@
|
||||
class ParsingError(RuntimeError):
    """Error raised when the tokens cannot be parsed"""
|
||||
Reference in New Issue
Block a user