feat(parser)!: adapt parser for revised syntax

This commit is contained in:
2026-05-21 13:57:38 +02:00
parent 8f9ec8d73b
commit 5cb4d587e3

View File

@@ -1,16 +1,24 @@
from typing import Optional from typing import Optional
from core.ast.midas import ( from core.ast.midas import (
ConstraintExpr, BinaryExpr,
ConstraintStmt, ComplexTypeStmt,
Expr, Expr,
ExtendStmt,
GetExpr,
GroupingExpr,
LiteralExpr, LiteralExpr,
LogicalExpr,
OpStmt, OpStmt,
PredicateStmt,
PropertyStmt, PropertyStmt,
SimpleTypeExpr,
SimpleTypeStmt,
Stmt, Stmt,
TypeBodyExpr, TemplateExpr,
TypeExpr, TypeExpr,
TypeStmt, UnaryExpr,
VariableExpr,
WildcardExpr, WildcardExpr,
) )
from lexer.token import Token, TokenType from lexer.token import Token, TokenType
@@ -21,7 +29,12 @@ from parser.errors import ParsingError
class MidasParser(Parser): class MidasParser(Parser):
"""A simple parser for midas type definitions""" """A simple parser for midas type definitions"""
SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT} SYNC_BOUNDARY: set[TokenType] = {
TokenType.TYPE,
TokenType.OP,
TokenType.EXTEND,
TokenType.PREDICATE,
}
def parse(self) -> list[Stmt]: def parse(self) -> list[Stmt]:
statements: list[Stmt] = [] statements: list[Stmt] = []
@@ -58,16 +71,16 @@ class MidasParser(Parser):
try: try:
if self.match(TokenType.TYPE): if self.match(TokenType.TYPE):
return self.type_declaration() return self.type_declaration()
if self.match(TokenType.OP): if self.match(TokenType.EXTEND):
return self.op_declaration() return self.extend_declaration()
if self.match(TokenType.CONSTRAINT): if self.match(TokenType.PREDICATE):
return self.constraint_declaration() return self.predicate_declaration()
raise self.error(self.peek(), "Unexpected token") raise self.error(self.peek(), "Unexpected token")
except ParsingError: except ParsingError:
self.synchronize() self.synchronize()
return None return None
def type_declaration(self) -> TypeStmt: def type_declaration(self) -> SimpleTypeStmt | ComplexTypeStmt:
"""Parse a type declaration """Parse a type declaration
A type declaration is written `type Name<TypeExpr, ...>` optionally followed by a brace-wrapped body A type declaration is written `type Name<TypeExpr, ...>` optionally followed by a brace-wrapped body
@@ -76,19 +89,28 @@ class MidasParser(Parser):
TypeStmt: the parsed type declaration statement TypeStmt: the parsed type declaration statement
""" """
name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
self.consume(TokenType.LESS, "Expected '<' after type name") template: Optional[TemplateExpr] = None
bases: list[TypeExpr] = [] if self.check(TokenType.LEFT_BRACKET):
while not self.check(TokenType.GREATER) and not self.is_at_end(): template = self.template_expr()
bases.append(self.type_expr())
if not self.check(TokenType.GREATER):
self.consume(TokenType.COMMA, "Expected ',' between type bases")
self.consume(TokenType.GREATER, "Expected '>' after base type")
body: Optional[TypeBodyExpr] = None if self.match(TokenType.LEFT_PAREN):
base: TypeExpr = self.type_expr()
self.consume(TokenType.RIGHT_PAREN, "Unclosed base type parenthesis")
constraint: Optional[Expr] = None
if self.match(TokenType.WHERE):
constraint = self.constraint()
return SimpleTypeStmt(
name=name, template=template, base=base, constraint=constraint
)
else:
properties: list[PropertyStmt] = self.type_properties()
return ComplexTypeStmt(name=name, template=template, properties=properties)
if self.check(TokenType.LEFT_BRACE): def template_expr(self) -> TemplateExpr:
body = self.type_body_expr() self.consume(TokenType.LEFT_BRACKET, "Missing '[' before template expression")
return TypeStmt(name=name, bases=bases, body=body) type: TypeExpr = self.type_expr()
self.consume(TokenType.RIGHT_BRACKET, "Missing ']' after template expression")
return TemplateExpr(type=type)
def type_expr(self) -> TypeExpr: def type_expr(self) -> TypeExpr:
"""Parse a type expression """Parse a type expression
@@ -97,33 +119,66 @@ class MidasParser(Parser):
TypeExpr: the parsed type expression TypeExpr: the parsed type expression
""" """
name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
constraints: list[ConstraintExpr] = [] template: Optional[TemplateExpr] = None
if self.check(TokenType.LEFT_BRACKET):
template = self.template_expr()
optional: bool = self.match(TokenType.QMARK)
return TypeExpr(name=name, template=template, optional=optional)
while not self.is_at_end() and self.match(TokenType.PLUS): def simple_type_expr(self) -> SimpleTypeExpr:
self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint") name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
constraints.append(self.constraint_expr()) optional: bool = self.match(TokenType.QMARK)
self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint") return SimpleTypeExpr(name=name, optional=optional)
return TypeExpr(name=name, constraints=constraints) def constraint(self) -> Expr:
return self.and_()
def constraint_expr(self) -> ConstraintExpr: def and_(self) -> Expr:
"""Parse a type constraint expr: Expr = self.equality()
while self.match(TokenType.AND):
operator: Token = self.previous()
right: Expr = self.equality()
expr = LogicalExpr(left=expr, operator=operator, right=right)
return expr
Returns: def equality(self) -> Expr:
ConstraintExpr: the parsed type constraint expression expr: Expr = self.comparison()
""" while self.match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL):
operator: Token = self.previous()
right: Expr = self.comparison()
expr = BinaryExpr(left=expr, operator=operator, right=right)
return expr
left: Expr = self.constraint_value() def comparison(self) -> Expr:
op: Token = self.constraint_operator() expr: Expr = self.unary()
right: Expr = self.constraint_value() while self.match(
return ConstraintExpr(left=left, op=op, right=right) TokenType.LESS,
TokenType.LESS_EQUAL,
TokenType.GREATER,
TokenType.GREATER_EQUAL,
):
operator: Token = self.previous()
right: Expr = self.unary()
expr = BinaryExpr(left=expr, operator=operator, right=right)
return expr
def constraint_value(self) -> Expr: def unary(self) -> Expr:
if self.match(TokenType.UNDERSCORE): if self.match(TokenType.MINUS):
return WildcardExpr(self.previous()) operator: Token = self.previous()
return self.literal() right: Expr = self.unary()
return UnaryExpr(operator=operator, right=right)
return self.reference()
def literal(self) -> LiteralExpr: def reference(self) -> Expr:
expr: Expr = self.primary()
while self.match(TokenType.DOT):
name: Token = self.consume(
TokenType.IDENTIFIER, "Expected property name after '.'"
)
expr = GetExpr(expr=expr, name=name)
return expr
def primary(self) -> Expr:
if self.match(TokenType.FALSE): if self.match(TokenType.FALSE):
return LiteralExpr(False) return LiteralExpr(False)
if self.match(TokenType.TRUE): if self.match(TokenType.TRUE):
@@ -134,35 +189,34 @@ class MidasParser(Parser):
if self.match(TokenType.NUMBER): if self.match(TokenType.NUMBER):
return LiteralExpr(self.previous().value) return LiteralExpr(self.previous().value)
raise self.error(self.peek(), "Expected literal") if self.match(TokenType.IDENTIFIER):
return VariableExpr(self.previous())
def constraint_operator(self) -> Token: if self.match(TokenType.UNDERSCORE):
if self.match( return WildcardExpr(self.previous())
TokenType.LESS,
TokenType.LESS_EQUAL,
TokenType.GREATER,
TokenType.GREATER_EQUAL,
TokenType.EQUAL_EQUAL,
TokenType.BANG_EQUAL,
):
return self.previous()
raise self.error(self.peek(), "Expected constraint operator")
def type_body_expr(self) -> TypeBodyExpr: if self.match(TokenType.LEFT_PAREN):
expr: Expr = self.constraint()
self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis")
return GroupingExpr(expr)
raise self.error(self.peek(), "Expected expression")
def type_properties(self) -> list[PropertyStmt]:
"""Parse a type definition body """Parse a type definition body
A type definition body is a set of whitespace-separated A type definition body is a set of whitespace-separated
property statements enclosed in curly braces property statements enclosed in curly braces
Returns: Returns:
TypeBodyExpr: the parsed type body expression TypeBodyStmt: the parsed type body expression
""" """
self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body") self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body")
properties: list[PropertyStmt] = [] properties: list[PropertyStmt] = []
while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end(): while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end():
properties.append(self.property_stmt()) properties.append(self.property_stmt())
self.consume(TokenType.RIGHT_BRACE, "Unclosed type body") self.consume(TokenType.RIGHT_BRACE, "Unclosed type body")
return TypeBodyExpr(properties=properties) return properties
def property_stmt(self) -> PropertyStmt: def property_stmt(self) -> PropertyStmt:
"""Parse a property statement """Parse a property statement
@@ -175,7 +229,19 @@ class MidasParser(Parser):
name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name") name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name")
self.consume(TokenType.COLON, "Expected ':' after property name") self.consume(TokenType.COLON, "Expected ':' after property name")
type: TypeExpr = self.type_expr() type: TypeExpr = self.type_expr()
return PropertyStmt(name=name, type=type) constraint: Optional[Expr] = None
if self.match(TokenType.WHERE):
constraint = self.constraint()
return PropertyStmt(name=name, type=type, constraint=constraint)
def extend_declaration(self) -> ExtendStmt:
type: TypeExpr = self.type_expr()
self.consume(TokenType.LEFT_BRACE, "Expected '{' to start extend body")
operations: list[OpStmt] = []
while not self.is_at_end() and not self.check(TokenType.RIGHT_BRACE):
operations.append(self.op_declaration())
self.consume(TokenType.RIGHT_BRACE, "Unclosed extend body")
return ExtendStmt(type=type, operations=operations)
def op_declaration(self) -> OpStmt: def op_declaration(self) -> OpStmt:
"""Parse an operation definition """Parse an operation definition
@@ -185,25 +251,19 @@ class MidasParser(Parser):
Returns: Returns:
OpStmt: the parsed operation statement OpStmt: the parsed operation statement
""" """
self.consume(TokenType.LESS, "Expected '<' before first type") self.consume(TokenType.OP, "Expected 'op' keyword")
left: TypeExpr = self.type_expr()
self.consume(TokenType.GREATER, "Expected '>' after first type")
op: Token = self.advance() name: Token = self.consume(TokenType.IDENTIFIER, "Expected operation name")
self.consume(TokenType.LEFT_PAREN, "Expected '(' before operand type")
operand: TypeExpr = self.type_expr()
self.consume(TokenType.RIGHT_PAREN, "Expected ')' after operand type")
self.consume(TokenType.LESS, "Expected '<' before second type") self.consume(TokenType.ARROW, "Expected '->' before result type")
right: TypeExpr = self.type_expr()
self.consume(TokenType.GREATER, "Expected '>' after second type")
self.consume(TokenType.EQUAL, "Expected '=' after second type")
self.consume(TokenType.LESS, "Expected '<' before result type")
result: TypeExpr = self.type_expr() result: TypeExpr = self.type_expr()
self.consume(TokenType.GREATER, "Expected '>' after result type")
return OpStmt(left=left, op=op, right=right, result=result) return OpStmt(name=name, operand=operand, result=result)
def constraint_declaration(self) -> ConstraintStmt: def predicate_declaration(self) -> PredicateStmt:
"""Parse a type constraint declaration """Parse a type constraint declaration
A constraint is written `constraint Name = constraint_expression` A constraint is written `constraint Name = constraint_expression`
@@ -211,7 +271,12 @@ class MidasParser(Parser):
Returns: Returns:
ConstraintStmt: the parsed constraint declaration statement ConstraintStmt: the parsed constraint declaration statement
""" """
name: Token = self.consume(TokenType.IDENTIFIER, "Expected constraint name") name: Token = self.consume(TokenType.IDENTIFIER, "Expected predicate name")
self.consume(TokenType.EQUAL, "Expected '=' after constraint name") self.consume(TokenType.LEFT_PAREN, "Expected '(' before predicate subject")
constraint: ConstraintExpr = self.constraint_expr() subject: Token = self.consume(TokenType.IDENTIFIER, "Expected subject name")
return ConstraintStmt(name=name, constraint=constraint) self.consume(TokenType.COLON, "Expected ':' after subject name")
type: TypeExpr = self.type_expr()
self.consume(TokenType.RIGHT_PAREN, "Expected ')' after predicate subject")
self.consume(TokenType.EQUAL, "Expected '=' after predicate subject")
condition: Expr = self.constraint()
return PredicateStmt(name=name, subject=subject, type=type, condition=condition)