diff --git a/parser/midas.py b/parser/midas.py index a49bc5e..71d3e3a 100644 --- a/parser/midas.py +++ b/parser/midas.py @@ -1,16 +1,24 @@ from typing import Optional from core.ast.midas import ( - ConstraintExpr, - ConstraintStmt, + BinaryExpr, + ComplexTypeStmt, Expr, + ExtendStmt, + GetExpr, + GroupingExpr, LiteralExpr, + LogicalExpr, OpStmt, + PredicateStmt, PropertyStmt, + SimpleTypeExpr, + SimpleTypeStmt, Stmt, - TypeBodyExpr, + TemplateExpr, TypeExpr, - TypeStmt, + UnaryExpr, + VariableExpr, WildcardExpr, ) from lexer.token import Token, TokenType @@ -21,7 +29,12 @@ from parser.errors import ParsingError class MidasParser(Parser): """A simple parser for midas type definitions""" - SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT} + SYNC_BOUNDARY: set[TokenType] = { + TokenType.TYPE, + TokenType.OP, + TokenType.EXTEND, + TokenType.PREDICATE, + } def parse(self) -> list[Stmt]: statements: list[Stmt] = [] @@ -58,16 +71,16 @@ class MidasParser(Parser): try: if self.match(TokenType.TYPE): return self.type_declaration() - if self.match(TokenType.OP): - return self.op_declaration() - if self.match(TokenType.CONSTRAINT): - return self.constraint_declaration() + if self.match(TokenType.EXTEND): + return self.extend_declaration() + if self.match(TokenType.PREDICATE): + return self.predicate_declaration() raise self.error(self.peek(), "Unexpected token") except ParsingError: self.synchronize() return None - def type_declaration(self) -> TypeStmt: + def type_declaration(self) -> SimpleTypeStmt | ComplexTypeStmt: """Parse a type declaration A type declaration is written `type Name` optionally followed by a brace-wrapped body @@ -76,19 +89,28 @@ class MidasParser(Parser): TypeStmt: the parsed type declaration statement """ name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") - self.consume(TokenType.LESS, "Expected '<' after type name") - bases: list[TypeExpr] = [] - while not self.check(TokenType.GREATER) and not self.is_at_end(): - bases.append(self.type_expr()) - if not self.check(TokenType.GREATER): - self.consume(TokenType.COMMA, "Expected ',' between type bases") - self.consume(TokenType.GREATER, "Expected '>' after base type") + template: Optional[TemplateExpr] = None + if self.check(TokenType.LEFT_BRACKET): + template = self.template_expr() - body: Optional[TypeBodyExpr] = None + if self.match(TokenType.LEFT_PAREN): + base: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_PAREN, "Unclosed base type parenthesis") + constraint: Optional[Expr] = None + if self.match(TokenType.WHERE): + constraint = self.constraint() + return SimpleTypeStmt( + name=name, template=template, base=base, constraint=constraint + ) + else: + properties: list[PropertyStmt] = self.type_properties() + return ComplexTypeStmt(name=name, template=template, properties=properties) - if self.check(TokenType.LEFT_BRACE): - body = self.type_body_expr() - return TypeStmt(name=name, bases=bases, body=body) + def template_expr(self) -> TemplateExpr: + self.consume(TokenType.LEFT_BRACKET, "Missing '[' before template expression") + type: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_BRACKET, "Missing ']' after template expression") + return TemplateExpr(type=type) def type_expr(self) -> TypeExpr: """Parse a type expression @@ -97,33 +119,66 @@ class MidasParser(Parser): TypeExpr: the parsed type expression """ name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") - constraints: list[ConstraintExpr] = [] + template: Optional[TemplateExpr] = None + if self.check(TokenType.LEFT_BRACKET): + template = self.template_expr() + optional: bool = self.match(TokenType.QMARK) + return TypeExpr(name=name, template=template, optional=optional) - while not self.is_at_end() and self.match(TokenType.PLUS): - self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint") - constraints.append(self.constraint_expr()) - self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint") + def simple_type_expr(self) -> SimpleTypeExpr: + name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") + optional: bool = self.match(TokenType.QMARK) + return SimpleTypeExpr(name=name, optional=optional) - return TypeExpr(name=name, constraints=constraints) + def constraint(self) -> Expr: + return self.and_() - def constraint_expr(self) -> ConstraintExpr: - """Parse a type constraint + def and_(self) -> Expr: + expr: Expr = self.equality() + while self.match(TokenType.AND): + operator: Token = self.previous() + right: Expr = self.equality() + expr = LogicalExpr(left=expr, operator=operator, right=right) + return expr - Returns: - ConstraintExpr: the parsed type constraint expression - """ + def equality(self) -> Expr: + expr: Expr = self.comparison() + while self.match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL): + operator: Token = self.previous() + right: Expr = self.comparison() + expr = BinaryExpr(left=expr, operator=operator, right=right) + return expr - left: Expr = self.constraint_value() - op: Token = self.constraint_operator() - right: Expr = self.constraint_value() - return ConstraintExpr(left=left, op=op, right=right) + def comparison(self) -> Expr: + expr: Expr = self.unary() + while self.match( + TokenType.LESS, + TokenType.LESS_EQUAL, + TokenType.GREATER, + TokenType.GREATER_EQUAL, + ): + operator: Token = self.previous() + right: Expr = self.unary() + expr = BinaryExpr(left=expr, operator=operator, right=right) + return expr - def constraint_value(self) -> Expr: - if self.match(TokenType.UNDERSCORE): - return WildcardExpr(self.previous()) - return self.literal() + def unary(self) -> Expr: + if self.match(TokenType.MINUS): + operator: Token = self.previous() + right: Expr = self.unary() + return UnaryExpr(operator=operator, right=right) + return self.reference() - def literal(self) -> LiteralExpr: + def reference(self) -> Expr: + expr: Expr = self.primary() + while self.match(TokenType.DOT): + name: Token = self.consume( + TokenType.IDENTIFIER, "Expected property name after '.'" + ) + expr = GetExpr(expr=expr, name=name) + return expr + + def primary(self) -> Expr: if self.match(TokenType.FALSE): return LiteralExpr(False) if self.match(TokenType.TRUE): @@ -134,35 +189,34 @@ class MidasParser(Parser): if self.match(TokenType.NUMBER): return LiteralExpr(self.previous().value) - raise self.error(self.peek(), "Expected literal") + if self.match(TokenType.IDENTIFIER): + return VariableExpr(self.previous()) - def constraint_operator(self) -> Token: - if self.match( - TokenType.LESS, - TokenType.LESS_EQUAL, - TokenType.GREATER, - TokenType.GREATER_EQUAL, - TokenType.EQUAL_EQUAL, - TokenType.BANG_EQUAL, - ): - return self.previous() - raise self.error(self.peek(), "Expected constraint operator") + if self.match(TokenType.UNDERSCORE): + return WildcardExpr(self.previous()) - def type_body_expr(self) -> TypeBodyExpr: + if self.match(TokenType.LEFT_PAREN): + expr: Expr = self.constraint() + self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis") + return GroupingExpr(expr) + + raise self.error(self.peek(), "Expected expression") + + def type_properties(self) -> list[PropertyStmt]: """Parse a type definition body A type definition body is a set of whitespace-separated property statements enclosed in curly braces Returns: - TypeBodyExpr: the parsed type body expression + TypeBodyStmt: the parsed type body expression """ self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body") properties: list[PropertyStmt] = [] while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end(): properties.append(self.property_stmt()) self.consume(TokenType.RIGHT_BRACE, "Unclosed type body") - return TypeBodyExpr(properties=properties) + return properties def property_stmt(self) -> PropertyStmt: """Parse a property statement @@ -175,7 +229,19 @@ class MidasParser(Parser): name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name") self.consume(TokenType.COLON, "Expected ':' after property name") type: TypeExpr = self.type_expr() - return PropertyStmt(name=name, type=type) + constraint: Optional[Expr] = None + if self.match(TokenType.WHERE): + constraint = self.constraint() + return PropertyStmt(name=name, type=type, constraint=constraint) + + def extend_declaration(self) -> ExtendStmt: + type: TypeExpr = self.type_expr() + self.consume(TokenType.LEFT_BRACE, "Expected '{' to start extend body") + operations: list[OpStmt] = [] + while not self.is_at_end() and not self.check(TokenType.RIGHT_BRACE): + operations.append(self.op_declaration()) + self.consume(TokenType.RIGHT_BRACE, "Unclosed extend body") + return ExtendStmt(type=type, operations=operations) def op_declaration(self) -> OpStmt: """Parse an operation definition @@ -185,25 +251,19 @@ class MidasParser(Parser): Returns: OpStmt: the parsed operation statement """ - self.consume(TokenType.LESS, "Expected '<' before first type") - left: TypeExpr = self.type_expr() - self.consume(TokenType.GREATER, "Expected '>' after first type") + self.consume(TokenType.OP, "Expected 'op' keyword") - op: Token = self.advance() + name: Token = self.consume(TokenType.IDENTIFIER, "Expected operation name") + self.consume(TokenType.LEFT_PAREN, "Expected '(' before operand type") + operand: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_PAREN, "Expected ')' after operand type") - self.consume(TokenType.LESS, "Expected '<' before second type") - right: TypeExpr = self.type_expr() - self.consume(TokenType.GREATER, "Expected '>' after second type") - - self.consume(TokenType.EQUAL, "Expected '=' after second type") - - self.consume(TokenType.LESS, "Expected '<' before result type") + self.consume(TokenType.ARROW, "Expected '->' before result type") result: TypeExpr = self.type_expr() - self.consume(TokenType.GREATER, "Expected '>' after result type") - return OpStmt(left=left, op=op, right=right, result=result) + return OpStmt(name=name, operand=operand, result=result) - def constraint_declaration(self) -> ConstraintStmt: + def predicate_declaration(self) -> PredicateStmt: """Parse a type constraint declaration A constraint is written `constraint Name = constraint_expression` @@ -211,7 +271,12 @@ class MidasParser(Parser): Returns: ConstraintStmt: the parsed constraint declaration statement """ - name: Token = self.consume(TokenType.IDENTIFIER, "Expected constraint name") - self.consume(TokenType.EQUAL, "Expected '=' after constraint name") - constraint: ConstraintExpr = self.constraint_expr() - return ConstraintStmt(name=name, constraint=constraint) + name: Token = self.consume(TokenType.IDENTIFIER, "Expected predicate name") + self.consume(TokenType.LEFT_PAREN, "Expected '(' before predicate subject") + subject: Token = self.consume(TokenType.IDENTIFIER, "Expected subject name") + self.consume(TokenType.COLON, "Expected ':' after subject name") + type: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_PAREN, "Expected ')' after predicate subject") + self.consume(TokenType.EQUAL, "Expected '=' after predicate subject") + condition: Expr = self.constraint() + return PredicateStmt(name=name, subject=subject, type=type, condition=condition)