diff --git a/core/ast/annotations.py b/core/ast/annotations.py deleted file mode 100644 index a885e29..0000000 --- a/core/ast/annotations.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any, Generic, Optional, TypeVar - -from lexer.token import Token - -T = TypeVar("T") - - -@dataclass(frozen=True) -class Stmt(ABC): - @abstractmethod - def accept(self, visitor: Visitor[T]) -> T: ... - - class Visitor(ABC, Generic[T]): - @abstractmethod - def visit_annotation_stmt(self, stmt: AnnotationStmt) -> T: ... - - -@dataclass(frozen=True) -class AnnotationStmt(Stmt): - name: Token - schema: Optional[SchemaExpr] - - def accept(self, visitor: Stmt.Visitor[T]) -> T: - return visitor.visit_annotation_stmt(self) - - -@dataclass(frozen=True) -class Expr(ABC): - @abstractmethod - def accept(self, visitor: Visitor[T]) -> T: ... - - class Visitor(ABC, Generic[T]): - @abstractmethod - def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ... - - @abstractmethod - def visit_literal_expr(self, expr: LiteralExpr) -> T: ... - - @abstractmethod - def visit_type_expr(self, expr: TypeExpr) -> T: ... - - @abstractmethod - def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ... - - @abstractmethod - def visit_schema_expr(self, expr: SchemaExpr) -> T: ... - - @abstractmethod - def visit_schema_element_expr(self, expr: SchemaElementExpr) -> T: ... 
- - -@dataclass(frozen=True) -class WildcardExpr(Expr): - token: Token - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_wildcard_expr(self) - - -@dataclass(frozen=True) -class LiteralExpr(Expr): - value: Any - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_literal_expr(self) - - -@dataclass(frozen=True) -class TypeExpr(Expr): - name: Token - constraints: list[ConstraintExpr] - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_type_expr(self) - - -@dataclass(frozen=True) -class ConstraintExpr(Expr): - left: Expr - op: Token - right: Expr - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_constraint_expr(self) - - -@dataclass(frozen=True) -class SchemaExpr(Expr): - left: Token - elements: list[Expr] - right: Token - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_schema_expr(self) - - -@dataclass(frozen=True) -class SchemaElementExpr(Expr): - name: Optional[Token] - type: Optional[Expr] - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_schema_element_expr(self) diff --git a/core/ast/json_serializer.py b/core/ast/json_serializer.py index 0c3d773..0064726 100644 --- a/core/ast/json_serializer.py +++ b/core/ast/json_serializer.py @@ -1,14 +1,24 @@ +from typing import Optional, Sequence + from core.ast.midas import ( - ConstraintExpr, - ConstraintStmt, + BinaryExpr, + ComplexTypeStmt, Expr, + ExtendStmt, + GetExpr, + GroupingExpr, LiteralExpr, + LogicalExpr, OpStmt, + PredicateStmt, PropertyStmt, + SimpleTypeExpr, + SimpleTypeStmt, Stmt, - TypeBodyExpr, + TemplateExpr, TypeExpr, - TypeStmt, + UnaryExpr, + VariableExpr, WildcardExpr, ) @@ -19,42 +29,29 @@ class AstJsonSerializer(Stmt.Visitor[dict], Expr.Visitor[dict]): def serialize(self, stmts: list[Stmt]) -> list[dict]: return [stmt.accept(self) for stmt in stmts] - def visit_type_stmt(self, stmt: TypeStmt) -> dict: + def _serialize_optional(self, element: 
Optional[Stmt | Expr]) -> Optional[dict]: + if element is None: + return None + return element.accept(self) + + def _serialize_list(self, elements: Sequence[Stmt | Expr]) -> list[dict]: + return [element.accept(self) for element in elements] + + def visit_simple_type_stmt(self, stmt: SimpleTypeStmt) -> dict: return { - "_type": "TypeStmt", + "_type": "SimpleTypeStmt", + "template": self._serialize_optional(stmt.template), "name": stmt.name.lexeme, - "bases": [base.accept(self) for base in stmt.bases], - "body": stmt.body.accept(self) if stmt.body is not None else None, + "base": stmt.base.accept(self), + "constraint": self._serialize_optional(stmt.constraint), } - def visit_type_expr(self, expr: TypeExpr) -> dict: + def visit_complex_type_stmt(self, stmt: ComplexTypeStmt) -> dict: return { - "_type": "TypeExpr", - "name": expr.name.lexeme, - "constraints": [constraint.accept(self) for constraint in expr.constraints], - } - - def visit_constraint_expr(self, expr: ConstraintExpr) -> dict: - return { - "_type": "ConstraintExpr", - "left": expr.left.accept(self), - "op": expr.op.lexeme, - "right": expr.right.accept(self), - } - - def visit_wildcard_expr(self, expr: WildcardExpr) -> dict: - return {"_type": "WildcardExpr"} - - def visit_literal_expr(self, expr: LiteralExpr) -> dict: - return { - "_type": "LiteralExpr", - "value": expr.value, - } - - def visit_type_body_expr(self, expr: TypeBodyExpr) -> dict: - return { - "_type": "TypeBodyExpr", - "properties": [prop.accept(self) for prop in expr.properties], + "_type": "ComplexTypeStmt", + "name": stmt.name.lexeme, + "template": self._serialize_optional(stmt.template), + "properties": self._serialize_list(stmt.properties), } def visit_property_stmt(self, stmt: PropertyStmt) -> dict: @@ -62,20 +59,101 @@ class AstJsonSerializer(Stmt.Visitor[dict], Expr.Visitor[dict]): "_type": "PropertyStmt", "name": stmt.name.lexeme, "type": stmt.type.accept(self), + "constraint": self._serialize_optional(stmt.constraint), + } + + def 
visit_extend_stmt(self, stmt: ExtendStmt) -> dict: + return { + "_type": "ExtendStmt", + "type": stmt.type.accept(self), + "operations": self._serialize_list(stmt.operations), } def visit_op_stmt(self, stmt: OpStmt) -> dict: return { "_type": "OpStmt", - "left": stmt.left.accept(self), - "op": stmt.op.lexeme, - "right": stmt.right.accept(self), + "name": stmt.name.lexeme, + "operand": stmt.operand.accept(self), "result": stmt.result.accept(self), } - def visit_constraint_stmt(self, stmt: ConstraintStmt) -> dict: + def visit_predicate_stmt(self, stmt: PredicateStmt) -> dict: return { - "_type": "ConstraintStmt", + "_type": "PredicateStmt", "name": stmt.name.lexeme, - "constraint": stmt.constraint.accept(self), + "subject": stmt.subject.lexeme, + "type": stmt.type.accept(self), + "condition": stmt.condition.accept(self), + } + + def visit_simple_type_expr(self, expr: SimpleTypeExpr) -> dict: + return { + "_type": "SimpleTypeExpr", + "name": expr.name.lexeme, + "optional": expr.optional, + } + + def visit_logical_expr(self, expr: LogicalExpr) -> dict: + return { + "_type": "LogicalExpr", + "left": expr.left.accept(self), + "operator": expr.operator.lexeme, + "right": expr.right.accept(self), + } + + def visit_binary_expr(self, expr: BinaryExpr) -> dict: + return { + "_type": "BinaryExpr", + "left": expr.left.accept(self), + "operator": expr.operator.lexeme, + "right": expr.right.accept(self), + } + + def visit_unary_expr(self, expr: UnaryExpr) -> dict: + return { + "_type": "UnaryExpr", + "operator": expr.operator.lexeme, + "right": expr.right.accept(self), + } + + def visit_get_expr(self, expr: GetExpr) -> dict: + return { + "_type": "GetExpr", + "expr": expr.expr.accept(self), + "name": expr.name.lexeme, + } + + def visit_variable_expr(self, expr: VariableExpr) -> dict: + return { + "_type": "VariableExpr", + "name": expr.name.lexeme, + } + + def visit_grouping_expr(self, expr: GroupingExpr) -> dict: + return { + "_type": "GroupingExpr", + "expr": 
expr.expr.accept(self), + } + + def visit_literal_expr(self, expr: LiteralExpr) -> dict: + return { + "_type": "LiteralExpr", + "value": expr.value, + } + + def visit_wildcard_expr(self, expr: WildcardExpr) -> dict: + return {"_type": "WildcardExpr"} + + def visit_template_expr(self, expr: TemplateExpr) -> dict: + return { + "_type": "TemplateExpr", + "type": expr.type.accept(self), + } + + def visit_type_expr(self, expr: TypeExpr) -> dict: + return { + "_type": "TypeExpr", + "name": expr.name.lexeme, + "template": self._serialize_optional(expr.template), + "optional": expr.optional, } diff --git a/core/ast/midas.py b/core/ast/midas.py index 4f2b03f..f4280fb 100644 --- a/core/ast/midas.py +++ b/core/ast/midas.py @@ -1,3 +1,8 @@ +""" +This file was generated by a script. Any manual changes might be overwritten. +Please modify gen/ast.py instead and run gen/gen.py +""" + from __future__ import annotations from abc import ABC, abstractmethod @@ -8,8 +13,9 @@ from lexer.token import Token T = TypeVar("T") - -# Statements +############## +# Statements # +############## @dataclass(frozen=True) @@ -19,42 +25,68 @@ class Stmt(ABC): class Visitor(ABC, Generic[T]): @abstractmethod - def visit_type_stmt(self, stmt: TypeStmt) -> T: ... + def visit_simple_type_stmt(self, stmt: SimpleTypeStmt) -> T: ... + + @abstractmethod + def visit_complex_type_stmt(self, stmt: ComplexTypeStmt) -> T: ... @abstractmethod def visit_property_stmt(self, stmt: PropertyStmt) -> T: ... + @abstractmethod + def visit_extend_stmt(self, stmt: ExtendStmt) -> T: ... + @abstractmethod def visit_op_stmt(self, stmt: OpStmt) -> T: ... @abstractmethod - def visit_constraint_stmt(self, stmt: ConstraintStmt) -> T: ... + def visit_predicate_stmt(self, stmt: PredicateStmt) -> T: ... 
@dataclass(frozen=True) -class TypeStmt(Stmt): +class SimpleTypeStmt(Stmt): name: Token - bases: list[TypeExpr] - body: Optional[TypeBodyExpr] + template: Optional[TemplateExpr] + base: TypeExpr + constraint: Optional[Expr] def accept(self, visitor: Stmt.Visitor[T]) -> T: - return visitor.visit_type_stmt(self) + return visitor.visit_simple_type_stmt(self) + + +@dataclass(frozen=True) +class ComplexTypeStmt(Stmt): + name: Token + template: Optional[TemplateExpr] + properties: list[PropertyStmt] + + def accept(self, visitor: Stmt.Visitor[T]) -> T: + return visitor.visit_complex_type_stmt(self) @dataclass(frozen=True) class PropertyStmt(Stmt): name: Token type: TypeExpr + constraint: Optional[Expr] def accept(self, visitor: Stmt.Visitor[T]) -> T: return visitor.visit_property_stmt(self) +@dataclass(frozen=True) +class ExtendStmt(Stmt): + type: TypeExpr + operations: list[OpStmt] + + def accept(self, visitor: Stmt.Visitor[T]) -> T: + return visitor.visit_extend_stmt(self) + + @dataclass(frozen=True) class OpStmt(Stmt): - left: TypeExpr - op: Token - right: TypeExpr + name: Token + operand: TypeExpr result: TypeExpr def accept(self, visitor: Stmt.Visitor[T]) -> T: @@ -62,15 +94,19 @@ class OpStmt(Stmt): @dataclass(frozen=True) -class ConstraintStmt(Stmt): +class PredicateStmt(Stmt): name: Token - constraint: ConstraintExpr + subject: Token + type: TypeExpr + condition: Expr def accept(self, visitor: Stmt.Visitor[T]) -> T: - return visitor.visit_constraint_stmt(self) + return visitor.visit_predicate_stmt(self) -# Expressions +############### +# Expressions # +############### @dataclass(frozen=True) @@ -80,27 +116,100 @@ class Expr(ABC): class Visitor(ABC, Generic[T]): @abstractmethod - def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ... + def visit_simple_type_expr(self, expr: SimpleTypeExpr) -> T: ... + + @abstractmethod + def visit_logical_expr(self, expr: LogicalExpr) -> T: ... + + @abstractmethod + def visit_binary_expr(self, expr: BinaryExpr) -> T: ... 
+ + @abstractmethod + def visit_unary_expr(self, expr: UnaryExpr) -> T: ... + + @abstractmethod + def visit_get_expr(self, expr: GetExpr) -> T: ... + + @abstractmethod + def visit_variable_expr(self, expr: VariableExpr) -> T: ... + + @abstractmethod + def visit_grouping_expr(self, expr: GroupingExpr) -> T: ... @abstractmethod def visit_literal_expr(self, expr: LiteralExpr) -> T: ... + @abstractmethod + def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ... + + @abstractmethod + def visit_template_expr(self, expr: TemplateExpr) -> T: ... + @abstractmethod def visit_type_expr(self, expr: TypeExpr) -> T: ... - @abstractmethod - def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ... - - @abstractmethod - def visit_type_body_expr(self, expr: TypeBodyExpr) -> T: ... - @dataclass(frozen=True) -class WildcardExpr(Expr): - token: Token +class SimpleTypeExpr(Expr): + name: Token + optional: bool def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_wildcard_expr(self) + return visitor.visit_simple_type_expr(self) + + +@dataclass(frozen=True) +class LogicalExpr(Expr): + left: Expr + operator: Token + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_logical_expr(self) + + +@dataclass(frozen=True) +class BinaryExpr(Expr): + left: Expr + operator: Token + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_binary_expr(self) + + +@dataclass(frozen=True) +class UnaryExpr(Expr): + operator: Token + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_unary_expr(self) + + +@dataclass(frozen=True) +class GetExpr(Expr): + expr: Expr + name: Token + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_get_expr(self) + + +@dataclass(frozen=True) +class VariableExpr(Expr): + name: Token + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_variable_expr(self) + + +@dataclass(frozen=True) +class 
GroupingExpr(Expr): + expr: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_grouping_expr(self) @dataclass(frozen=True) @@ -111,28 +220,27 @@ class LiteralExpr(Expr): return visitor.visit_literal_expr(self) +@dataclass(frozen=True) +class WildcardExpr(Expr): + token: Token + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_wildcard_expr(self) + + +@dataclass(frozen=True) +class TemplateExpr(Expr): + type: TypeExpr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_template_expr(self) + + @dataclass(frozen=True) class TypeExpr(Expr): name: Token - constraints: list[ConstraintExpr] + template: Optional[TemplateExpr] + optional: bool def accept(self, visitor: Expr.Visitor[T]) -> T: return visitor.visit_type_expr(self) - - -@dataclass(frozen=True) -class ConstraintExpr(Expr): - left: Expr - op: Token - right: Expr - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_constraint_expr(self) - - -@dataclass(frozen=True) -class TypeBodyExpr(Expr): - properties: list[PropertyStmt] - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_type_body_expr(self) diff --git a/core/ast/printer.py b/core/ast/printer.py index 086c581..61fede8 100644 --- a/core/ast/printer.py +++ b/core/ast/printer.py @@ -1,11 +1,10 @@ from __future__ import annotations +import io from contextlib import contextmanager from enum import Enum, auto -import io from typing import Generator, Generic, Optional, Protocol, TypeVar -import core.ast.annotations as a import core.ast.midas as m @@ -39,8 +38,8 @@ class AstPrinter(Generic[T]): return self._buf.getvalue() @contextmanager - def _child_level(self, last: bool = False) -> Generator[None, None, None]: - self._levels.append(_Level.LAST if last else _Level.ACTIVE) + def _child_level(self, single: bool = False) -> Generator[None, None, None]: + self._levels.append(_Level.LAST if single else _Level.ACTIVE) try: yield finally: @@ -80,215 
+79,170 @@ class AstPrinter(Generic[T]): self._write_line(f"{label}: None") else: self._write_line(label) - with self._child_level(last=True): + with self._child_level(single=True): child.accept(self) -class AnnotationAstPrinter(AstPrinter, a.Expr.Visitor[None], a.Stmt.Visitor[None]): - def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> None: - self._write_line("AnnotationStmt") - with self._child_level(): - self._write_line(f'name: "{stmt.name.lexeme}"') - self._write_optional_child("schema", stmt.schema, last=True) - - def visit_type_expr(self, expr: a.TypeExpr): - self._write_line("TypeExpr") - with self._child_level(): - self._write_line(f'name: "{expr.name.lexeme}"') - self._write_line("constraints", last=True) - with self._child_level(): - for i, constraint in enumerate(expr.constraints): - self._idx = i - if i == len(expr.constraints) - 1: - self._mark_last() - constraint.accept(self) - - def visit_constraint_expr(self, expr: a.ConstraintExpr) -> None: - self._write_line("ConstraintExpr") - with self._child_level(): - self._write_line("left") - with self._child_level(): - self._mark_last() - expr.left.accept(self) - - self._write_line(f"operator: {expr.op.lexeme}") - - self._write_line("right", last=True) - with self._child_level(): - self._mark_last() - expr.right.accept(self) - - def visit_schema_expr(self, expr: a.SchemaExpr): - self._write_line("SchemaExpr") - with self._child_level(): - for i, elmt in enumerate(expr.elements): - self._idx = i - if i == len(expr.elements) - 1: - self._mark_last() - elmt.accept(self) - - def visit_schema_element_expr(self, expr: a.SchemaElementExpr): - self._write_line("SchemaElementExpr") - with self._child_level(): - name_text: str = "None" if expr.name is None else f'"{expr.name.lexeme}"' - self._write_line(f"name: {name_text}") - self._write_optional_child("type", expr.type, last=True) - - def visit_wildcard_expr(self, expr: a.WildcardExpr) -> None: - self._write_line("WildcardExpr") - - def 
visit_literal_expr(self, expr: a.LiteralExpr) -> None: - self._write_line("LiteralExpr") - with self._child_level(): - self._write_line(f"value: {expr.value}", last=True) - - -class AnnotationPrinter(a.Expr.Visitor[str], a.Stmt.Visitor[str]): - def print(self, expr: a.Expr | a.Stmt): - return expr.accept(self) - - def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> str: - schema: str = "" - if stmt.schema is not None: - schema = stmt.schema.accept(self) - return f"{stmt.name.lexeme}{schema}" - - def visit_type_expr(self, expr: a.TypeExpr) -> str: - parts: list[str] = [expr.name.lexeme] - for constraint in expr.constraints: - parts.append("(" + constraint.accept(self) + ")") - return " + ".join(parts) - - def visit_constraint_expr(self, expr: a.ConstraintExpr) -> str: - parts: list[str] = [ - expr.left.accept(self), - expr.op.lexeme, - expr.right.accept(self), - ] - return " ".join(parts) - - def visit_schema_expr(self, expr: a.SchemaExpr) -> str: - res: str = expr.left.lexeme - res += ", ".join(elmt.accept(self) for elmt in expr.elements) - res += expr.right.lexeme - return res - - def visit_schema_element_expr(self, expr: a.SchemaElementExpr) -> str: - parts: list[str] = [] - if expr.name is not None: - parts.append(expr.name.lexeme) - - if expr.type is None: - parts.append("_") - else: - parts.append(expr.type.accept(self)) - return ": ".join(parts) - - def visit_wildcard_expr(self, expr: a.WildcardExpr) -> str: - return "_" - - def visit_literal_expr(self, expr: a.LiteralExpr) -> str: - return str(expr.value) - - class MidasAstPrinter(AstPrinter, m.Expr.Visitor[None], m.Stmt.Visitor[None]): - def visit_type_stmt(self, stmt: m.TypeStmt): - self._write_line("TypeStmt") + #Statements + + def visit_simple_type_stmt(self, stmt: m.SimpleTypeStmt): + self._write_line("SimpleTypeStmt") with self._child_level(): self._write_line(f'name: "{stmt.name.lexeme}"') - self._write_line("bases") + self._write_optional_child("template", stmt.template) + 
self._write_line("base") + with self._child_level(single=True): + stmt.base.accept(self) + self._write_optional_child("constraint", stmt.constraint, last=True) + + def visit_complex_type_stmt(self, stmt: m.ComplexTypeStmt): + self._write_line("ComplexTypeStmt") + with self._child_level(): + self._write_line(f'name: "{stmt.name.lexeme}"') + self._write_optional_child("template", stmt.template) + self._write_line("properties", last=True) with self._child_level(): - for i, base in enumerate(stmt.bases): + for i, prop in enumerate(stmt.properties): self._idx = i - if i == len(stmt.bases) - 1: + if i == len(stmt.properties) - 1: self._mark_last() - base.accept(self) - self._write_optional_child("body", stmt.body, last=True) + prop.accept(self) def visit_property_stmt(self, stmt: m.PropertyStmt): self._write_line("PropertyStmt") with self._child_level(): self._write_line(f'name: "{stmt.name.lexeme}"') - self._write_line("type", last=True) - with self._child_level(): - self._mark_last() + self._write_line("type") + with self._child_level(single=True): stmt.type.accept(self) + self._write_optional_child("constraint", stmt.constraint, last=True) + + def visit_extend_stmt(self, stmt: m.ExtendStmt) -> None: + self._write_line("ExtendStmt") + with self._child_level(): + self._write_line("type") + with self._child_level(single=True): + stmt.type.accept(self) + self._write_line("operations", last=True) + with self._child_level(): + for i, op in enumerate(stmt.operations): + self._idx = i + if i == len(stmt.operations) - 1: + self._mark_last() + op.accept(self) def visit_op_stmt(self, stmt: m.OpStmt) -> None: self._write_line("OpStmt") with self._child_level(): - self._write_line("left") - with self._child_level(): - self._mark_last() - stmt.left.accept(self) + self._write_line(f'name: "{stmt.name.lexeme}"') - self._write_line(f'op: "{stmt.op.lexeme}"') - - self._write_line("right") - with self._child_level(): - self._mark_last() - stmt.right.accept(self) + 
self._write_line("operand") + with self._child_level(single=True): + stmt.operand.accept(self) self._write_line("result", last=True) - with self._child_level(): - self._mark_last() + with self._child_level(single=True): stmt.result.accept(self) - def visit_constraint_stmt(self, stmt: m.ConstraintStmt): - self._write_line("ConstraintStmt") + def visit_predicate_stmt(self, stmt: m.PredicateStmt): + self._write_line("PredicateStmt") with self._child_level(): self._write_line(f'name: "{stmt.name.lexeme}"') - self._write_line("constraint", last=True) - with self._child_level(): - self._mark_last() - stmt.constraint.accept(self) + self._write_line(f'subject: "{stmt.subject.lexeme}"') + self._write_line("type") + with self._child_level(single=True): + stmt.type.accept(self) + self._write_line("condition", last=True) + with self._child_level(single=True): + stmt.condition.accept(self) - def visit_type_expr(self, expr: m.TypeExpr): - self._write_line("TypeExpr") + # Expressions + + def visit_simple_type_expr(self, expr: m.SimpleTypeExpr): + self._write_line("SimpleTypeExpr") with self._child_level(): self._write_line(f'name: "{expr.name.lexeme}"') - self._write_line("constraints", last=True) - with self._child_level(): - for i, constraint in enumerate(expr.constraints): - self._idx = i - if i == len(expr.constraints) - 1: - self._mark_last() - constraint.accept(self) + self._write_line(f"optional: {expr.optional}", last=True) - def visit_constraint_expr(self, expr: m.ConstraintExpr): - self._write_line("ConstraintExpr") + def visit_logical_expr(self, expr: m.LogicalExpr): + self._write_line("LogicalExpr") with self._child_level(): self._write_line("left") - with self._child_level(): - self._mark_last() + with self._child_level(single=True): expr.left.accept(self) - self._write_line(f"operator: {expr.op.lexeme}") + self._write_line(f"operator: {expr.operator.lexeme}") self._write_line("right", last=True) - with self._child_level(): - self._mark_last() + with 
self._child_level(single=True): expr.right.accept(self) - def visit_type_body_expr(self, expr: m.TypeBodyExpr): - self._write_line("TypeBodyExpr") + def visit_binary_expr(self, expr: m.BinaryExpr): + self._write_line("BinaryExpr") with self._child_level(): - self._write_line("properties", last=True) - with self._child_level(): - for i, property in enumerate(expr.properties): - self._idx = i - if i == len(expr.properties) - 1: - self._mark_last() - property.accept(self) + self._write_line("left") + with self._child_level(single=True): + expr.left.accept(self) - def visit_wildcard_expr(self, expr: m.WildcardExpr) -> None: - self._write_line("WildcardExpr") + self._write_line(f"operator: {expr.operator.lexeme}") + + self._write_line("right", last=True) + with self._child_level(single=True): + expr.right.accept(self) + + def visit_unary_expr(self, expr: m.UnaryExpr): + self._write_line("UnaryExpr") + with self._child_level(): + self._write_line(f"operator: {expr.operator.lexeme}") + + self._write_line("right", last=True) + with self._child_level(single=True): + expr.right.accept(self) + + def visit_get_expr(self, expr: m.GetExpr): + self._write_line("GetExpr") + with self._child_level(): + self._write_line("expr") + with self._child_level(single=True): + expr.expr.accept(self) + self._write_line(f'name: "{expr.name.lexeme}"', last=True) + + def visit_variable_expr(self, expr: m.VariableExpr): + self._write_line("VariableExpr") + with self._child_level(): + self._write_line(f'name: "{expr.name.lexeme}"', last=True) + + def visit_grouping_expr(self, expr: m.GroupingExpr): + self._write_line("GroupingExpr") + with self._child_level(): + self._write_line("expr", last=True) + with self._child_level(single=True): + expr.expr.accept(self) def visit_literal_expr(self, expr: m.LiteralExpr) -> None: self._write_line("LiteralExpr") with self._child_level(): self._write_line(f"value: {expr.value}", last=True) + def visit_wildcard_expr(self, expr: m.WildcardExpr) -> None: + 
self._write_line("WildcardExpr") + + def visit_template_expr(self, expr: m.TemplateExpr) -> None: + self._write_line("TemplateExpr") + with self._child_level(single=True): + self._write_line("type") + with self._child_level(single=True): + expr.type.accept(self) + + def visit_type_expr(self, expr: m.TypeExpr): + self._write_line("TypeExpr") + with self._child_level(): + self._write_line(f'name: "{expr.name.lexeme}"') + self._write_optional_child("template", expr.template) + self._write_line(f"optional: {expr.optional}", last=True) + + class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]): def __init__(self, indent: int = 4): self.indent: int = indent @@ -301,60 +255,94 @@ class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]): self.level = 0 return expr.accept(self) - def visit_type_stmt(self, stmt: m.TypeStmt): - bases: list[str] = [ - b.accept(self) - for b in stmt.bases - ] - - res: str = self.indented(f"type {stmt.name.lexeme}<{', '.join(bases)}>") - if stmt.body is not None: - res += " {\n" - self.level += 1 - res += stmt.body.accept(self) - self.level -= 1 - res += "\n" + self.indented("}") + def visit_simple_type_stmt(self, stmt: m.SimpleTypeStmt): + template: str = stmt.template.accept(self) if stmt.template is not None else "" + res: str = f"type {stmt.name.lexeme}{template}({stmt.base.accept(self)})" + if stmt.constraint is not None: + res += " where " + stmt.constraint.accept(self) + return self.indented(res) + def visit_complex_type_stmt(self, stmt: m.ComplexTypeStmt): + template: str = stmt.template.accept(self) if stmt.template is not None else "" + res: str = self.indented(f"type {stmt.name.lexeme}{template}") + res += " {\n" + self.level += 1 + for prop in stmt.properties: + res += prop.accept(self) + res += "\n" + self.level -= 1 + res += self.indented("}") return res def visit_property_stmt(self, stmt: m.PropertyStmt): - return f"{stmt.name.lexeme}: {stmt.type.accept(self)}" + res: str = f"{stmt.name.lexeme}: {stmt.type.accept(self)}" 
+ if stmt.constraint is not None: + res += " where " + stmt.constraint.accept(self) + return self.indented(res) + + def visit_extend_stmt(self, stmt: m.ExtendStmt): + res: str = self.indented(f"extend {stmt.type.accept(self)}") + res += " {\n" + self.level += 1 + for op in stmt.operations: + res += op.accept(self) + self.level -= 1 + res += "\n" + self.indented("}") + return res def visit_op_stmt(self, stmt: m.OpStmt): - left: str = stmt.left.accept(self) - op: str = stmt.op.lexeme - right: str = stmt.right.accept(self) + operand: str = stmt.operand.accept(self) result: str = stmt.result.accept(self) - return self.indented(f"op <{left}> {op} <{right}> = <{result}>") + return self.indented(f"op {stmt.name.lexeme}({operand}) -> {result}") - def visit_constraint_stmt(self, stmt: m.ConstraintStmt): + def visit_predicate_stmt(self, stmt: m.PredicateStmt): name: str = stmt.name.lexeme - constraint: str = stmt.constraint.accept(self) - return self.indented(f"constraint {name} = {constraint}") + subject: str = stmt.subject.lexeme + type: str = stmt.type.accept(self) + condition: str = stmt.condition.accept(self) + return self.indented(f"predicate {name}({subject}: {type}) = {condition}") - def visit_type_expr(self, expr: m.TypeExpr): - parts: list[str] = [expr.name.lexeme] - for constraint in expr.constraints: - parts.append("(" + constraint.accept(self) + ")") - return " + ".join(parts) + def visit_simple_type_expr(self, expr: m.SimpleTypeExpr): + return f"{expr.name.lexeme}{'?' 
if expr.optional else ''}" - def visit_constraint_expr(self, expr: m.ConstraintExpr): - parts: list[str] = [ - expr.left.accept(self), - expr.op.lexeme, - expr.right.accept(self), - ] - return " ".join(parts) + def visit_logical_expr(self, expr: m.LogicalExpr): + left: str = expr.left.accept(self) + operator: str = expr.operator.lexeme + right: str = expr.right.accept(self) + return f"{left} {operator} {right}" - def visit_type_body_expr(self, expr: m.TypeBodyExpr): - properties: list[str] = [ - self.indented(prop.accept(self)) - for prop in expr.properties - ] - return "\n".join(properties) + def visit_binary_expr(self, expr: m.BinaryExpr): + left: str = expr.left.accept(self) + operator: str = expr.operator.lexeme + right: str = expr.right.accept(self) + return f"{left} {operator} {right}" + + def visit_unary_expr(self, expr: m.UnaryExpr): + operator: str = expr.operator.lexeme + right: str = expr.right.accept(self) + return f"{operator}{right}" + + def visit_get_expr(self, expr: m.GetExpr): + expr_: str = expr.expr.accept(self) + name: str = expr.name.lexeme + return f"{expr_}.{name}" + + def visit_variable_expr(self, expr: m.VariableExpr): + return expr.name.lexeme + + def visit_grouping_expr(self, expr: m.GroupingExpr): + expr_: str = expr.expr.accept(self) + return f"({expr_})" + + def visit_literal_expr(self, expr: m.LiteralExpr): + return str(expr.value) def visit_wildcard_expr(self, expr: m.WildcardExpr): return "_" - def visit_literal_expr(self, expr: m.LiteralExpr): - return str(expr.value) \ No newline at end of file + def visit_template_expr(self, expr: m.TemplateExpr): + return f"[{expr.type.accept(self)}]" + + def visit_type_expr(self, expr: m.TypeExpr): + template: str = expr.template.accept(self) if expr.template is not None else "" + return f"{expr.name.lexeme}{template}{'?' 
if expr.optional else ''}" diff --git a/examples/00_syntax_prototype/03_custom_types_v2.midas b/examples/00_syntax_prototype/03_custom_types_v2.midas new file mode 100644 index 0000000..31b0f53 --- /dev/null +++ b/examples/00_syntax_prototype/03_custom_types_v2.midas @@ -0,0 +1,73 @@ +// Simple custom type derived from float +type Custom(float) + +// Simple custom types with constraints +type Latitude(float) where (-90 <= _ <= 90) +type Longitude(float) where (-180 <= _ <= 180) + +// Generic custom type (a Difference of T is derived from T, e.g. a difference of floats is a float) +type Difference[T](T) + +// Complex custom type, containing two values accessible through properties +type GeoLocation { + lat: Latitude + lon: Longitude +} + +// Define operations on our custom type +extend GeoLocation { + // This type is compatible with the `-` operation with another GeoLocation + // i.e. you can subtract a GeoLocation from another GeoLocation, resulting + // in a Difference of GeoLocations + op __sub__(GeoLocation) -> Difference[GeoLocation] +} + +// For complex generics, you need to specify how the genericity of the properties +// is handled +type Difference[GeoLocation] { + lat: Difference[Latitude] + lon: Difference[Longitude] +} + +// Simple operation defined on our custom types +extend Latitude { + op __sub__(Latitude) -> Difference[Latitude] +} + +extend Longitude { + op __sub__(Longitude) -> Difference[Longitude] +} + +// Predefined custom predicates that can be referenced in other definitions +predicate Positive(v: float) = v >= 0 +predicate StrictlyPositive(v: float) = v > 0 +predicate Equatorial(loc: GeoLocation) = (-10 <= loc.lat <= 10) +predicate Arctic(loc: GeoLocation) = (loc.lat >= 66) + +type Person { + name: str + + // Property with an inline constraint + age: int? 
where (0 <= _ < 150) + + // Property referencing a predicate + height: float where StrictlyPositive + + home: GeoLocation +} + +// Custom complex type derived from another complex type, with a constraint +// on a property +// Multiple proposed syntaxes, not yet defined + +// Explicit, but new keyword +type EquatorialPerson refines Person where Equatorial(_.home) + +// Explicit with existing keyword, might be confusing if expectations regarding 'is' +type EquatorialPerson is Person where Equatorial(_.home) + +// Consistent and Python-friendly but can be confused with structural extension +type EquatorialPerson(Person) where Equatorial(_.home) + +// Allow new properties, probably not useful +type EquatorialPerson extends Person where Equatorial(_.home) diff --git a/gen/ast.py b/gen/ast.py new file mode 100644 index 0000000..6fca631 --- /dev/null +++ b/gen/ast.py @@ -0,0 +1,72 @@ +class SimpleTypeStmt: + name: Token + template: Optional[TemplateExpr] + base: TypeExpr + constraint: Optional[Expr] + +class SimpleTypeExpr: + name: Token + optional: bool + +class LogicalExpr: + left: Expr + operator: Token + right: Expr + +class BinaryExpr: + left: Expr + operator: Token + right: Expr + +class UnaryExpr: + operator: Token + right: Expr + +class GetExpr: + expr: Expr + name: Token + +class VariableExpr: + name: Token + +class GroupingExpr: + expr: Expr + +class LiteralExpr: + value: Any + +class WildcardExpr: + token: Token + +class TemplateExpr: + type: TypeExpr + +class TypeExpr: + name: Token + template: Optional[TemplateExpr] + optional: bool + +class ComplexTypeStmt: + name: Token + template: Optional[TemplateExpr] + properties: list[PropertyStmt] + +class PropertyStmt: + name: Token + type: TypeExpr + constraint: Optional[Expr] + +class ExtendStmt: + type: TypeExpr + operations: list[OpStmt] + +class OpStmt: + name: Token + operand: TypeExpr + result: TypeExpr + +class PredicateStmt: + name: Token + subject: Token + type: TypeExpr + condition: Expr diff --git 
a/gen/gen.py b/gen/gen.py new file mode 100644 index 0000000..47cb827 --- /dev/null +++ b/gen/gen.py @@ -0,0 +1,128 @@ +from pathlib import Path +import re + +HEADER = '''""" +This file was generated by a script. Any manual changes might be overwritten. +Please modify gen/ast.py instead and run gen/gen.py +"""''' + +TEMPLATE = """{header} + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Generic, Optional, TypeVar + +from lexer.token import Token + +T = TypeVar("T") + +############## +# Statements # +############## + + +@dataclass(frozen=True) +class Stmt(ABC): + @abstractmethod + def accept(self, visitor: Visitor[T]) -> T: ... + + class Visitor(ABC, Generic[T]): +{stmt_visitor_methods} + + +{statements} + + +############### +# Expressions # +############### + + +@dataclass(frozen=True) +class Expr(ABC): + @abstractmethod + def accept(self, visitor: Visitor[T]) -> T: ... + + class Visitor(ABC, Generic[T]): +{expr_visitor_methods} + + +{expressions} +""" + +VISITOR_METHOD_TEMPLATE = """ + @abstractmethod + def visit_{func_name}(self, {param}: {cls}) -> T: ... 
+""" + +CLASS_TEMPLATE = """ +@dataclass(frozen=True) +class {cls}({base}): +{body} + + def accept(self, visitor: {base}.Visitor[T]) -> T: + return visitor.visit_{func_name}(self) +""" + +def snake_case(text: str) -> str: + return re.sub(r"[A-Z]", lambda c: "_" + c.group().lower(), text).lower().strip("_") + +def make_visitor_method(cls: str, param: str): + method: str = VISITOR_METHOD_TEMPLATE.format( + func_name=snake_case(cls), + param=param, + cls=cls + ) + return method.strip("\n") + +def make_class(name: str, cls: str, base: str): + body: str = cls.split("\n", 1)[1] + func_name: str = snake_case(name) + cls_def: str = CLASS_TEMPLATE.format( + cls=name, + base=base, + body=body, + func_name=func_name, + ) + return cls_def.strip("\n") + +def generate(src: str): + classes: list[str] = src.split("\n\n") + stmt_visitor_methods: list[str] = [] + expr_visitor_methods: list[str] = [] + statements: list[str] = [] + expressions: list[str] = [] + + for cls in classes: + cls = cls.strip("\n") + name: str = re.match("class (.*?):", cls).group(1) # type: ignore + print(f"Processing {name}") + if name.endswith("Stmt"): + stmt_visitor_methods.append(make_visitor_method(name, "stmt")) + statements.append(make_class(name, cls, "Stmt")) + elif name.endswith("Expr"): + expr_visitor_methods.append(make_visitor_method(name, "expr")) + expressions.append(make_class(name, cls, "Expr")) + + return TEMPLATE.format( + header=HEADER, + stmt_visitor_methods="\n\n".join(stmt_visitor_methods), + expr_visitor_methods="\n\n".join(expr_visitor_methods), + statements="\n\n\n".join(statements), + expressions="\n\n\n".join(expressions), + ) + +def main(): + root: Path = Path(__file__).parent.parent + in_path: Path = root / "gen" / "ast.py" + out_path: Path = root / "core" / "ast" / "midas.py" + + src: str = in_path.read_text() + generated: str = generate(src) + out_path.write_text(generated) + + +if __name__ == "__main__": + main() diff --git a/lexer/annotations.py b/lexer/annotations.py deleted 
file mode 100644 index ae9faae..0000000 --- a/lexer/annotations.py +++ /dev/null @@ -1,102 +0,0 @@ -from lexer.base import Lexer -from lexer.keyword import ANNOTATION_KEYWORDS -from lexer.token import TokenType - - -class AnnotationLexer(Lexer): - def scan_token(self) -> None: - char: str = self.advance() - match char: - case "(": - self.add_token(TokenType.LEFT_PAREN) - case ")": - self.add_token(TokenType.RIGHT_PAREN) - case "[": - self.add_token(TokenType.LEFT_BRACKET) - case "]": - self.add_token(TokenType.RIGHT_BRACKET) - case "<": - self.add_token( - TokenType.LESS_EQUAL if self.match("=") else TokenType.LESS - ) - case ">": - self.add_token( - TokenType.GREATER_EQUAL if self.match("=") else TokenType.GREATER - ) - case "=": - self.add_token( - TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL - ) - case "!": - if self.match("="): - self.add_token(TokenType.BANG_EQUAL) - else: - self.error("Unexpected single bang. Did you mean '!=' ?") - case ":": - self.add_token(TokenType.COLON) - case ",": - self.add_token(TokenType.COMMA) - case "_": - self.add_token(TokenType.UNDERSCORE) - case "+": - self.add_token(TokenType.PLUS) - case "#": - self.scan_comment() - case "\n": - self.add_token(TokenType.NEWLINE) - case " " | "\r" | "\t": - # Consume all whitespace characters until EOL or EOF - while ( - self.peek().isspace() - and self.peek() != "\n" - and not self.is_at_end() - ): - self.advance() - self.add_token(TokenType.WHITESPACE) - case _: - if char.isdigit(): - self.scan_number() - elif char.isalpha(): - self.scan_identifier() - else: - self.error("Unexpected character") - return None - - def scan_number(self): - """Scan the rest of number and add it as a token - - This method handles both simple integers and floats. Scientific notation - and base prefixes (0x, 0b, 0o) are not supported - """ - while self.peek().isdigit(): - self.advance() - - if self.peek() == "." 
and self.peek_next().isdigit(): - self.advance() - while self.peek().isdigit(): - self.advance() - - value: float = float(self.source[self.start : self.idx]) - self.add_token(TokenType.NUMBER, value) - - def scan_identifier(self): - """Scan the rest of an identifier and add it as a token - - An identifier starts with a letter, followed by any number of - alphanumerical characters or underscores - """ - while self.peek().isalnum() or self.peek() == "_": - self.advance() - - lexeme: str = self.source[self.start : self.idx] - token_type: TokenType = ANNOTATION_KEYWORDS.get(lexeme, TokenType.IDENTIFIER) - self.add_token(token_type) - - def scan_comment(self): - """Scan the rest of a comment and add it as a token - - A comment starts with a `#` character and ends at the EOL/EOF - """ - while self.peek() != "\n" and not self.is_at_end(): - self.advance() - self.add_token(TokenType.COMMENT) diff --git a/lexer/keyword.py b/lexer/keyword.py index b66f21a..e5c4b64 100644 --- a/lexer/keyword.py +++ b/lexer/keyword.py @@ -1,15 +1,11 @@ from lexer.token import TokenType -ANNOTATION_KEYWORDS: dict[str, TokenType] = { - "True": TokenType.TRUE, - "False": TokenType.FALSE, - "None": TokenType.NONE, -} - -MIDAS_KEYWORDS: dict[str, TokenType] = { +KEYWORDS: dict[str, TokenType] = { "type": TokenType.TYPE, "op": TokenType.OP, - "constraint": TokenType.CONSTRAINT, + "predicate": TokenType.PREDICATE, + "extend": TokenType.EXTEND, + "where": TokenType.WHERE, "true": TokenType.TRUE, "false": TokenType.FALSE, "none": TokenType.NONE, diff --git a/lexer/midas.py b/lexer/midas.py index ad29a68..054f91d 100644 --- a/lexer/midas.py +++ b/lexer/midas.py @@ -1,5 +1,5 @@ from lexer.base import Lexer -from lexer.keyword import MIDAS_KEYWORDS +from lexer.keyword import KEYWORDS from lexer.token import TokenType @@ -31,30 +31,32 @@ class MidasLexer(Lexer): self.add_token( TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL ) - case "!": - if self.match("="): - 
self.add_token(TokenType.BANG_EQUAL) - else: - self.error("Unexpected single bang. Did you mean '!=' ?") + case "!" if self.match("="): + self.add_token(TokenType.BANG_EQUAL) case ":": self.add_token(TokenType.COLON) - case ",": - self.add_token(TokenType.COMMA) - case "_": + case ".": + self.add_token(TokenType.DOT) + case "&": + self.add_token(TokenType.AND) + case "?": + self.add_token(TokenType.QMARK) + # case ",": + # self.add_token(TokenType.COMMA) + case "_" if not self.is_identifier_char(self.peek_next(), start=False): self.add_token(TokenType.UNDERSCORE) - case "+": - self.add_token(TokenType.PLUS) + case "-" if self.match(">"): + self.add_token(TokenType.ARROW) + # case "+": + # self.add_token(TokenType.PLUS) case "-": self.add_token(TokenType.MINUS) - case "*": - self.add_token(TokenType.STAR) - case "/": - if self.match("/"): - self.scan_comment() - elif self.match("*"): - self.scan_comment_multiline() - else: - self.add_token(TokenType.SLASH) + # case "*": + # self.add_token(TokenType.STAR) + case "/" if self.match("/"): + self.scan_comment() + case "/" if self.match("*"): + self.scan_comment_multiline() case "\n": self.add_token(TokenType.NEWLINE) case " " | "\r" | "\t": @@ -69,7 +71,7 @@ class MidasLexer(Lexer): case _: if char.isdigit(): self.scan_number() - elif char.isalpha(): + elif self.is_identifier_char(char, start=True): self.scan_identifier() else: self.error("Unexpected character") @@ -98,11 +100,11 @@ class MidasLexer(Lexer): An identifier starts with a letter, followed by any number of alphanumerical characters or underscores """ - while self.peek().isalnum() or self.peek() == "_": + while self.is_identifier_char(self.peek(), start=False): self.advance() lexeme: str = self.source[self.start : self.idx] - token_type: TokenType = MIDAS_KEYWORDS.get(lexeme, TokenType.IDENTIFIER) + token_type: TokenType = KEYWORDS.get(lexeme, TokenType.IDENTIFIER) self.add_token(token_type) def scan_comment(self): @@ -129,3 +131,12 @@ class MidasLexer(Lexer): 
if not self.is_at_end(): self.advance() self.add_token(TokenType.COMMENT) + + def is_identifier_char(self, char: str, *, start: bool) -> bool: + if char == "_": + return True + if char.isalpha(): + return True + if not start and char.isdigit(): + return True + return False diff --git a/lexer/token.py b/lexer/token.py index 70a7a1b..1097493 100644 --- a/lexer/token.py +++ b/lexer/token.py @@ -14,14 +14,18 @@ class TokenType(Enum): LEFT_BRACE = auto() RIGHT_BRACE = auto() COLON = auto() - COMMA = auto() + # COMMA = auto() UNDERSCORE = auto() + ARROW = auto() + AND = auto() + QMARK = auto() + DOT = auto() # Operators - PLUS = auto() + # PLUS = auto() MINUS = auto() - STAR = auto() - SLASH = auto() + # STAR = auto() + # SLASH = auto() GREATER = auto() GREATER_EQUAL = auto() LESS = auto() @@ -40,7 +44,9 @@ class TokenType(Enum): # Keywords TYPE = auto() OP = auto() - CONSTRAINT = auto() + PREDICATE = auto() + EXTEND = auto() + WHERE = auto() # Misc COMMENT = auto() diff --git a/parser/annotations.py b/parser/annotations.py deleted file mode 100644 index 0bf99d6..0000000 --- a/parser/annotations.py +++ /dev/null @@ -1,152 +0,0 @@ -from typing import Optional - -from core.ast.annotations import ( - AnnotationStmt, - ConstraintExpr, - Expr, - LiteralExpr, - SchemaElementExpr, - SchemaExpr, - Stmt, - TypeExpr, - WildcardExpr, -) -from lexer.token import Token, TokenType -from parser.base import Parser -from parser.errors import ParsingError - - -class AnnotationParser(Parser): - """A simple parser for custom type annotations""" - - SYNC_BOUNDARY: set[TokenType] = set() - - def parse(self) -> Optional[Stmt]: - stmt: Optional[Stmt] = None - try: - stmt = self.annotation() - except ParsingError: - self.synchronize() - if not self.is_at_end(): - self.error(self.peek(), "Extra tokens") - return stmt - - def synchronize(self): - """Skip tokens until a synchronization boundary is found - - This method allows gracefully recovering from a parse error - to a safe place and continue 
parsing - """ - self.advance() - while not self.is_at_end(): - if self.peek().type in self.SYNC_BOUNDARY: - return - self.advance() - - def annotation(self) -> AnnotationStmt: - """Parse an annotation - - An annotation is written as `Type` or `Type[Schema]` - - Returns: - AnnotationStmt: the parsed annotation statement - """ - - name: Token = self.consume(TokenType.IDENTIFIER, "Expected type identifier") - schema: Optional[SchemaExpr] = None - if self.match(TokenType.LEFT_BRACKET): - schema = self.schema() - return AnnotationStmt(name=name, schema=schema) - - def type_expr(self) -> TypeExpr: - """Parse a type expression - - Returns: - TypeExpr: the parsed type expression - """ - name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") - constraints: list[ConstraintExpr] = [] - - while not self.is_at_end() and self.match(TokenType.PLUS): - self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint") - constraints.append(self.constraint_expr()) - self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint") - - return TypeExpr(name=name, constraints=constraints) - - def constraint_expr(self) -> ConstraintExpr: - """Parse a type constraint - - Returns: - ConstraintExpr: the parsed type constraint expression - """ - - left: Expr = self.constraint_value() - op: Token = self.constraint_operator() - right: Expr = self.constraint_value() - return ConstraintExpr(left=left, op=op, right=right) - - def constraint_value(self) -> Expr: - if self.match(TokenType.UNDERSCORE): - return WildcardExpr(self.previous()) - return self.literal() - - def literal(self) -> LiteralExpr: - if self.match(TokenType.FALSE): - return LiteralExpr(False) - if self.match(TokenType.TRUE): - return LiteralExpr(True) - if self.match(TokenType.NONE): - return LiteralExpr(None) - - if self.match(TokenType.NUMBER): - return LiteralExpr(self.previous().value) - - raise self.error(self.peek(), "Expected literal") - - def constraint_operator(self) -> Token: - if 
self.match(TokenType.LESS, TokenType.LESS_EQUAL, TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.EQUAL_EQUAL, TokenType.BANG_EQUAL): - return self.previous() - raise self.error(self.peek(), "Expected constraint operator") - - def schema(self) -> SchemaExpr: - """Parse a schema definition - - A comma separated list of schema elements - - Returns: - SchemaExpr: the parsed schema expression - """ - left: Token = self.previous() - elements: list[Expr] = [] - while not self.check(TokenType.RIGHT_BRACKET) and not self.is_at_end(): - elements.append(self.schema_element()) - if not self.check(TokenType.RIGHT_BRACKET): - self.consume(TokenType.COMMA, "Expected ',' between schema elements") - - right: Token = self.consume(TokenType.RIGHT_BRACKET, "Unclosed schema") - return SchemaExpr(left=left, elements=elements, right=right) - - def schema_element(self) -> SchemaElementExpr: - """Parse a schema element - - An anonymous element (`_`), a type, an untyped named column (`name: _`), - or a named column (`name: Type`) - - Returns: - SchemaElementExpr: the parsed schema element expression - """ - if self.match(TokenType.UNDERSCORE): - return SchemaElementExpr(name=None, type=None) - - if not self.check(TokenType.IDENTIFIER): - raise self.error(self.peek(), "Expected schema element") - - name: Optional[Token] = None - type: Optional[TypeExpr] = None - if self.check_next(TokenType.COLON): - name = self.advance() - self.advance() - if not self.match(TokenType.UNDERSCORE): - type = self.type_expr() - return SchemaElementExpr(name=name, type=type) diff --git a/parser/midas.py b/parser/midas.py index a49bc5e..65e2786 100644 --- a/parser/midas.py +++ b/parser/midas.py @@ -1,16 +1,24 @@ from typing import Optional from core.ast.midas import ( - ConstraintExpr, - ConstraintStmt, + BinaryExpr, + ComplexTypeStmt, Expr, + ExtendStmt, + GetExpr, + GroupingExpr, LiteralExpr, + LogicalExpr, OpStmt, + PredicateStmt, PropertyStmt, + SimpleTypeExpr, + SimpleTypeStmt, Stmt, - TypeBodyExpr, + 
TemplateExpr, TypeExpr, - TypeStmt, + UnaryExpr, + VariableExpr, WildcardExpr, ) from lexer.token import Token, TokenType @@ -21,7 +29,12 @@ from parser.errors import ParsingError class MidasParser(Parser): """A simple parser for midas type definitions""" - SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT} + SYNC_BOUNDARY: set[TokenType] = { + TokenType.TYPE, + TokenType.OP, + TokenType.EXTEND, + TokenType.PREDICATE, + } def parse(self) -> list[Stmt]: statements: list[Stmt] = [] @@ -58,72 +71,186 @@ class MidasParser(Parser): try: if self.match(TokenType.TYPE): return self.type_declaration() - if self.match(TokenType.OP): - return self.op_declaration() - if self.match(TokenType.CONSTRAINT): - return self.constraint_declaration() + if self.match(TokenType.EXTEND): + return self.extend_declaration() + if self.match(TokenType.PREDICATE): + return self.predicate_declaration() raise self.error(self.peek(), "Unexpected token") except ParsingError: self.synchronize() return None - def type_declaration(self) -> TypeStmt: + def type_declaration(self) -> SimpleTypeStmt | ComplexTypeStmt: """Parse a type declaration - A type declaration is written `type Name` optionally followed by a brace-wrapped body + A type declaration can either be a simple type alias or a new complex type. + In either case, it can have an optional template expression after its name, wrapped in brackets. + A simple type alias is derived from a base type expression, and can have a optional constraint expression preceded by the `where` keyword. + A full simple type alias is thus written: + ``` + type Name[Template](TypeExpr) where Condition + ``` + + A new complex type has a set of properties which are named, have a type and an optional constraint expression (also preceded by the `where` keyword). + A full complex type definition is thus written: + ``` + type Name[Template] { + prop1: TypeExpr1 where Condition1 + prop2: TypeExpr2 where Condition2 + ... 
+ } + ``` Returns: TypeStmt: the parsed type declaration statement """ name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") - self.consume(TokenType.LESS, "Expected '<' after type name") - bases: list[TypeExpr] = [] - while not self.check(TokenType.GREATER) and not self.is_at_end(): - bases.append(self.type_expr()) - if not self.check(TokenType.GREATER): - self.consume(TokenType.COMMA, "Expected ',' between type bases") - self.consume(TokenType.GREATER, "Expected '>' after base type") + template: Optional[TemplateExpr] = None + if self.check(TokenType.LEFT_BRACKET): + template = self.template_expr() - body: Optional[TypeBodyExpr] = None + if self.match(TokenType.LEFT_PAREN): + base: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_PAREN, "Unclosed base type parenthesis") + constraint: Optional[Expr] = None + if self.match(TokenType.WHERE): + constraint = self.constraint() + return SimpleTypeStmt( + name=name, template=template, base=base, constraint=constraint + ) + else: + properties: list[PropertyStmt] = self.type_properties() + return ComplexTypeStmt(name=name, template=template, properties=properties) - if self.check(TokenType.LEFT_BRACE): - body = self.type_body_expr() - return TypeStmt(name=name, bases=bases, body=body) + def template_expr(self) -> TemplateExpr: + """Parse a generic template expression + + A template is written `[TypeExpr]` + + Returns: + TemplateExpr: the parsed template expression + """ + self.consume(TokenType.LEFT_BRACKET, "Missing '[' before template expression") + type: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_BRACKET, "Missing ']' after template expression") + return TemplateExpr(type=type) def type_expr(self) -> TypeExpr: """Parse a type expression + A type is an identifier, optionally followed by a template expression. + It can also optionally be followed by a '?' 
to indicate a nullable type + Returns: TypeExpr: the parsed type expression """ name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") - constraints: list[ConstraintExpr] = [] + template: Optional[TemplateExpr] = None + if self.check(TokenType.LEFT_BRACKET): + template = self.template_expr() + optional: bool = self.match(TokenType.QMARK) + return TypeExpr(name=name, template=template, optional=optional) - while not self.is_at_end() and self.match(TokenType.PLUS): - self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint") - constraints.append(self.constraint_expr()) - self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint") + def simple_type_expr(self) -> SimpleTypeExpr: + """Parse a simple type expression - return TypeExpr(name=name, constraints=constraints) - - def constraint_expr(self) -> ConstraintExpr: - """Parse a type constraint + A simple type is just an identifier optionally followed by a '?' Returns: - ConstraintExpr: the parsed type constraint expression + SimpleTypeExpr: the parsed simple type expression """ + name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") + optional: bool = self.match(TokenType.QMARK) + return SimpleTypeExpr(name=name, optional=optional) - left: Expr = self.constraint_value() - op: Token = self.constraint_operator() - right: Expr = self.constraint_value() - return ConstraintExpr(left=left, op=op, right=right) + def constraint(self) -> Expr: + """Parse a constraint - def constraint_value(self) -> Expr: - if self.match(TokenType.UNDERSCORE): - return WildcardExpr(self.previous()) - return self.literal() + A constraint is basically a logical predicate - def literal(self) -> LiteralExpr: + Returns: + Expr: the parsed constraint expression + """ + return self.and_() + + def and_(self) -> Expr: + """Parse a logical AND expression or a simpler expression + + Returns: + Expr: the parsed expression + """ + expr: Expr = self.equality() + while self.match(TokenType.AND): + 
operator: Token = self.previous() + right: Expr = self.equality() + expr = LogicalExpr(left=expr, operator=operator, right=right) + return expr + + def equality(self) -> Expr: + """Parse a logical equality expression or a simpler expression + + Returns: + Expr: the parsed expression + """ + expr: Expr = self.comparison() + while self.match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL): + operator: Token = self.previous() + right: Expr = self.comparison() + expr = BinaryExpr(left=expr, operator=operator, right=right) + return expr + + def comparison(self) -> Expr: + """Parse a logical comparison expression or a simpler expression + + Returns: + Expr: the parsed expression + """ + expr: Expr = self.unary() + while self.match( + TokenType.LESS, + TokenType.LESS_EQUAL, + TokenType.GREATER, + TokenType.GREATER_EQUAL, + ): + operator: Token = self.previous() + right: Expr = self.unary() + expr = BinaryExpr(left=expr, operator=operator, right=right) + return expr + + def unary(self) -> Expr: + """Parse a unary expression or a simpler expression + + Returns: + Expr: the parsed expression + """ + if self.match(TokenType.MINUS): + operator: Token = self.previous() + right: Expr = self.unary() + return UnaryExpr(operator=operator, right=right) + return self.reference() + + def reference(self) -> Expr: + """Parse an attribute access expression or a simpler expression + + Returns: + Expr: the parsed expression + """ + expr: Expr = self.primary() + while self.match(TokenType.DOT): + name: Token = self.consume( + TokenType.IDENTIFIER, "Expected property name after '.'" + ) + expr = GetExpr(expr=expr, name=name) + return expr + + def primary(self) -> Expr: + """Parse a primary expression + + This includes literals (booleans, numbers, etc.), wildcards, identifiers and grouped expressions + + Returns: + Expr: the parsed expression + """ if self.match(TokenType.FALSE): return LiteralExpr(False) if self.match(TokenType.TRUE): @@ -134,40 +261,39 @@ class MidasParser(Parser): if 
self.match(TokenType.NUMBER): return LiteralExpr(self.previous().value) - raise self.error(self.peek(), "Expected literal") + if self.match(TokenType.IDENTIFIER): + return VariableExpr(self.previous()) - def constraint_operator(self) -> Token: - if self.match( - TokenType.LESS, - TokenType.LESS_EQUAL, - TokenType.GREATER, - TokenType.GREATER_EQUAL, - TokenType.EQUAL_EQUAL, - TokenType.BANG_EQUAL, - ): - return self.previous() - raise self.error(self.peek(), "Expected constraint operator") + if self.match(TokenType.UNDERSCORE): + return WildcardExpr(self.previous()) - def type_body_expr(self) -> TypeBodyExpr: + if self.match(TokenType.LEFT_PAREN): + expr: Expr = self.constraint() + self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis") + return GroupingExpr(expr) + + raise self.error(self.peek(), "Expected expression") + + def type_properties(self) -> list[PropertyStmt]: """Parse a type definition body A type definition body is a set of whitespace-separated property statements enclosed in curly braces Returns: - TypeBodyExpr: the parsed type body expression + list[PropertyStmt]: the parsed type properties """ self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body") properties: list[PropertyStmt] = [] while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end(): properties.append(self.property_stmt()) self.consume(TokenType.RIGHT_BRACE, "Unclosed type body") - return TypeBodyExpr(properties=properties) + return properties def property_stmt(self) -> PropertyStmt: """Parse a property statement - A type property statement is written `name: Type` + A type property statement is written `name: Type` or `name: Type where Condition` Returns: PropertyStmt: the parsed property statement @@ -175,43 +301,61 @@ class MidasParser(Parser): name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name") self.consume(TokenType.COLON, "Expected ':' after property name") type: TypeExpr = self.type_expr() - return PropertyStmt(name=name, type=type) + 
constraint: Optional[Expr] = None + if self.match(TokenType.WHERE): + constraint = self.constraint() + return PropertyStmt(name=name, type=type, constraint=constraint) + + def extend_declaration(self) -> ExtendStmt: + """Parse an extension definition + + An extension is written `extend Type { operations }` + + Returns: + ExtendStmt: the parsed extension statement + """ + type: TypeExpr = self.type_expr() + self.consume(TokenType.LEFT_BRACE, "Expected '{' to start extend body") + operations: list[OpStmt] = [] + while not self.is_at_end() and not self.check(TokenType.RIGHT_BRACE): + operations.append(self.op_declaration()) + self.consume(TokenType.RIGHT_BRACE, "Unclosed extend body") + return ExtendStmt(type=type, operations=operations) def op_declaration(self) -> OpStmt: """Parse an operation definition - An operation is written `op operator = ` where `operator` can be any single token + An operation is written `op name(Type) -> Type` Returns: OpStmt: the parsed operation statement """ - self.consume(TokenType.LESS, "Expected '<' before first type") - left: TypeExpr = self.type_expr() - self.consume(TokenType.GREATER, "Expected '>' after first type") + self.consume(TokenType.OP, "Expected 'op' keyword") - op: Token = self.advance() + name: Token = self.consume(TokenType.IDENTIFIER, "Expected operation name") + self.consume(TokenType.LEFT_PAREN, "Expected '(' before operand type") + operand: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_PAREN, "Expected ')' after operand type") - self.consume(TokenType.LESS, "Expected '<' before second type") - right: TypeExpr = self.type_expr() - self.consume(TokenType.GREATER, "Expected '>' after second type") - - self.consume(TokenType.EQUAL, "Expected '=' after second type") - - self.consume(TokenType.LESS, "Expected '<' before result type") + self.consume(TokenType.ARROW, "Expected '->' before result type") result: TypeExpr = self.type_expr() - self.consume(TokenType.GREATER, "Expected '>' after result type") - 
return OpStmt(left=left, op=op, right=right, result=result) + return OpStmt(name=name, operand=operand, result=result) - def constraint_declaration(self) -> ConstraintStmt: - """Parse a type constraint declaration + def predicate_declaration(self) -> PredicateStmt: + """Parse a predicate declaration - A constraint is written `constraint Name = constraint_expression` + A predicate is written `predicate Name(subject: Type) = constraint_expression` Returns: - ConstraintStmt: the parsed constraint declaration statement + PredicateStmt: the parsed predicate declaration statement """ - name: Token = self.consume(TokenType.IDENTIFIER, "Expected constraint name") - self.consume(TokenType.EQUAL, "Expected '=' after constraint name") - constraint: ConstraintExpr = self.constraint_expr() - return ConstraintStmt(name=name, constraint=constraint) + name: Token = self.consume(TokenType.IDENTIFIER, "Expected predicate name") + self.consume(TokenType.LEFT_PAREN, "Expected '(' before predicate subject") + subject: Token = self.consume(TokenType.IDENTIFIER, "Expected subject name") + self.consume(TokenType.COLON, "Expected ':' after subject name") + type: TypeExpr = self.type_expr() + self.consume(TokenType.RIGHT_PAREN, "Expected ')' after predicate subject") + self.consume(TokenType.EQUAL, "Expected '=' after predicate subject") + condition: Expr = self.constraint() + return PredicateStmt(name=name, subject=subject, type=type, condition=condition) diff --git a/syntax/midas.ebnf b/syntax/midas.ebnf index 71b4740..526e122 100644 --- a/syntax/midas.ebnf +++ b/syntax/midas.ebnf @@ -1,26 +1,35 @@ -identifier ::= '[a-zA-Z][a-zA-Z_]*' +// W3C EBNF syntax definition for Midas +Identifier ::= [a-zA-Z_] [a-zA-Z_0-9]* -integer ::= '\d+' -number ::= integer ["." integer] -boolean ::= "False" | "True" -none ::= "None" +Integer ::= '\d+' +Number ::= "-"? Integer ("." Integer)? 
+Boolean ::= "false" | "true" +None ::= "none" -value ::= number | boolean | none -lambda-value ::= "_" | value -lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!=" -lambda ::= lambda-value lambda-operator lambda-value +Value ::= Number | Boolean | None -constraint ::= identifier | "(" lambda ")" -base-type ::= identifier -type ::= base-type { "+" constraint } +ComparisonOp ::= ">" | "<" | ">=" | "<=" +EqualityOp ::= "==" | "!=" -type-property ::= 'identifier' ":" 'type' -type-body ::= "{" { 'type-property' } "}" +Grouping ::= "(" Constraint ")" +Primary ::= "_" | Value | Identifier | Grouping +Reference ::= Primary ("." Identifier)* +Unary ::= "-" Unary | Reference +Comparison ::= Unary (ComparisonOp Unary)* +Equality ::= Comparison (EqualityOp Comparison)* +Constraint ::= Equality ("&" Equality)* -operation-type ::= "<" 'type' ">" +SimpleType ::= Identifier "?"? +Template ::= "[" Type "]" +Type ::= Identifier Template? "?"? -type-statement ::= "type" 'identifier' "<" 'type' {"," 'type'} ">" ['type-body'] -operation-statement ::= "op" 'operation-type' 'operator' 'operation-type' "=" 'operation-type' -constraint-statement ::= "constraint" 'identifier' "=" 'lambda' +TypeProperty ::= Identifier ":" Type ("where" Constraint)? +ComplexTypeBody ::= "{" TypeProperty* "}" +OpDefinition ::= "op" Identifier "(" Type ")" "->" Type +ExtendBody ::= "{" OpDefinition* "}" -statement ::= type-statement | operation-statement | constraint-statement \ No newline at end of file +TypeStatement ::= "type" Identifier Template? ("(" Type ")" ("where" Constraint)? 
| ComplexTypeBody) +ExtendStatement ::= "extend" Type ExtendBody +PredicateStatement ::= "predicate" Identifier "(" Identifier ":" Type ")" "=" Constraint + +Statement ::= TypeStatement | ExtendStatement | PredicateStatement diff --git a/syntax/midas.typ b/syntax/midas.typ index 17db6d6..3e16f19 100644 --- a/syntax/midas.typ +++ b/syntax/midas.typ @@ -1,4 +1,11 @@ -#import "@preview/fervojo:0.1.1": render +#import "@preview/fervojo:0.1.1": default-css, render + +#let extra-css = ```css +svg.railroad .terminal rect { + fill: #F7DCD4; +} +``` +#let css = default-css() + bytes(extra-css.text) #let value = ``` {[`value` < @@ -8,90 +15,157 @@ >]} ``` -#let constraint = ``` -{[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]} +#let grouping = ``` +{[`grouping` "(" 'constraint' ")"]} ``` -#let type-with-constraints = ``` -{[`type-with-constraints` 'identifier' ]} +#let primary = ``` +{[`primary` <"_", 'value', 'identifier', 'grouping'>]} +``` + +#let reference = ``` +{[`reference` 'primary' ]} +``` + +#let unary = ``` +{[`unary` <[ 'unary'], 'reference'>]} +``` + +#let comparison = ``` +{[`comparison` 'unary'*<">", "<", ">=", "<=">]} +``` + +#let equality = ``` +{[`equality` 'comparison'*<"==", "!=">]} +``` + +#let constraint = ``` +{[`constraint` 'equality'*"&"]} +``` + +#let simple-type = ``` +{[`simple-type` 'identifier' ]} +``` + +#let template = ``` +{[`template` "[" 'type' "]"]} +``` + +#let type = ``` +{[`type` 'identifier' ]} ``` #let type-property = ``` -{[`type-property` 'identifier' ":" 'type-with-constraints']} +{[`type-property` 'identifier' ":" 'type' ]} ``` #let type-body = ``` {[`type-body` "{" "}"]} ``` -#let operation-type = ``` -{[`operation-type` "<" 'type-with-constraints' ">"]} -``` - #let type-statement = ``` -{[`type-statement` "type" 'identifier' "<" 'type-with-constraints'*"," ">" ]} +{[`type-statement` "type" 'identifier' <[["(" 'type' ")"] ], 'type-body'>]} ``` -#let operation-statement = ``` 
-{[`operation-statement` "op" 'operation-type' "operator" 'operation-type' "=" 'operation-type']} +#let op-definition = ``` +{[`op-definition` "op" 'identifier' "(" 'type' ")" "->" 'type']} ``` -#let constraint-statement = ``` -{[`constraint-statement` "constraint" 'identifier' "=" 'constraint']} +#let extend-statement = ``` +{[`extend-statement` "extend" 'type' "{" "}"]} +``` + +#let predicate-statement = ``` +{[`predicate-statement` "predicate" 'identifier' "(" 'identifier' ":" 'type' ")" "=" 'constraint']} ``` #let statement = ``` -{[`statement` <'type-statement', 'operation-statement', 'constraint-statement'>]} +{[`statement` <'type-statement', 'extend-statement', 'predicate-statement'>]} ``` #let rules = ( - value, - constraint, - type-with-constraints, - type-property, - type-body, - operation-type, - type-statement, - operation-statement, - constraint-statement, - statement, + value: value, + grouping: grouping, + primary: primary, + reference: reference, + unary: unary, + comparison: comparison, + equality: equality, + constraint: constraint, + simple-type: simple-type, + template: template, + type: type, + type-property: type-property, + type-body: type-body, + type-statement: type-statement, + op-definition: op-definition, + extend-statement: extend-statement, + predicate-statement: predicate-statement, + statement: statement, +) + +#let inline = ( + "grouping", + "value", + "template", + "simple-type", + "type-property", + "type-body", + "op-definition", + "type-statement", + "extend-statement", + "predicate-statement", ) #set text(font: "Source Sans 3") -= Midas type definition syntax +#title[Midas type definition syntax] -#for rule in rules { - render(rule) -} += Outline -/* -#let by-name = ( - value: value, - constraint: constraint, - type-with-constraints: type-with-constraints, - type-property: type-property, - type-body: type-body, - operation-type: operation-type, - type-statement: type-statement, - operation-statement: operation-statement, - 
constraint-statement: constraint-statement, +#box( + columns( + 2, + outline(title: none), + ), + height: 9cm, + stroke: 1pt, + inset: 1em, ) += Statements and expressions + +#for (name, rule) in rules.pairs().rev() { + [== #name] + render(rule, css: css) +} + #let substitute(base-rule) = { let new-rule = base-rule - for (key, rule) in by-name.pairs() { - new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1)) + for name in inline { + let rule = rules.at(name) + let replacement = rule.text.slice(1, -1).replace(regex("\[`.*?`"), "[") + replacement = "[" + replacement + "#`" + name + "`]" + new-rule = new-rule.replace( + "'" + name + "'", + replacement, + ) } if new-rule != base-rule { new-rule = substitute(new-rule) } - return new-rule.replace(regex("`.*?`"), "") + return new-rule } -#let combined = raw(substitute(statement.text)) - - #set page(flipped: true) -#render(combined) -*/ \ No newline at end of file + += Combined rules + +#for (name, rule) in rules.pairs() { + if not name in inline { + [== #name] + let combined = substitute(rule.text) + render(raw(combined), css: css) + //raw(block: true, combined) + } +} diff --git a/test.py b/test.py index 0476438..048329a 100644 --- a/test.py +++ b/test.py @@ -1,40 +1,21 @@ -import importlib +import json from pathlib import Path -from core.ast.printer import AnnotationAstPrinter, MidasAstPrinter -from lexer.annotations import AnnotationLexer +from core.ast.printer import MidasAstPrinter from lexer.midas import MidasLexer from lexer.token import Token -from parser.annotations import AnnotationParser from parser.midas import MidasParser -def test_annotation(): - # Frame annotation - mod = importlib.import_module("examples.00_syntax_prototype.01_simple_types") - - annotation: str = mod.__annotations__["df"] - lexer: AnnotationLexer = AnnotationLexer(annotation, "01_simple_types.py") - tokens: list[Token] = lexer.process() - # print([f"{t.type.name}('{t.lexeme}')" for t in tokens]) - - parser = 
AnnotationParser(tokens) - parsed = parser.parse() - print(parsed) - for err in parser.errors: - print(err.get_report()) - printer = AnnotationAstPrinter() - if parsed is not None: - print(printer.print(parsed)) - - def test_midas(): # Midas type definitions - path: Path = Path("examples") / "00_syntax_prototype" / "02_custom_types.midas" + path: Path = Path("examples") / "00_syntax_prototype" / "03_custom_types_v2.midas" definitions: str = path.read_text() midas_lexer: MidasLexer = MidasLexer(definitions, path.name) tokens: list[Token] = midas_lexer.process() # print([f"{t.type.name}('{t.lexeme}')" for t in tokens]) + with open("tokens.json", "w") as f: + json.dump([f"{t.type.name}('{t.lexeme}')" for t in tokens], f, indent=4) parser = MidasParser(tokens) parsed = parser.parse() diff --git a/tests/cases/parser/01_simple_types.midas b/tests/cases/parser/01_simple_types.midas index 017e40c..9432751 100644 --- a/tests/cases/parser/01_simple_types.midas +++ b/tests/cases/parser/01_simple_types.midas @@ -1,24 +1,57 @@ -// Simple custom type derived from floats -type Latitude -type Longitude +// Simple custom type derived from float +type Custom(float) + +// Simple custom types with constraints +type Latitude(float) where (-90 <= _ <= 90) +type Longitude(float) where (-180 <= _ <= 180) + +// Generic custom type (a Difference of T is derived from T, e.g. a difference of floats is a float +type Difference[T](T) // Complex custom type, containing two values accessible through properties -type GeoLocation { +type GeoLocation { lat: Latitude lon: Longitude } -type LatitudeDiff -type LongitudeDiff +// Define operations on our custom type +extend GeoLocation { + // This type is compatible with the `-` operation with another GeoLocation + // i.e. 
you can subtract a GeoLocation from another GeoLocation, resulting + // in a Difference of GeoLocations + op __sub__(GeoLocation) -> Difference[GeoLocation] +} + +// For complex generics, you need to specify how the genericity the properties +// are handled +type Difference[GeoLocation] { + lat: Difference[Latitude] + lon: Difference[Longitude] +} // Simple operation defined on our custom types -op - = -op - = +extend Latitude { + op __sub__(Latitude) -> Difference[Latitude] +} -// Simple custom type with a constraint -type Age +extend Longitude { + op __sub__(Longitude) -> Difference[Longitude] +} -// Predefined custom constraints that can be referenced in other definitions -constraint Positive = _ >= 0 -constraint StrictlyPositive = _ > 0 -//constraint Even = _ % 2 == 0 \ No newline at end of file +// Predefined custom predicates that can be referenced in other definitions +predicate Positive(v: float) = v >= 0 +predicate StrictlyPositive(v: float) = v > 0 +predicate Equatorial(loc: GeoLocation) = (-10 <= loc.lat <= 10) +predicate Arctic(loc: GeoLocation) = (loc.lat >= 66) + +type Person { + name: str + + // Property with an inline constraint + age: int? 
where (0 <= _ < 150) + + // Property referencing a predicate + height: float where StrictlyPositive + + home: GeoLocation +} diff --git a/tests/cases/parser/01_simple_types.midas.ref.json b/tests/cases/parser/01_simple_types.midas.ref.json index 0697e32..9c9aa5b 100644 --- a/tests/cases/parser/01_simple_types.midas.ref.json +++ b/tests/cases/parser/01_simple_types.midas.ref.json @@ -2,7 +2,7 @@ "tokens": [ { "type": "COMMENT", - "lexeme": "// Simple custom type derived from floats", + "lexeme": "// Simple custom type derived from float", "line": 1, "column": 1 }, @@ -10,7 +10,7 @@ "type": "NEWLINE", "lexeme": "\n", "line": 1, - "column": 42 + "column": 41 }, { "type": "TYPE", @@ -26,93 +26,183 @@ }, { "type": "IDENTIFIER", - "lexeme": "Latitude", + "lexeme": "Custom", "line": 2, "column": 6 }, { - "type": "LESS", - "lexeme": "<", + "type": "LEFT_PAREN", + "lexeme": "(", "line": 2, - "column": 14 + "column": 12 }, { "type": "IDENTIFIER", "lexeme": "float", "line": 2, - "column": 15 + "column": 13 }, { - "type": "GREATER", - "lexeme": ">", + "type": "RIGHT_PAREN", + "lexeme": ")", "line": 2, - "column": 20 + "column": 18 }, { "type": "NEWLINE", "lexeme": "\n", "line": 2, - "column": 21 + "column": 19 }, { - "type": "TYPE", - "lexeme": "type", + "type": "NEWLINE", + "lexeme": "\n", "line": 3, "column": 1 }, { - "type": "WHITESPACE", - "lexeme": " ", - "line": 3, - "column": 5 - }, - { - "type": "IDENTIFIER", - "lexeme": "Longitude", - "line": 3, - "column": 6 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 3, - "column": 15 - }, - { - "type": "IDENTIFIER", - "lexeme": "float", - "line": 3, - "column": 16 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 3, - "column": 21 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 3, - "column": 22 + "type": "COMMENT", + "lexeme": "// Simple custom types with constraints", + "line": 4, + "column": 1 }, { "type": "NEWLINE", "lexeme": "\n", "line": 4, + "column": 40 + }, + { + "type": "TYPE", + "lexeme": 
"type", + "line": 5, "column": 1 }, { - "type": "COMMENT", - "lexeme": "// Complex custom type, containing two values accessible through properties", + "type": "WHITESPACE", + "lexeme": " ", "line": 5, - "column": 1 + "column": 5 + }, + { + "type": "IDENTIFIER", + "lexeme": "Latitude", + "line": 5, + "column": 6 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 5, + "column": 14 + }, + { + "type": "IDENTIFIER", + "lexeme": "float", + "line": 5, + "column": 15 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 5, + "column": 20 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 5, + "column": 21 + }, + { + "type": "WHERE", + "lexeme": "where", + "line": 5, + "column": 22 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 5, + "column": 27 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 5, + "column": 28 + }, + { + "type": "MINUS", + "lexeme": "-", + "line": 5, + "column": 29 + }, + { + "type": "NUMBER", + "lexeme": "90", + "line": 5, + "column": 30 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 5, + "column": 32 + }, + { + "type": "LESS_EQUAL", + "lexeme": "<=", + "line": 5, + "column": 33 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 5, + "column": 35 + }, + { + "type": "UNDERSCORE", + "lexeme": "_", + "line": 5, + "column": 36 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 5, + "column": 37 + }, + { + "type": "LESS_EQUAL", + "lexeme": "<=", + "line": 5, + "column": 38 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 5, + "column": 40 + }, + { + "type": "NUMBER", + "lexeme": "90", + "line": 5, + "column": 41 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 5, + "column": 43 }, { "type": "NEWLINE", "lexeme": "\n", "line": 5, - "column": 76 + "column": 44 }, { "type": "TYPE", @@ -128,147 +218,201 @@ }, { "type": "IDENTIFIER", - "lexeme": "GeoLocation", + "lexeme": "Longitude", "line": 6, "column": 6 }, { - "type": "LESS", - "lexeme": "<", + "type": 
"LEFT_PAREN", + "lexeme": "(", "line": 6, - "column": 17 + "column": 15 }, { "type": "IDENTIFIER", - "lexeme": "Latitude", + "lexeme": "float", "line": 6, - "column": 18 + "column": 16 }, { - "type": "COMMA", - "lexeme": ",", + "type": "RIGHT_PAREN", + "lexeme": ")", "line": 6, - "column": 26 + "column": 21 }, { "type": "WHITESPACE", "lexeme": " ", "line": 6, - "column": 27 + "column": 22 }, { - "type": "IDENTIFIER", - "lexeme": "Longitude", + "type": "WHERE", + "lexeme": "where", + "line": 6, + "column": 23 + }, + { + "type": "WHITESPACE", + "lexeme": " ", "line": 6, "column": 28 }, { - "type": "GREATER", - "lexeme": ">", + "type": "LEFT_PAREN", + "lexeme": "(", "line": 6, - "column": 37 + "column": 29 + }, + { + "type": "MINUS", + "lexeme": "-", + "line": 6, + "column": 30 + }, + { + "type": "NUMBER", + "lexeme": "180", + "line": 6, + "column": 31 }, { "type": "WHITESPACE", "lexeme": " ", "line": 6, + "column": 34 + }, + { + "type": "LESS_EQUAL", + "lexeme": "<=", + "line": 6, + "column": 35 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 6, + "column": 37 + }, + { + "type": "UNDERSCORE", + "lexeme": "_", + "line": 6, "column": 38 }, { - "type": "LEFT_BRACE", - "lexeme": "{", + "type": "WHITESPACE", + "lexeme": " ", "line": 6, "column": 39 }, { - "type": "NEWLINE", - "lexeme": "\n", + "type": "LESS_EQUAL", + "lexeme": "<=", "line": 6, "column": 40 }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 7, - "column": 1 - }, - { - "type": "IDENTIFIER", - "lexeme": "lat", - "line": 7, - "column": 5 - }, - { - "type": "COLON", - "lexeme": ":", - "line": 7, - "column": 8 - }, { "type": "WHITESPACE", "lexeme": " ", - "line": 7, - "column": 9 + "line": 6, + "column": 42 }, { - "type": "IDENTIFIER", - "lexeme": "Latitude", - "line": 7, - "column": 10 + "type": "NUMBER", + "lexeme": "180", + "line": 6, + "column": 43 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 6, + "column": 46 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 6, + 
"column": 47 }, { "type": "NEWLINE", "lexeme": "\n", "line": 7, + "column": 1 + }, + { + "type": "COMMENT", + "lexeme": "// Generic custom type (a Difference of T is derived from T, e.g. a difference of floats is a float", + "line": 8, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 8, + "column": 100 + }, + { + "type": "TYPE", + "lexeme": "type", + "line": 9, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 9, + "column": 5 + }, + { + "type": "IDENTIFIER", + "lexeme": "Difference", + "line": 9, + "column": 6 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + "line": 9, + "column": 16 + }, + { + "type": "IDENTIFIER", + "lexeme": "T", + "line": 9, + "column": 17 + }, + { + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 9, "column": 18 }, { - "type": "WHITESPACE", - "lexeme": " ", - "line": 8, - "column": 1 - }, - { - "type": "IDENTIFIER", - "lexeme": "lon", - "line": 8, - "column": 5 - }, - { - "type": "COLON", - "lexeme": ":", - "line": 8, - "column": 8 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 8, - "column": 9 - }, - { - "type": "IDENTIFIER", - "lexeme": "Longitude", - "line": 8, - "column": 10 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 8, + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 9, "column": 19 }, { - "type": "RIGHT_BRACE", - "lexeme": "}", + "type": "IDENTIFIER", + "lexeme": "T", "line": 9, - "column": 1 + "column": 20 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 9, + "column": 21 }, { "type": "NEWLINE", "lexeme": "\n", "line": 9, - "column": 2 + "column": 22 }, { "type": "NEWLINE", @@ -276,869 +420,2239 @@ "line": 10, "column": 1 }, + { + "type": "COMMENT", + "lexeme": "// Complex custom type, containing two values accessible through properties", + "line": 11, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 11, + "column": 76 + }, { "type": "TYPE", "lexeme": "type", - "line": 11, + "line": 12, "column": 1 }, { "type": 
"WHITESPACE", "lexeme": " ", - "line": 11, + "line": 12, "column": 5 }, { "type": "IDENTIFIER", - "lexeme": "LatitudeDiff", - "line": 11, + "lexeme": "GeoLocation", + "line": 12, "column": 6 }, { - "type": "LESS", - "lexeme": "<", - "line": 11, + "type": "WHITESPACE", + "lexeme": " ", + "line": 12, + "column": 17 + }, + { + "type": "LEFT_BRACE", + "lexeme": "{", + "line": 12, "column": 18 }, { - "type": "IDENTIFIER", - "lexeme": "float", - "line": 11, + "type": "NEWLINE", + "lexeme": "\n", + "line": 12, "column": 19 }, { - "type": "GREATER", - "lexeme": ">", - "line": 11, - "column": 24 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 11, - "column": 25 - }, - { - "type": "TYPE", - "lexeme": "type", - "line": 12, + "type": "WHITESPACE", + "lexeme": " ", + "line": 13, "column": 1 }, + { + "type": "IDENTIFIER", + "lexeme": "lat", + "line": 13, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 13, + "column": 8 + }, { "type": "WHITESPACE", "lexeme": " ", - "line": 12, - "column": 5 + "line": 13, + "column": 9 }, { "type": "IDENTIFIER", - "lexeme": "LongitudeDiff", - "line": 12, - "column": 6 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 12, - "column": 19 - }, - { - "type": "IDENTIFIER", - "lexeme": "float", - "line": 12, - "column": 20 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 12, - "column": 25 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 12, - "column": 26 + "lexeme": "Latitude", + "line": 13, + "column": 10 }, { "type": "NEWLINE", "lexeme": "\n", "line": 13, + "column": 18 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 14, + "column": 1 + }, + { + "type": "IDENTIFIER", + "lexeme": "lon", + "line": 14, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 14, + "column": 8 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 14, + "column": 9 + }, + { + "type": "IDENTIFIER", + "lexeme": "Longitude", + "line": 14, + "column": 10 + }, + { + "type": "NEWLINE", + 
"lexeme": "\n", + "line": 14, + "column": 19 + }, + { + "type": "RIGHT_BRACE", + "lexeme": "}", + "line": 15, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 15, + "column": 2 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 16, "column": 1 }, { "type": "COMMENT", - "lexeme": "// Simple operation defined on our custom types", - "line": 14, + "lexeme": "// Define operations on our custom type", + "line": 17, "column": 1 }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 14, - "column": 48 - }, - { - "type": "OP", - "lexeme": "op", - "line": 15, - "column": 1 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 15, - "column": 3 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 15, - "column": 4 - }, - { - "type": "IDENTIFIER", - "lexeme": "Latitude", - "line": 15, - "column": 5 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 15, - "column": 13 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 15, - "column": 14 - }, - { - "type": "MINUS", - "lexeme": "-", - "line": 15, - "column": 15 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 15, - "column": 16 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 15, - "column": 17 - }, - { - "type": "IDENTIFIER", - "lexeme": "Latitude", - "line": 15, - "column": 18 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 15, - "column": 26 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 15, - "column": 27 - }, - { - "type": "EQUAL", - "lexeme": "=", - "line": 15, - "column": 28 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 15, - "column": 29 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 15, - "column": 30 - }, - { - "type": "IDENTIFIER", - "lexeme": "LatitudeDiff", - "line": 15, - "column": 31 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 15, - "column": 43 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 15, - "column": 44 - }, - { - "type": "OP", - "lexeme": "op", - "line": 16, - "column": 1 
- }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 16, - "column": 3 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 16, - "column": 4 - }, - { - "type": "IDENTIFIER", - "lexeme": "Longitude", - "line": 16, - "column": 5 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 16, - "column": 14 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 16, - "column": 15 - }, - { - "type": "MINUS", - "lexeme": "-", - "line": 16, - "column": 16 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 16, - "column": 17 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 16, - "column": 18 - }, - { - "type": "IDENTIFIER", - "lexeme": "Longitude", - "line": 16, - "column": 19 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 16, - "column": 28 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 16, - "column": 29 - }, - { - "type": "EQUAL", - "lexeme": "=", - "line": 16, - "column": 30 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 16, - "column": 31 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 16, - "column": 32 - }, - { - "type": "IDENTIFIER", - "lexeme": "LongitudeDiff", - "line": 16, - "column": 33 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 16, - "column": 46 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 16, - "column": 47 - }, { "type": "NEWLINE", "lexeme": "\n", "line": 17, + "column": 40 + }, + { + "type": "EXTEND", + "lexeme": "extend", + "line": 18, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 18, + "column": 7 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 18, + "column": 8 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 18, + "column": 19 + }, + { + "type": "LEFT_BRACE", + "lexeme": "{", + "line": 18, + "column": 20 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 18, + "column": 21 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 19, "column": 1 }, { "type": "COMMENT", - "lexeme": "// 
Simple custom type with a constraint", - "line": 18, - "column": 1 - }, - { - "type": "NEWLINE", - "lexeme": "\n", - "line": 18, - "column": 40 - }, - { - "type": "TYPE", - "lexeme": "type", - "line": 19, - "column": 1 - }, - { - "type": "WHITESPACE", - "lexeme": " ", + "lexeme": "// This type is compatible with the `-` operation with another GeoLocation", "line": 19, "column": 5 }, - { - "type": "IDENTIFIER", - "lexeme": "Age", - "line": 19, - "column": 6 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 19, - "column": 9 - }, - { - "type": "IDENTIFIER", - "lexeme": "int", - "line": 19, - "column": 10 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 13 - }, - { - "type": "PLUS", - "lexeme": "+", - "line": 19, - "column": 14 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 15 - }, - { - "type": "LEFT_PAREN", - "lexeme": "(", - "line": 19, - "column": 16 - }, - { - "type": "NUMBER", - "lexeme": "0", - "line": 19, - "column": 17 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 18 - }, - { - "type": "LESS_EQUAL", - "lexeme": "<=", - "line": 19, - "column": 19 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 21 - }, - { - "type": "UNDERSCORE", - "lexeme": "_", - "line": 19, - "column": 22 - }, - { - "type": "RIGHT_PAREN", - "lexeme": ")", - "line": 19, - "column": 23 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 24 - }, - { - "type": "PLUS", - "lexeme": "+", - "line": 19, - "column": 25 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 26 - }, - { - "type": "LEFT_PAREN", - "lexeme": "(", - "line": 19, - "column": 27 - }, - { - "type": "UNDERSCORE", - "lexeme": "_", - "line": 19, - "column": 28 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 19, - "column": 29 - }, - { - "type": "LESS", - "lexeme": "<", - "line": 19, - "column": 30 - }, - { - "type": "WHITESPACE", - "lexeme": " ", - "line": 
19, - "column": 31 - }, - { - "type": "NUMBER", - "lexeme": "150", - "line": 19, - "column": 32 - }, - { - "type": "RIGHT_PAREN", - "lexeme": ")", - "line": 19, - "column": 35 - }, - { - "type": "GREATER", - "lexeme": ">", - "line": 19, - "column": 36 - }, { "type": "NEWLINE", "lexeme": "\n", "line": 19, - "column": 37 + "column": 79 }, { - "type": "NEWLINE", - "lexeme": "\n", + "type": "WHITESPACE", + "lexeme": " ", "line": 20, "column": 1 }, { "type": "COMMENT", - "lexeme": "// Predefined custom constraints that can be referenced in other definitions", + "lexeme": "// i.e. you can subtract a GeoLocation from another GeoLocation, resulting", + "line": 20, + "column": 5 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 20, + "column": 79 + }, + { + "type": "WHITESPACE", + "lexeme": " ", "line": 21, "column": 1 }, + { + "type": "COMMENT", + "lexeme": "// in a Difference of GeoLocations", + "line": 21, + "column": 5 + }, { "type": "NEWLINE", "lexeme": "\n", "line": 21, - "column": 77 + "column": 39 }, { - "type": "CONSTRAINT", - "lexeme": "constraint", + "type": "WHITESPACE", + "lexeme": " ", "line": 22, "column": 1 }, + { + "type": "OP", + "lexeme": "op", + "line": 22, + "column": 5 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 22, + "column": 7 + }, + { + "type": "IDENTIFIER", + "lexeme": "__sub__", + "line": 22, + "column": 8 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 22, + "column": 15 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 22, + "column": 16 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 22, + "column": 27 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 22, + "column": 28 + }, + { + "type": "ARROW", + "lexeme": "->", + "line": 22, + "column": 29 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 22, + "column": 31 + }, + { + "type": "IDENTIFIER", + "lexeme": "Difference", + "line": 22, + "column": 32 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + 
"line": 22, + "column": 42 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 22, + "column": 43 + }, + { + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 22, + "column": 54 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 22, + "column": 55 + }, + { + "type": "RIGHT_BRACE", + "lexeme": "}", + "line": 23, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 23, + "column": 2 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 24, + "column": 1 + }, + { + "type": "COMMENT", + "lexeme": "// For complex generics, you need to specify how the genericity the properties", + "line": 25, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 25, + "column": 79 + }, + { + "type": "COMMENT", + "lexeme": "// are handled", + "line": 26, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 26, + "column": 15 + }, + { + "type": "TYPE", + "lexeme": "type", + "line": 27, + "column": 1 + }, { "type": "WHITESPACE", "lexeme": " ", - "line": 22, - "column": 11 + "line": 27, + "column": 5 }, { "type": "IDENTIFIER", - "lexeme": "Positive", - "line": 22, - "column": 12 + "lexeme": "Difference", + "line": 27, + "column": 6 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + "line": 27, + "column": 16 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 27, + "column": 17 + }, + { + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 27, + "column": 28 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 22, + "line": 27, + "column": 29 + }, + { + "type": "LEFT_BRACE", + "lexeme": "{", + "line": 27, + "column": 30 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 27, + "column": 31 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 28, + "column": 1 + }, + { + "type": "IDENTIFIER", + "lexeme": "lat", + "line": 28, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 28, + "column": 8 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 28, + 
"column": 9 + }, + { + "type": "IDENTIFIER", + "lexeme": "Difference", + "line": 28, + "column": 10 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + "line": 28, "column": 20 }, { - "type": "EQUAL", - "lexeme": "=", - "line": 22, + "type": "IDENTIFIER", + "lexeme": "Latitude", + "line": 28, "column": 21 }, { - "type": "WHITESPACE", - "lexeme": " ", - "line": 22, - "column": 22 + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 28, + "column": 29 }, { - "type": "UNDERSCORE", - "lexeme": "_", - "line": 22, - "column": 23 + "type": "NEWLINE", + "lexeme": "\n", + "line": 28, + "column": 30 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 29, + "column": 1 + }, + { + "type": "IDENTIFIER", + "lexeme": "lon", + "line": 29, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 29, + "column": 8 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 22, + "line": 29, + "column": 9 + }, + { + "type": "IDENTIFIER", + "lexeme": "Difference", + "line": 29, + "column": 10 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + "line": 29, + "column": 20 + }, + { + "type": "IDENTIFIER", + "lexeme": "Longitude", + "line": 29, + "column": 21 + }, + { + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 29, + "column": 30 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 29, + "column": 31 + }, + { + "type": "RIGHT_BRACE", + "lexeme": "}", + "line": 30, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 30, + "column": 2 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 31, + "column": 1 + }, + { + "type": "COMMENT", + "lexeme": "// Simple operation defined on our custom types", + "line": 32, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 32, + "column": 48 + }, + { + "type": "EXTEND", + "lexeme": "extend", + "line": 33, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 33, + "column": 7 + }, + { + "type": "IDENTIFIER", + "lexeme": "Latitude", + "line": 33, + "column": 
8 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 33, + "column": 16 + }, + { + "type": "LEFT_BRACE", + "lexeme": "{", + "line": 33, + "column": 17 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 33, + "column": 18 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 34, + "column": 1 + }, + { + "type": "OP", + "lexeme": "op", + "line": 34, + "column": 5 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 34, + "column": 7 + }, + { + "type": "IDENTIFIER", + "lexeme": "__sub__", + "line": 34, + "column": 8 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 34, + "column": 15 + }, + { + "type": "IDENTIFIER", + "lexeme": "Latitude", + "line": 34, + "column": 16 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 34, "column": 24 }, { - "type": "GREATER_EQUAL", - "lexeme": ">=", - "line": 22, + "type": "WHITESPACE", + "lexeme": " ", + "line": 34, + "column": 25 + }, + { + "type": "ARROW", + "lexeme": "->", + "line": 34, + "column": 26 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 34, + "column": 28 + }, + { + "type": "IDENTIFIER", + "lexeme": "Difference", + "line": 34, + "column": 29 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + "line": 34, + "column": 39 + }, + { + "type": "IDENTIFIER", + "lexeme": "Latitude", + "line": 34, + "column": 40 + }, + { + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 34, + "column": 48 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 34, + "column": 49 + }, + { + "type": "RIGHT_BRACE", + "lexeme": "}", + "line": 35, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 35, + "column": 2 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 36, + "column": 1 + }, + { + "type": "EXTEND", + "lexeme": "extend", + "line": 37, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 37, + "column": 7 + }, + { + "type": "IDENTIFIER", + "lexeme": "Longitude", + "line": 37, + "column": 8 + }, + { + "type": "WHITESPACE", 
+ "lexeme": " ", + "line": 37, + "column": 17 + }, + { + "type": "LEFT_BRACE", + "lexeme": "{", + "line": 37, + "column": 18 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 37, + "column": 19 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 38, + "column": 1 + }, + { + "type": "OP", + "lexeme": "op", + "line": 38, + "column": 5 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 38, + "column": 7 + }, + { + "type": "IDENTIFIER", + "lexeme": "__sub__", + "line": 38, + "column": 8 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 38, + "column": 15 + }, + { + "type": "IDENTIFIER", + "lexeme": "Longitude", + "line": 38, + "column": 16 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 38, "column": 25 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 22, + "line": 38, + "column": 26 + }, + { + "type": "ARROW", + "lexeme": "->", + "line": 38, "column": 27 }, { - "type": "NUMBER", - "lexeme": "0", - "line": 22, - "column": 28 + "type": "WHITESPACE", + "lexeme": " ", + "line": 38, + "column": 29 + }, + { + "type": "IDENTIFIER", + "lexeme": "Difference", + "line": 38, + "column": 30 + }, + { + "type": "LEFT_BRACKET", + "lexeme": "[", + "line": 38, + "column": 40 + }, + { + "type": "IDENTIFIER", + "lexeme": "Longitude", + "line": 38, + "column": 41 + }, + { + "type": "RIGHT_BRACKET", + "lexeme": "]", + "line": 38, + "column": 50 }, { "type": "NEWLINE", "lexeme": "\n", - "line": 22, - "column": 29 + "line": 38, + "column": 51 }, { - "type": "CONSTRAINT", - "lexeme": "constraint", - "line": 23, + "type": "RIGHT_BRACE", + "lexeme": "}", + "line": 39, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 39, + "column": 2 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 40, + "column": 1 + }, + { + "type": "COMMENT", + "lexeme": "// Predefined custom predicates that can be referenced in other definitions", + "line": 41, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 41, + 
"column": 76 + }, + { + "type": "PREDICATE", + "lexeme": "predicate", + "line": 42, "column": 1 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 23, - "column": 11 + "line": 42, + "column": 10 }, { "type": "IDENTIFIER", - "lexeme": "StrictlyPositive", - "line": 23, - "column": 12 + "lexeme": "Positive", + "line": 42, + "column": 11 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 42, + "column": 19 + }, + { + "type": "IDENTIFIER", + "lexeme": "v", + "line": 42, + "column": 20 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 42, + "column": 21 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 23, + "line": 42, + "column": 22 + }, + { + "type": "IDENTIFIER", + "lexeme": "float", + "line": 42, + "column": 23 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 42, "column": 28 }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 42, + "column": 29 + }, { "type": "EQUAL", "lexeme": "=", - "line": 23, + "line": 42, + "column": 30 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 42, + "column": 31 + }, + { + "type": "IDENTIFIER", + "lexeme": "v", + "line": 42, + "column": 32 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 42, + "column": 33 + }, + { + "type": "GREATER_EQUAL", + "lexeme": ">=", + "line": 42, + "column": 34 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 42, + "column": 36 + }, + { + "type": "NUMBER", + "lexeme": "0", + "line": 42, + "column": 37 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 42, + "column": 38 + }, + { + "type": "PREDICATE", + "lexeme": "predicate", + "line": 43, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 43, + "column": 10 + }, + { + "type": "IDENTIFIER", + "lexeme": "StrictlyPositive", + "line": 43, + "column": 11 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 43, + "column": 27 + }, + { + "type": "IDENTIFIER", + "lexeme": "v", + "line": 43, + "column": 28 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 43, 
"column": 29 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 23, + "line": 43, "column": 30 }, { - "type": "UNDERSCORE", - "lexeme": "_", - "line": 23, + "type": "IDENTIFIER", + "lexeme": "float", + "line": 43, "column": 31 }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 43, + "column": 36 + }, { "type": "WHITESPACE", "lexeme": " ", - "line": 23, - "column": 32 + "line": 43, + "column": 37 + }, + { + "type": "EQUAL", + "lexeme": "=", + "line": 43, + "column": 38 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 43, + "column": 39 + }, + { + "type": "IDENTIFIER", + "lexeme": "v", + "line": 43, + "column": 40 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 43, + "column": 41 }, { "type": "GREATER", "lexeme": ">", - "line": 23, - "column": 33 + "line": 43, + "column": 42 }, { "type": "WHITESPACE", "lexeme": " ", - "line": 23, - "column": 34 + "line": 43, + "column": 43 }, { "type": "NUMBER", "lexeme": "0", - "line": 23, + "line": 43, + "column": 44 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 43, + "column": 45 + }, + { + "type": "PREDICATE", + "lexeme": "predicate", + "line": 44, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 10 + }, + { + "type": "IDENTIFIER", + "lexeme": "Equatorial", + "line": 44, + "column": 11 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 44, + "column": 21 + }, + { + "type": "IDENTIFIER", + "lexeme": "loc", + "line": 44, + "column": 22 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 44, + "column": 25 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 26 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 44, + "column": 27 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 44, + "column": 38 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 39 + }, + { + "type": "EQUAL", + "lexeme": "=", + "line": 44, + "column": 40 + }, + { + "type": "WHITESPACE", + 
"lexeme": " ", + "line": 44, + "column": 41 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 44, + "column": 42 + }, + { + "type": "MINUS", + "lexeme": "-", + "line": 44, + "column": 43 + }, + { + "type": "NUMBER", + "lexeme": "10", + "line": 44, + "column": 44 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 46 + }, + { + "type": "LESS_EQUAL", + "lexeme": "<=", + "line": 44, + "column": 47 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 49 + }, + { + "type": "IDENTIFIER", + "lexeme": "loc", + "line": 44, + "column": 50 + }, + { + "type": "DOT", + "lexeme": ".", + "line": 44, + "column": 53 + }, + { + "type": "IDENTIFIER", + "lexeme": "lat", + "line": 44, + "column": 54 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 57 + }, + { + "type": "LESS_EQUAL", + "lexeme": "<=", + "line": 44, + "column": 58 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 44, + "column": 60 + }, + { + "type": "NUMBER", + "lexeme": "10", + "line": 44, + "column": 61 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 44, + "column": 63 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 44, + "column": 64 + }, + { + "type": "PREDICATE", + "lexeme": "predicate", + "line": 45, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 45, + "column": 10 + }, + { + "type": "IDENTIFIER", + "lexeme": "Arctic", + "line": 45, + "column": 11 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 45, + "column": 17 + }, + { + "type": "IDENTIFIER", + "lexeme": "loc", + "line": 45, + "column": 18 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 45, + "column": 21 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 45, + "column": 22 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 45, + "column": 23 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 45, + "column": 34 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + 
"line": 45, + "column": 35 + }, + { + "type": "EQUAL", + "lexeme": "=", + "line": 45, + "column": 36 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 45, + "column": 37 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 45, + "column": 38 + }, + { + "type": "IDENTIFIER", + "lexeme": "loc", + "line": 45, + "column": 39 + }, + { + "type": "DOT", + "lexeme": ".", + "line": 45, + "column": 42 + }, + { + "type": "IDENTIFIER", + "lexeme": "lat", + "line": 45, + "column": 43 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 45, + "column": 46 + }, + { + "type": "GREATER_EQUAL", + "lexeme": ">=", + "line": 45, + "column": 47 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 45, + "column": 49 + }, + { + "type": "NUMBER", + "lexeme": "66", + "line": 45, + "column": 50 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 45, + "column": 52 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 45, + "column": 53 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 46, + "column": 1 + }, + { + "type": "TYPE", + "lexeme": "type", + "line": 47, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 47, + "column": 5 + }, + { + "type": "IDENTIFIER", + "lexeme": "Person", + "line": 47, + "column": 6 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 47, + "column": 12 + }, + { + "type": "LEFT_BRACE", + "lexeme": "{", + "line": 47, + "column": 13 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 47, + "column": 14 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 48, + "column": 1 + }, + { + "type": "IDENTIFIER", + "lexeme": "name", + "line": 48, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 48, + "column": 9 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 48, + "column": 10 + }, + { + "type": "IDENTIFIER", + "lexeme": "str", + "line": 48, + "column": 11 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 48, + "column": 14 + }, + { + 
"type": "NEWLINE", + "lexeme": "\n", + "line": 49, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 50, + "column": 1 + }, + { + "type": "COMMENT", + "lexeme": "// Property with an inline constraint", + "line": 50, + "column": 5 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 50, + "column": 42 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 1 + }, + { + "type": "IDENTIFIER", + "lexeme": "age", + "line": 51, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 51, + "column": 8 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 9 + }, + { + "type": "IDENTIFIER", + "lexeme": "int", + "line": 51, + "column": 10 + }, + { + "type": "QMARK", + "lexeme": "?", + "line": 51, + "column": 13 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 14 + }, + { + "type": "WHERE", + "lexeme": "where", + "line": 51, + "column": 15 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 20 + }, + { + "type": "LEFT_PAREN", + "lexeme": "(", + "line": 51, + "column": 21 + }, + { + "type": "NUMBER", + "lexeme": "0", + "line": 51, + "column": 22 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 23 + }, + { + "type": "LESS_EQUAL", + "lexeme": "<=", + "line": 51, + "column": 24 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 26 + }, + { + "type": "UNDERSCORE", + "lexeme": "_", + "line": 51, + "column": 27 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 28 + }, + { + "type": "LESS", + "lexeme": "<", + "line": 51, + "column": 29 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 51, + "column": 30 + }, + { + "type": "NUMBER", + "lexeme": "150", + "line": 51, + "column": 31 + }, + { + "type": "RIGHT_PAREN", + "lexeme": ")", + "line": 51, + "column": 34 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 51, "column": 35 }, { "type": "NEWLINE", "lexeme": 
"\n", - "line": 23, - "column": 36 + "line": 52, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 53, + "column": 1 }, { "type": "COMMENT", - "lexeme": "//constraint Even = _ % 2 == 0", - "line": 24, + "lexeme": "// Property referencing a predicate", + "line": 53, + "column": 5 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 53, + "column": 40 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 54, "column": 1 }, + { + "type": "IDENTIFIER", + "lexeme": "height", + "line": 54, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 54, + "column": 11 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 54, + "column": 12 + }, + { + "type": "IDENTIFIER", + "lexeme": "float", + "line": 54, + "column": 13 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 54, + "column": 18 + }, + { + "type": "WHERE", + "lexeme": "where", + "line": 54, + "column": 19 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 54, + "column": 24 + }, + { + "type": "IDENTIFIER", + "lexeme": "StrictlyPositive", + "line": 54, + "column": 25 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 54, + "column": 41 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 55, + "column": 1 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 56, + "column": 1 + }, + { + "type": "IDENTIFIER", + "lexeme": "home", + "line": 56, + "column": 5 + }, + { + "type": "COLON", + "lexeme": ":", + "line": 56, + "column": 9 + }, + { + "type": "WHITESPACE", + "lexeme": " ", + "line": 56, + "column": 10 + }, + { + "type": "IDENTIFIER", + "lexeme": "GeoLocation", + "line": 56, + "column": 11 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 56, + "column": 22 + }, + { + "type": "RIGHT_BRACE", + "lexeme": "}", + "line": 57, + "column": 1 + }, + { + "type": "NEWLINE", + "lexeme": "\n", + "line": 57, + "column": 2 + }, { "type": "EOF", "lexeme": "", - "line": 24, - "column": 31 + "line": 58, + "column": 1 } ], "stmts": [ { 
- "_type": "TypeStmt", + "_type": "SimpleTypeStmt", + "template": null, + "name": "Custom", + "base": { + "_type": "TypeExpr", + "name": "float", + "template": null, + "optional": false + }, + "constraint": null + }, + { + "_type": "SimpleTypeStmt", + "template": null, "name": "Latitude", - "bases": [ - { - "_type": "TypeExpr", - "name": "float", - "constraints": [] - } - ], - "body": null - }, - { - "_type": "TypeStmt", - "name": "Longitude", - "bases": [ - { - "_type": "TypeExpr", - "name": "float", - "constraints": [] - } - ], - "body": null - }, - { - "_type": "TypeStmt", - "name": "GeoLocation", - "bases": [ - { - "_type": "TypeExpr", - "name": "Latitude", - "constraints": [] - }, - { - "_type": "TypeExpr", - "name": "Longitude", - "constraints": [] - } - ], - "body": { - "_type": "TypeBodyExpr", - "properties": [ - { - "_type": "PropertyStmt", - "name": "lat", - "type": { - "_type": "TypeExpr", - "name": "Latitude", - "constraints": [] - } - }, - { - "_type": "PropertyStmt", - "name": "lon", - "type": { - "_type": "TypeExpr", - "name": "Longitude", - "constraints": [] - } - } - ] - } - }, - { - "_type": "TypeStmt", - "name": "LatitudeDiff", - "bases": [ - { - "_type": "TypeExpr", - "name": "float", - "constraints": [] - } - ], - "body": null - }, - { - "_type": "TypeStmt", - "name": "LongitudeDiff", - "bases": [ - { - "_type": "TypeExpr", - "name": "float", - "constraints": [] - } - ], - "body": null - }, - { - "_type": "OpStmt", - "left": { + "base": { "_type": "TypeExpr", - "name": "Latitude", - "constraints": [] + "name": "float", + "template": null, + "optional": false }, - "op": "-", - "right": { - "_type": "TypeExpr", - "name": "Latitude", - "constraints": [] - }, - "result": { - "_type": "TypeExpr", - "name": "LatitudeDiff", - "constraints": [] - } - }, - { - "_type": "OpStmt", - "left": { - "_type": "TypeExpr", - "name": "Longitude", - "constraints": [] - }, - "op": "-", - "right": { - "_type": "TypeExpr", - "name": "Longitude", - "constraints": [] - 
}, - "result": { - "_type": "TypeExpr", - "name": "LongitudeDiff", - "constraints": [] - } - }, - { - "_type": "TypeStmt", - "name": "Age", - "bases": [ - { - "_type": "TypeExpr", - "name": "int", - "constraints": [ - { - "_type": "ConstraintExpr", - "left": { - "_type": "LiteralExpr", - "value": 0.0 - }, - "op": "<=", + "constraint": { + "_type": "GroupingExpr", + "expr": { + "_type": "BinaryExpr", + "left": { + "_type": "BinaryExpr", + "left": { + "_type": "UnaryExpr", + "operator": "-", "right": { - "_type": "WildcardExpr" + "_type": "LiteralExpr", + "value": 90.0 } }, - { - "_type": "ConstraintExpr", - "left": { - "_type": "WildcardExpr" + "operator": "<=", + "right": { + "_type": "WildcardExpr" + } + }, + "operator": "<=", + "right": { + "_type": "LiteralExpr", + "value": 90.0 + } + } + } + }, + { + "_type": "SimpleTypeStmt", + "template": null, + "name": "Longitude", + "base": { + "_type": "TypeExpr", + "name": "float", + "template": null, + "optional": false + }, + "constraint": { + "_type": "GroupingExpr", + "expr": { + "_type": "BinaryExpr", + "left": { + "_type": "BinaryExpr", + "left": { + "_type": "UnaryExpr", + "operator": "-", + "right": { + "_type": "LiteralExpr", + "value": 180.0 + } + }, + "operator": "<=", + "right": { + "_type": "WildcardExpr" + } + }, + "operator": "<=", + "right": { + "_type": "LiteralExpr", + "value": 180.0 + } + } + } + }, + { + "_type": "SimpleTypeStmt", + "template": { + "_type": "TemplateExpr", + "type": { + "_type": "TypeExpr", + "name": "T", + "template": null, + "optional": false + } + }, + "name": "Difference", + "base": { + "_type": "TypeExpr", + "name": "T", + "template": null, + "optional": false + }, + "constraint": null + }, + { + "_type": "ComplexTypeStmt", + "name": "GeoLocation", + "template": null, + "properties": [ + { + "_type": "PropertyStmt", + "name": "lat", + "type": { + "_type": "TypeExpr", + "name": "Latitude", + "template": null, + "optional": false + }, + "constraint": null + }, + { + "_type": 
"PropertyStmt", + "name": "lon", + "type": { + "_type": "TypeExpr", + "name": "Longitude", + "template": null, + "optional": false + }, + "constraint": null + } + ] + }, + { + "_type": "ExtendStmt", + "type": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + }, + "operations": [ + { + "_type": "OpStmt", + "name": "__sub__", + "operand": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + }, + "result": { + "_type": "TypeExpr", + "name": "Difference", + "template": { + "_type": "TemplateExpr", + "type": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + } + }, + "optional": false + } + } + ] + }, + { + "_type": "ComplexTypeStmt", + "name": "Difference", + "template": { + "_type": "TemplateExpr", + "type": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + } + }, + "properties": [ + { + "_type": "PropertyStmt", + "name": "lat", + "type": { + "_type": "TypeExpr", + "name": "Difference", + "template": { + "_type": "TemplateExpr", + "type": { + "_type": "TypeExpr", + "name": "Latitude", + "template": null, + "optional": false + } + }, + "optional": false + }, + "constraint": null + }, + { + "_type": "PropertyStmt", + "name": "lon", + "type": { + "_type": "TypeExpr", + "name": "Difference", + "template": { + "_type": "TemplateExpr", + "type": { + "_type": "TypeExpr", + "name": "Longitude", + "template": null, + "optional": false + } + }, + "optional": false + }, + "constraint": null + } + ] + }, + { + "_type": "ExtendStmt", + "type": { + "_type": "TypeExpr", + "name": "Latitude", + "template": null, + "optional": false + }, + "operations": [ + { + "_type": "OpStmt", + "name": "__sub__", + "operand": { + "_type": "TypeExpr", + "name": "Latitude", + "template": null, + "optional": false + }, + "result": { + "_type": "TypeExpr", + "name": "Difference", + "template": { + "_type": "TemplateExpr", + "type": { + 
"_type": "TypeExpr", + "name": "Latitude", + "template": null, + "optional": false + } + }, + "optional": false + } + } + ] + }, + { + "_type": "ExtendStmt", + "type": { + "_type": "TypeExpr", + "name": "Longitude", + "template": null, + "optional": false + }, + "operations": [ + { + "_type": "OpStmt", + "name": "__sub__", + "operand": { + "_type": "TypeExpr", + "name": "Longitude", + "template": null, + "optional": false + }, + "result": { + "_type": "TypeExpr", + "name": "Difference", + "template": { + "_type": "TemplateExpr", + "type": { + "_type": "TypeExpr", + "name": "Longitude", + "template": null, + "optional": false + } + }, + "optional": false + } + } + ] + }, + { + "_type": "PredicateStmt", + "name": "Positive", + "subject": "v", + "type": { + "_type": "TypeExpr", + "name": "float", + "template": null, + "optional": false + }, + "condition": { + "_type": "BinaryExpr", + "left": { + "_type": "VariableExpr", + "name": "v" + }, + "operator": ">=", + "right": { + "_type": "LiteralExpr", + "value": 0.0 + } + } + }, + { + "_type": "PredicateStmt", + "name": "StrictlyPositive", + "subject": "v", + "type": { + "_type": "TypeExpr", + "name": "float", + "template": null, + "optional": false + }, + "condition": { + "_type": "BinaryExpr", + "left": { + "_type": "VariableExpr", + "name": "v" + }, + "operator": ">", + "right": { + "_type": "LiteralExpr", + "value": 0.0 + } + } + }, + { + "_type": "PredicateStmt", + "name": "Equatorial", + "subject": "loc", + "type": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + }, + "condition": { + "_type": "GroupingExpr", + "expr": { + "_type": "BinaryExpr", + "left": { + "_type": "BinaryExpr", + "left": { + "_type": "UnaryExpr", + "operator": "-", + "right": { + "_type": "LiteralExpr", + "value": 10.0 + } + }, + "operator": "<=", + "right": { + "_type": "GetExpr", + "expr": { + "_type": "VariableExpr", + "name": "loc" }, - "op": "<", + "name": "lat" + } + }, + "operator": "<=", + 
"right": { + "_type": "LiteralExpr", + "value": 10.0 + } + } + } + }, + { + "_type": "PredicateStmt", + "name": "Arctic", + "subject": "loc", + "type": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + }, + "condition": { + "_type": "GroupingExpr", + "expr": { + "_type": "BinaryExpr", + "left": { + "_type": "GetExpr", + "expr": { + "_type": "VariableExpr", + "name": "loc" + }, + "name": "lat" + }, + "operator": ">=", + "right": { + "_type": "LiteralExpr", + "value": 66.0 + } + } + } + }, + { + "_type": "ComplexTypeStmt", + "name": "Person", + "template": null, + "properties": [ + { + "_type": "PropertyStmt", + "name": "name", + "type": { + "_type": "TypeExpr", + "name": "str", + "template": null, + "optional": false + }, + "constraint": null + }, + { + "_type": "PropertyStmt", + "name": "age", + "type": { + "_type": "TypeExpr", + "name": "int", + "template": null, + "optional": true + }, + "constraint": { + "_type": "GroupingExpr", + "expr": { + "_type": "BinaryExpr", + "left": { + "_type": "BinaryExpr", + "left": { + "_type": "LiteralExpr", + "value": 0.0 + }, + "operator": "<=", + "right": { + "_type": "WildcardExpr" + } + }, + "operator": "<", "right": { "_type": "LiteralExpr", "value": 150.0 } } - ] - } - ], - "body": null - }, - { - "_type": "ConstraintStmt", - "name": "Positive", - "constraint": { - "_type": "ConstraintExpr", - "left": { - "_type": "WildcardExpr" + } }, - "op": ">=", - "right": { - "_type": "LiteralExpr", - "value": 0.0 - } - } - }, - { - "_type": "ConstraintStmt", - "name": "StrictlyPositive", - "constraint": { - "_type": "ConstraintExpr", - "left": { - "_type": "WildcardExpr" + { + "_type": "PropertyStmt", + "name": "height", + "type": { + "_type": "TypeExpr", + "name": "float", + "template": null, + "optional": false + }, + "constraint": { + "_type": "VariableExpr", + "name": "StrictlyPositive" + } }, - "op": ">", - "right": { - "_type": "LiteralExpr", - "value": 0.0 + { + "_type": "PropertyStmt", 
+ "name": "home", + "type": { + "_type": "TypeExpr", + "name": "GeoLocation", + "template": null, + "optional": false + }, + "constraint": null } - } + ] } ], "errors": [] diff --git a/tests/lexer/test_annotation_lexer.py b/tests/lexer/test_annotation_lexer.py deleted file mode 100644 index 33a83a1..0000000 --- a/tests/lexer/test_annotation_lexer.py +++ /dev/null @@ -1,129 +0,0 @@ -from typing import Any - -import pytest - -from lexer.annotations import AnnotationLexer -from lexer.token import Token, TokenType - - -def scan(source: str) -> list[Token]: - return AnnotationLexer(source).process() - - -def assert_n_tokens(tokens: list[Token], n: int): - assert len(tokens) == n + 1 - assert tokens[-1].type == TokenType.EOF - - -@pytest.mark.parametrize( - "src,expected", - [ - ("(", TokenType.LEFT_PAREN), - (")", TokenType.RIGHT_PAREN), - ("[", TokenType.LEFT_BRACKET), - ("]", TokenType.RIGHT_BRACKET), - (":", TokenType.COLON), - (",", TokenType.COMMA), - ("_", TokenType.UNDERSCORE), - ], -) -def test_punctuation(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected", - [ - ("+", TokenType.PLUS), - (">", TokenType.GREATER), - (">=", TokenType.GREATER_EQUAL), - ("<", TokenType.LESS), - ("<=", TokenType.LESS_EQUAL), - ("=", TokenType.EQUAL), - ("==", TokenType.EQUAL_EQUAL), - ("!=", TokenType.BANG_EQUAL), - ], -) -def test_operators(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected", - [ - ("a", TokenType.IDENTIFIER), - ("foo", TokenType.IDENTIFIER), - ("foo1", TokenType.IDENTIFIER), - ("foo_", TokenType.IDENTIFIER), - ("foo_bar1_baz2", TokenType.IDENTIFIER), - ("FOO_BAR1_BAZ2", TokenType.IDENTIFIER), - ("True", TokenType.TRUE), - ("False", TokenType.FALSE), - ("None", TokenType.NONE), - ], -) -def 
test_identifiers_keywords(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected", - [ - ("#", TokenType.COMMENT), - ("# This is a comment", TokenType.COMMENT), - (" ", TokenType.WHITESPACE), - ("\t", TokenType.WHITESPACE), - ("\r", TokenType.WHITESPACE), - (" \t \t", TokenType.WHITESPACE), - ("\n", TokenType.NEWLINE), - ], -) -def test_misc(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected_type,expected_value", - [ - ("0", TokenType.NUMBER, 0), - ("0.0", TokenType.NUMBER, 0), - ("1234.56", TokenType.NUMBER, 1234.56), - ], -) -def test_literals(src: str, expected_type: TokenType, expected_value: Any): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected_type - assert tokens[0].value == expected_value - - -def test_single_bang_error(): - with pytest.raises(SyntaxError): - scan("!") - - -@pytest.mark.parametrize( - "src", - [ - "-", - "*", - "/", - "{", - "}", - "@", - '"', - "'", - ".", - ], -) -def test_unexpected_character(src: str): - with pytest.raises(SyntaxError): - scan(src) diff --git a/tests/lexer/test_midas_lexer.py b/tests/lexer/test_midas_lexer.py deleted file mode 100644 index 9cffff2..0000000 --- a/tests/lexer/test_midas_lexer.py +++ /dev/null @@ -1,129 +0,0 @@ -from typing import Any - -import pytest - -from lexer.midas import MidasLexer -from lexer.token import Token, TokenType - - -def scan(source: str) -> list[Token]: - return MidasLexer(source).process() - - -def assert_n_tokens(tokens: list[Token], n: int): - assert len(tokens) == n + 1 - assert tokens[-1].type == TokenType.EOF - - -@pytest.mark.parametrize( - "src,expected", - [ - ("(", TokenType.LEFT_PAREN), - (")", TokenType.RIGHT_PAREN), - ("[", TokenType.LEFT_BRACKET), - ("]", 
TokenType.RIGHT_BRACKET), - ("{", TokenType.LEFT_BRACE), - ("}", TokenType.RIGHT_BRACE), - (":", TokenType.COLON), - (",", TokenType.COMMA), - ("_", TokenType.UNDERSCORE), - ], -) -def test_punctuation(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected", - [ - ("+", TokenType.PLUS), - ("-", TokenType.MINUS), - ("*", TokenType.STAR), - ("/", TokenType.SLASH), - (">", TokenType.GREATER), - (">=", TokenType.GREATER_EQUAL), - ("<", TokenType.LESS), - ("<=", TokenType.LESS_EQUAL), - ("=", TokenType.EQUAL), - ("==", TokenType.EQUAL_EQUAL), - ("!=", TokenType.BANG_EQUAL), - ], -) -def test_operators(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected", - [ - ("a", TokenType.IDENTIFIER), - ("foo", TokenType.IDENTIFIER), - ("foo1", TokenType.IDENTIFIER), - ("foo_", TokenType.IDENTIFIER), - ("foo_bar1_baz2", TokenType.IDENTIFIER), - ("FOO_BAR1_BAZ2", TokenType.IDENTIFIER), - ("true", TokenType.TRUE), - ("false", TokenType.FALSE), - ("none", TokenType.NONE), - ], -) -def test_identifiers_keywords(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected", - [ - ("// This is a comment", TokenType.COMMENT), - ("/* This is a comment */", TokenType.COMMENT), - (" ", TokenType.WHITESPACE), - ("\t", TokenType.WHITESPACE), - ("\r", TokenType.WHITESPACE), - (" \t \t", TokenType.WHITESPACE), - ("\n", TokenType.NEWLINE), - ], -) -def test_misc(src: str, expected: TokenType): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected - - -@pytest.mark.parametrize( - "src,expected_type,expected_value", - [ - ("0", TokenType.NUMBER, 0), - ("0.0", TokenType.NUMBER, 0), - ("1234.56", 
TokenType.NUMBER, 1234.56), - ], -) -def test_literals(src: str, expected_type: TokenType, expected_value: Any): - tokens: list[Token] = scan(src) - assert_n_tokens(tokens, 1) - assert tokens[0].type == expected_type - assert tokens[0].value == expected_value - - -def test_single_bang_error(): - with pytest.raises(SyntaxError): - scan("!") - - -@pytest.mark.parametrize( - "src", - [ - "@", - '"', - "'", - ".", - ], -) -def test_unexpected_character(src: str): - with pytest.raises(SyntaxError): - scan(src) diff --git a/tests/parser/test_annotation_parser.py b/tests/parser/test_annotation_parser.py deleted file mode 100644 index 9c034dd..0000000 --- a/tests/parser/test_annotation_parser.py +++ /dev/null @@ -1,130 +0,0 @@ -from typing import Optional - -import pytest - -from core.ast.annotations import ( - AnnotationStmt, - ConstraintExpr, - Expr, - LiteralExpr, - SchemaElementExpr, - SchemaExpr, - Stmt, - TypeExpr, - WildcardExpr, -) -from lexer.annotations import AnnotationLexer -from lexer.position import Position -from lexer.token import Token -from parser.annotations import AnnotationParser - - -class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]): - def serialize(self, stmt: Stmt): - return stmt.accept(self) - - def visit_annotation_stmt(self, stmt: AnnotationStmt) -> str: - schema: str = "" - if stmt.schema is not None: - schema = " " + stmt.schema.accept(self) - return f"(annotation {stmt.name.lexeme}{schema})" - - def visit_schema_expr(self, expr: SchemaExpr) -> str: - elements: list[str] = [elmt.accept(self) for elmt in expr.elements] - return f"(schema {' '.join(elements)})" - - def visit_schema_element_expr(self, expr: SchemaElementExpr) -> str: - name: str = expr.name.lexeme if expr.name is not None else "_" - type: str = expr.type.accept(self) if expr.type is not None else "_" - return f"({name} {type})" - - def visit_type_expr(self, expr: TypeExpr) -> str: - res: str = f"({expr.name.lexeme}" - for constraint in expr.constraints: - res += " " + 
constraint.accept(self) - res += ")" - return res - - def visit_constraint_expr(self, expr: ConstraintExpr) -> str: - return f"(constraint {expr.left.accept(self)} {expr.op.lexeme} {expr.right.accept(self)})" - - def visit_wildcard_expr(self, expr: WildcardExpr) -> str: - return "(_)" - - def visit_literal_expr(self, expr: LiteralExpr) -> str: - return f"({expr.value})" - - -def parse(source: str) -> Optional[Stmt]: - tokens: list[Token] = AnnotationLexer(source).process() - return AnnotationParser(tokens).parse() - - -def must_parse(source: str) -> Stmt: - stmt: Optional[Stmt] = parse(source) - assert stmt is not None - return stmt - - -def ast_str(source: str) -> str: - stmt: Stmt = must_parse(source) - return AstSerializer().serialize(stmt) - - -@pytest.mark.parametrize( - "src,expected", - [ - ("Type", "(annotation Type)"), - ("Type[]", "(annotation Type (schema ))"), - ( - """ - Frame[ - verified: bool, - birth_year: int, - height: float + ( _ > 0 ) + ( _ < 250 ), - name: str, - date: datetime, - float, # unnamed - unknown: _, # untyped - _ # unnamed and untyped - ] - """, - "(annotation Frame (schema (verified (bool)) (birth_year (int)) (height (float (constraint (_) > (0.0)) (constraint (_) < (250.0)))) (name (str)) (date (datetime)) (_ (float)) (unknown _) (_ _)))", - ), - ], -) -def test_expressions(src: str, expected: str): - assert ast_str(src) == expected - - -@pytest.mark.parametrize( - "src,pos,should_fail", - [ - ("", (1, 1), True), - ("42", (1, 1), True), - ("True", (1, 1), True), - ("Type[", (1, 6), True), - ("Type[] Type2", (1, 8), False), - ("Type[bool:]", (1, 11), True), - ("Type[3]", (1, 6), True), - ("Type[bool float]", (1, 11), True), - ("Type[bool (_ < 2)]", (1, 11), True), - ("Type[bool + _ < 2)]", (1, 13), True), - ("Type[bool + (_ < 2]", (1, 19), True), - ("Type[bool + (< 2)]", (1, 14), True), - ("Type[bool + (_ + 2)]", (1, 16), True), - ("Type[bool + (Foo + Bar)]", (1, 14), True), - # ("Type[bool,]", (1, 11), True), # trailing comma is 
accepted, TODO: update parser or EBNF - ("Type[bool, Type[]]", (1, 16), True), - ("Type[foo: 3]", (1, 11), True), - ], -) -def test_parsing_error(src: str, pos: tuple[int, int], should_fail: bool): - tokens: list[Token] = AnnotationLexer(src).process() - parser: AnnotationParser = AnnotationParser(tokens) - stmt: Optional[Stmt] = parser.parse() - if should_fail: - assert stmt is None - assert len(parser.errors) != 0 - error_pos: Position = parser.errors[0].token.position - assert (error_pos.line, error_pos.column) == pos diff --git a/tests/parser/test_midas_parser.py b/tests/parser/test_midas_parser.py deleted file mode 100644 index 28a6aa7..0000000 --- a/tests/parser/test_midas_parser.py +++ /dev/null @@ -1,202 +0,0 @@ -import textwrap - -import pytest - -from core.ast.midas import ( - ConstraintExpr, - ConstraintStmt, - Expr, - LiteralExpr, - OpStmt, - PropertyStmt, - Stmt, - TypeBodyExpr, - TypeExpr, - TypeStmt, - WildcardExpr, -) -from lexer.midas import MidasLexer -from lexer.position import Position -from lexer.token import Token -from parser.midas import MidasParser - - -class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]): - def serialize(self, stmt: Stmt): - return stmt.accept(self) - - def visit_type_stmt(self, stmt: TypeStmt) -> str: - res: str = f"(type_def {stmt.name.lexeme}" - for base in stmt.bases: - res += " " + base.accept(self) - if stmt.body is not None: - res += " " + stmt.body.accept(self) - res += ")" - return res - - def visit_type_expr(self, expr: TypeExpr) -> str: - res: str = f"({expr.name.lexeme}" - for constraint in expr.constraints: - res += " " + constraint.accept(self) - res += ")" - return res - - def visit_constraint_expr(self, expr: ConstraintExpr) -> str: - return f"(constraint {expr.left.accept(self)} {expr.op.lexeme} {expr.right.accept(self)})" - - def visit_wildcard_expr(self, expr: WildcardExpr) -> str: - return "(_)" - - def visit_literal_expr(self, expr: LiteralExpr) -> str: - return f"({expr.value})" - - def 
visit_type_body_expr(self, expr: TypeBodyExpr) -> str: - res: str = "(body" - for prop in expr.properties: - res += " " + prop.accept(self) - res += ")" - return res - - def visit_property_stmt(self, stmt: PropertyStmt) -> str: - return f"(property {stmt.name.lexeme} {stmt.type.accept(self)})" - - def visit_op_stmt(self, stmt: OpStmt) -> str: - left: str = stmt.left.accept(self) - right: str = stmt.right.accept(self) - result: str = stmt.result.accept(self) - return f"(op_def {left} {stmt.op.lexeme} {right} {result})" - - def visit_constraint_stmt(self, stmt: ConstraintStmt) -> str: - return f"(constraint_def {stmt.name.lexeme} {stmt.constraint.accept(self)})" - - -def parse(source: str) -> list[Stmt]: - tokens: list[Token] = MidasLexer(source).process() - return MidasParser(tokens).parse() - - -def ast_str(source: str) -> list[str]: - stmts: list[Stmt] = parse(source) - return [AstSerializer().serialize(stmt) for stmt in stmts] - - -@pytest.mark.parametrize( - "src,expected", - [ - ("type Foo<>", "(type_def Foo)"), - ("type Foo", "(type_def Foo (Bar))"), - ("type Foo", "(type_def Foo (Bar) (Baz))"), - ( - "type Foo", - "(type_def Foo (Bar (constraint (_) < (2.0))) (Baz))", - ), - ( - """ - type Foo<> { - foo: Bar - } - """, - "(type_def Foo (body (property foo (Bar))))", - ), - ( - """ - type Foo<> { - foo: Bar + (_ != none) - foo2: Bar2 + (0 <= _) + (_ <= 100) - } - """, - "(type_def Foo (body (property foo (Bar (constraint (_) != (None)))) (property foo2 (Bar2 (constraint (0.0) <= (_)) (constraint (_) <= (100.0))))))", - ), - ("op + = ", "(op_def (A) + (B) (C))"), - ( - "op + = ", - "(op_def (A (constraint (_) < (100.0))) + (B (constraint (_) < (100.0))) (C (constraint (_) < (200.0))))", - ), - ( - "constraint Positive = _ >= 0", - "(constraint_def Positive (constraint (_) >= (0.0)))", - ), - ], -) -def test_expressions(src: str, expected: str | list[str]): - if isinstance(expected, str): - expected = [expected] - assert ast_str(src) == expected - - 
-@pytest.mark.parametrize( - "src,pos", - [ - ### - # Misc - ### - ("42", (1, 1)), - ("true", (1, 1)), - ("foo", (1, 1)), - ### - # Type statements - ### - ("type", (1, 5)), - ("type true", (1, 6)), - ("type Foo", (1, 9)), - ("type Foo<1>", (1, 10)), - # ("type Foo", (1, 16)), # trailing comma is accepted, TODO: update parser or EBNF - ("type Foo", (1, 17)), - ("type Foo { 3 }", (1, 19)), - ( - """ - type Foo { - foo - } - """, - (4, 1), - ), - ( - """ - type Foo { - foo: 3 - } - """, - (3, 10), - ), - ### - # Operation statements - ### - ("op", (1, 3)), - ("op float", (1, 4)), - ("op <", (1, 5)), - ("op ", (1, 11)), - ("op +", (1, 13)), - ("op + float", (1, 14)), - ("op + <", (1, 15)), - ("op + + ", (1, 21)), - ("op + =", (1, 23)), - ("op + = float", (1, 24)), - ("op + = <", (1, 25)), - ("op + = + = ", (1, 13)), - ("op + = ", (1, 23)), - ("op + = ", (1, 33)), - ### - # Constraint statements - ### - ("constraint", (1, 11)), - ("constraint 3", (1, 12)), - ("constraint Foo", (1, 15)), - ("constraint Foo =", (1, 17)), - ("constraint Foo = 3", (1, 19)), - ("constraint Foo = 3 <", (1, 21)), - ], -) -def test_parsing_error(src: str, pos: tuple[int, int]): - src = textwrap.dedent(src) - tokens: list[Token] = MidasLexer(src).process() - parser: MidasParser = MidasParser(tokens) - stmt: list[Stmt] = parser.parse() - assert len(stmt) == 0 - assert len(parser.errors) != 0 - error_pos: Position = parser.errors[0].token.position - assert (error_pos.line, error_pos.column) == pos