From 6e0190a3780cfe5f8c2892e6c0252c8d75ccd2fe Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 11:05:47 +0200 Subject: [PATCH 01/27] refactor: move source files in subdirectory --- gen/gen.py | 11 +++++++---- {core => midas}/ast/json_serializer.py | 2 +- {core => midas}/ast/midas.py | 2 +- {core => midas}/ast/printer.py | 4 ++-- {lexer => midas/lexer}/__init__.py | 0 {lexer => midas/lexer}/base.py | 4 ++-- {lexer => midas/lexer}/keyword.py | 2 +- {lexer => midas/lexer}/midas.py | 6 +++--- {lexer => midas/lexer}/position.py | 1 + {lexer => midas/lexer}/token.py | 2 +- {parser => midas/parser}/base.py | 4 ++-- {parser => midas/parser}/errors.py | 0 {parser => midas/parser}/midas.py | 8 ++++---- test.py | 8 ++++---- tester.py | 12 ++++++------ 15 files changed, 35 insertions(+), 31 deletions(-) rename {core => midas}/ast/json_serializer.py (99%) rename {core => midas}/ast/midas.py (99%) rename {core => midas}/ast/printer.py (99%) rename {lexer => midas/lexer}/__init__.py (100%) rename {lexer => midas/lexer}/base.py (98%) rename {lexer => midas/lexer}/keyword.py (87%) rename {lexer => midas/lexer}/midas.py (97%) rename {lexer => midas/lexer}/position.py (99%) rename {lexer => midas/lexer}/token.py (96%) rename {parser => midas/parser}/base.py (98%) rename {parser => midas/parser}/errors.py (100%) rename {parser => midas/parser}/midas.py (98%) diff --git a/gen/gen.py b/gen/gen.py index 47cb827..106dcc7 100644 --- a/gen/gen.py +++ b/gen/gen.py @@ -66,17 +66,18 @@ class {cls}({base}): return visitor.visit_{func_name}(self) """ + def snake_case(text: str) -> str: return re.sub(r"[A-Z]", lambda c: "_" + c.group().lower(), text).lower().strip("_") + def make_visitor_method(cls: str, param: str): method: str = VISITOR_METHOD_TEMPLATE.format( - func_name=snake_case(cls), - param=param, - cls=cls + func_name=snake_case(cls), param=param, cls=cls ) return method.strip("\n") + def make_class(name: str, cls: str, base: str): body: str = cls.split("\n", 1)[1] 
func_name: str = snake_case(name) @@ -88,6 +89,7 @@ def make_class(name: str, cls: str, base: str): ) return cls_def.strip("\n") + def generate(src: str): classes: list[str] = src.split("\n\n") stmt_visitor_methods: list[str] = [] @@ -114,10 +116,11 @@ def generate(src: str): expressions="\n\n\n".join(expressions), ) + def main(): root: Path = Path(__file__).parent.parent in_path: Path = root / "gen" / "ast.py" - out_path: Path = root / "core" / "ast" / "midas.py" + out_path: Path = root / "midas" / "ast" / "midas.py" src: str = in_path.read_text() generated: str = generate(src) diff --git a/core/ast/json_serializer.py b/midas/ast/json_serializer.py similarity index 99% rename from core/ast/json_serializer.py rename to midas/ast/json_serializer.py index 0064726..d602117 100644 --- a/core/ast/json_serializer.py +++ b/midas/ast/json_serializer.py @@ -1,6 +1,6 @@ from typing import Optional, Sequence -from core.ast.midas import ( +from midas.ast.midas import ( BinaryExpr, ComplexTypeStmt, Expr, diff --git a/core/ast/midas.py b/midas/ast/midas.py similarity index 99% rename from core/ast/midas.py rename to midas/ast/midas.py index f4280fb..28a7819 100644 --- a/core/ast/midas.py +++ b/midas/ast/midas.py @@ -9,7 +9,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any, Generic, Optional, TypeVar -from lexer.token import Token +from midas.lexer.token import Token T = TypeVar("T") diff --git a/core/ast/printer.py b/midas/ast/printer.py similarity index 99% rename from core/ast/printer.py rename to midas/ast/printer.py index 61fede8..474f0ec 100644 --- a/core/ast/printer.py +++ b/midas/ast/printer.py @@ -5,7 +5,7 @@ from contextlib import contextmanager from enum import Enum, auto from typing import Generator, Generic, Optional, Protocol, TypeVar -import core.ast.midas as m +import midas.ast.midas as m class _Level(Enum): @@ -84,7 +84,7 @@ class AstPrinter(Generic[T]): class MidasAstPrinter(AstPrinter, m.Expr.Visitor[None], 
m.Stmt.Visitor[None]): - #Statements + # Statements def visit_simple_type_stmt(self, stmt: m.SimpleTypeStmt): self._write_line("SimpleTypeStmt") diff --git a/lexer/__init__.py b/midas/lexer/__init__.py similarity index 100% rename from lexer/__init__.py rename to midas/lexer/__init__.py diff --git a/lexer/base.py b/midas/lexer/base.py similarity index 98% rename from lexer/base.py rename to midas/lexer/base.py index f6f357d..c4f4d82 100644 --- a/lexer/base.py +++ b/midas/lexer/base.py @@ -1,8 +1,8 @@ from abc import ABC, abstractmethod from typing import Any, Callable, Optional -from lexer.position import Position -from lexer.token import Token, TokenType +from midas.lexer.position import Position +from midas.lexer.token import Token, TokenType class MidasSyntaxError(Exception): diff --git a/lexer/keyword.py b/midas/lexer/keyword.py similarity index 87% rename from lexer/keyword.py rename to midas/lexer/keyword.py index e5c4b64..878f8cd 100644 --- a/lexer/keyword.py +++ b/midas/lexer/keyword.py @@ -1,4 +1,4 @@ -from lexer.token import TokenType +from midas.lexer.token import TokenType KEYWORDS: dict[str, TokenType] = { "type": TokenType.TYPE, diff --git a/lexer/midas.py b/midas/lexer/midas.py similarity index 97% rename from lexer/midas.py rename to midas/lexer/midas.py index 054f91d..fe521ce 100644 --- a/lexer/midas.py +++ b/midas/lexer/midas.py @@ -1,6 +1,6 @@ -from lexer.base import Lexer -from lexer.keyword import KEYWORDS -from lexer.token import TokenType +from midas.lexer.base import Lexer +from midas.lexer.keyword import KEYWORDS +from midas.lexer.token import TokenType class MidasLexer(Lexer): diff --git a/lexer/position.py b/midas/lexer/position.py similarity index 99% rename from lexer/position.py rename to midas/lexer/position.py index 306e24d..8ff0972 100644 --- a/lexer/position.py +++ b/midas/lexer/position.py @@ -5,6 +5,7 @@ from typing import Optional @dataclass(frozen=True) class Position: """A simple structure to store the position of a token""" + 
file: Optional[str] line: int column: int diff --git a/lexer/token.py b/midas/lexer/token.py similarity index 96% rename from lexer/token.py rename to midas/lexer/token.py index 1097493..76a0fb1 100644 --- a/lexer/token.py +++ b/midas/lexer/token.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from enum import Enum, auto from typing import Any -from lexer.position import Position +from midas.lexer.position import Position class TokenType(Enum): diff --git a/parser/base.py b/midas/parser/base.py similarity index 98% rename from parser/base.py rename to midas/parser/base.py index 74962db..255cd26 100644 --- a/parser/base.py +++ b/midas/parser/base.py @@ -2,8 +2,8 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Generic, TypeVar -from lexer.token import Token, TokenType -from parser.errors import ParsingError +from midas.lexer.token import Token, TokenType +from midas.parser.errors import ParsingError @dataclass(frozen=True) diff --git a/parser/errors.py b/midas/parser/errors.py similarity index 100% rename from parser/errors.py rename to midas/parser/errors.py diff --git a/parser/midas.py b/midas/parser/midas.py similarity index 98% rename from parser/midas.py rename to midas/parser/midas.py index 65e2786..a919994 100644 --- a/parser/midas.py +++ b/midas/parser/midas.py @@ -1,6 +1,6 @@ from typing import Optional -from core.ast.midas import ( +from midas.ast.midas import ( BinaryExpr, ComplexTypeStmt, Expr, @@ -21,9 +21,9 @@ from core.ast.midas import ( VariableExpr, WildcardExpr, ) -from lexer.token import Token, TokenType -from parser.base import Parser -from parser.errors import ParsingError +from midas.lexer.token import Token, TokenType +from midas.parser.base import Parser +from midas.parser.errors import ParsingError class MidasParser(Parser): diff --git a/test.py b/test.py index 048329a..522bbac 100644 --- a/test.py +++ b/test.py @@ -1,10 +1,10 @@ import json from pathlib import Path -from core.ast.printer 
import MidasAstPrinter -from lexer.midas import MidasLexer -from lexer.token import Token -from parser.midas import MidasParser +from midas.ast.printer import MidasAstPrinter +from midas.lexer.midas import MidasLexer +from midas.lexer.token import Token +from midas.parser.midas import MidasParser def test_midas(): diff --git a/tester.py b/tester.py index 597ddee..3238a67 100644 --- a/tester.py +++ b/tester.py @@ -8,12 +8,12 @@ from dataclasses import asdict, dataclass, field from pathlib import Path from typing import Iterator, Optional -from core.ast.json_serializer import AstJsonSerializer -from core.ast.midas import Stmt -from lexer.base import MidasSyntaxError -from lexer.midas import MidasLexer -from lexer.token import Token -from parser.midas import MidasParser +from midas.ast.json_serializer import AstJsonSerializer +from midas.ast.midas import Stmt +from midas.lexer.base import MidasSyntaxError +from midas.lexer.midas import MidasLexer +from midas.lexer.token import Token +from midas.parser.midas import MidasParser DEFAULT_BASE_DIR: Path = Path() / "tests" From 8a9bb6ef4e9509eb6d1ffcd704e8f359a47cface Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 11:09:24 +0200 Subject: [PATCH 02/27] feat: add pyproject.toml --- .gitignore | 4 +++- midas/__init__.py | 0 pyproject.toml | 10 ++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 midas/__init__.py create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index f63541d..b540ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ __pycache__ .env venv .venv -*.pyc \ No newline at end of file +*.pyc +uv.lock +.python-version \ No newline at end of file diff --git a/midas/__init__.py b/midas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e779591 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "midas" +version = "0.1.0" +description = "A 
static-first type checking framework for Python data-frames" +readme = "README.md" +requires-python = ">=3.11" + +[build-system] +requires = ['hatchling'] +build-backend = 'hatchling.build' From eb79cf6dc349ecef815aa802d89f6965058c8895 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 11:09:54 +0200 Subject: [PATCH 03/27] feat(cli): add basic CLI entrypoint --- midas/cli/__init__.py | 0 midas/cli/main.py | 6 ++++++ pyproject.toml | 4 ++++ 3 files changed, 10 insertions(+) create mode 100644 midas/cli/__init__.py create mode 100644 midas/cli/main.py diff --git a/midas/cli/__init__.py b/midas/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/midas/cli/main.py b/midas/cli/main.py new file mode 100644 index 0000000..0c33a3c --- /dev/null +++ b/midas/cli/main.py @@ -0,0 +1,6 @@ +import click + + +@click.command() +def midas(): + click.echo("Welcome to Midas!") diff --git a/pyproject.toml b/pyproject.toml index e779591..73cac0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,10 @@ version = "0.1.0" description = "A static-first type checking framework for Python data-frames" readme = "README.md" requires-python = ">=3.11" +dependencies = ["click>=8.4.1"] + +[project.scripts] +midas = "midas.cli.main:midas" [build-system] requires = ['hatchling'] From 5a112332f2959a96973559a2bdcc3646ea3b8155 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 11:15:46 +0200 Subject: [PATCH 04/27] chore: complete pyproject.toml --- pyproject.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 73cac0b..69a9f7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,16 @@ version = "0.1.0" description = "A static-first type checking framework for Python data-frames" readme = "README.md" requires-python = ">=3.11" +authors = [ + { name = "Louis Heredero", email = "louis.heredero@students.hevs.ch" }, +] +classifiers = ["Programming Language :: Python :: 3"] dependencies = 
["click>=8.4.1"] +[project.urls] +Homepage = "https://git.kbk28.ch/HEL/midas" +Repository = "https://git.kbk28.ch/HEL/midas" + [project.scripts] midas = "midas.cli.main:midas" From e2f3cabe151dfa99a84cf2e6ad53118da4b6d5f3 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 14:06:28 +0200 Subject: [PATCH 05/27] feat(cli): add compile command to read python AST --- midas/cli/main.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/midas/cli/main.py b/midas/cli/main.py index 0c33a3c..278abd9 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -1,6 +1,33 @@ +import ast +from typing import Optional, TextIO + import click -@click.command() +@click.group() def midas(): click.echo("Welcome to Midas!") + + +@midas.command() +@click.argument("file", type=click.File("r")) +def compile(file: TextIO): + raise NotImplementedError + + +@midas.group() +def utils(): + pass + + +@utils.command() +@click.option("-o", "--output", type=click.File("w")) +@click.argument("file", type=click.File("r")) +def dump_ast(output: Optional[TextIO], file: TextIO): + source: str = file.read() + tree: ast.Module = ast.parse(source, filename=file.name) + dump: str = ast.dump(tree, indent=4) + if output is None: + click.echo(dump) + else: + output.write(dump) From 7ce2840f03a2912a77b0821ed3661e9999a6f4ba Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 17:34:04 +0200 Subject: [PATCH 06/27] feat(parser): add AST nodes for python --- midas/ast/python.py | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 midas/ast/python.py diff --git a/midas/ast/python.py b/midas/ast/python.py new file mode 100644 index 0000000..63307c4 --- /dev/null +++ b/midas/ast/python.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +import ast +from dataclasses import dataclass +from typing import Generic, Optional, TypeVar + +T = TypeVar("T") + + 
+@dataclass(frozen=True) +class MidasType(ABC): + @abstractmethod + def accept(self, visitor: Visitor[T]) -> T: ... + + class Visitor(ABC, Generic[T]): + @abstractmethod + def visit_base_type(self, node: BaseType) -> T: ... + + @abstractmethod + def visit_frame_column(self, node: FrameColumn) -> T: ... + + @abstractmethod + def visit_frame_type(self, node: FrameType) -> T: ... + + +@dataclass(frozen=True) +class BaseType(MidasType): + base: str + param: Optional[MidasType] + constraint: Optional[ast.expr] = None + + def accept(self, visitor: MidasType.Visitor[T]) -> T: + return visitor.visit_base_type(self) + + +@dataclass(frozen=True) +class FrameColumn(MidasType): + name: Optional[str] + type: Optional[MidasType] + + def accept(self, visitor: MidasType.Visitor[T]) -> T: + return visitor.visit_frame_column(self) + + +@dataclass(frozen=True) +class FrameType(MidasType): + columns: list[FrameColumn] + + def accept(self, visitor: MidasType.Visitor[T]) -> T: + return visitor.visit_frame_type(self) From f4d2be3b1b983eb19ace9caf90f499c7a891c447 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 17:36:22 +0200 Subject: [PATCH 07/27] feat(parser): add simple Python parser --- midas/parser/python.py | 92 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 midas/parser/python.py diff --git a/midas/parser/python.py b/midas/parser/python.py new file mode 100644 index 0000000..e55d21e --- /dev/null +++ b/midas/parser/python.py @@ -0,0 +1,92 @@ +import ast +from typing import Any, Optional + +from midas.ast.python import BaseType, FrameColumn, FrameType, MidasType + + +class InvalidSyntaxError(Exception): + pass + + +class UnsupportedSyntaxError(Exception): + def __init__(self, expr: ast.expr) -> None: + super().__init__(f"Unsupported syntax: {ast.unparse(expr)}") + + +class PythonParser(ast.NodeVisitor): + def __init__(self) -> None: + super().__init__() + + self.annotations: list[tuple[str, Optional[MidasType]]] = [] 
+ + def visit_AnnAssign(self, node: ast.AnnAssign) -> Any: + match node: + case ast.AnnAssign( + target=ast.Name(id=target), annotation=annotation, simple=1 + ): + self.annotations.append( + (target, self._parse_type(annotation, root=True)) + ) + + case _: + print(f"Unsupported annotation: {ast.unparse(node)}") + + def _parse_type( + self, type_expr: ast.expr, root: bool = False + ) -> Optional[MidasType]: + match type_expr: + case ast.Subscript(value=ast.Name(id="Frame"), slice=schema): + return self._parse_frame_type(schema) + + case ast.Subscript(value=ast.Name(id=name), slice=param): + return BaseType( + base=name, param=self._parse_type(param), constraint=None + ) + + case ast.Name(id=name): + return BaseType(base=name, param=None, constraint=None) + + case ast.BinOp(left=left_expr, op=ast.Add(), right=right_expr): + print("Constraints not implemented yet") + return None + + case _: + if root: + return None + raise UnsupportedSyntaxError(type_expr) + + def _parse_frame_type(self, schema: ast.expr) -> FrameType: + columns: list[FrameColumn] = [] + + match schema: + case ast.Tuple(elts=cols): + for col in cols: + columns.append(self._parse_frame_column(col)) + case ast.Slice() | ast.Name(): + columns.append(self._parse_frame_column(schema)) + case _: + raise UnsupportedSyntaxError(schema) + + return FrameType(columns=columns) + + def _parse_frame_column(self, column: ast.expr) -> FrameColumn: + match column: + case ast.Name(): + return FrameColumn(name=None, type=self._parse_type(column)) + case ast.Slice(lower=ast.Name(id=name), upper=type_expr): + if name == "_": + name = None + + type: Optional[MidasType] = None + match type_expr: + case None: + raise InvalidSyntaxError("Missing column type") + case ast.Name(id="_"): + type = None + case ast.expr(): + type = self._parse_type(type_expr) + case _: + raise UnsupportedSyntaxError(type_expr) + return FrameColumn(name=name, type=type) + case _: + raise UnsupportedSyntaxError(column) From 
4f799caaf58316616cde706514a0ac899d7c8ed4 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 17:36:44 +0200 Subject: [PATCH 08/27] feat(parser): add pretty-printer for python AST --- midas/ast/printer.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/midas/ast/printer.py b/midas/ast/printer.py index 474f0ec..513b823 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -1,11 +1,13 @@ from __future__ import annotations +import ast import io from contextlib import contextmanager from enum import Enum, auto from typing import Generator, Generic, Optional, Protocol, TypeVar import midas.ast.midas as m +import midas.ast.python as p class _Level(Enum): @@ -346,3 +348,32 @@ class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]): def visit_type_expr(self, expr: m.TypeExpr): template: str = expr.template.accept(self) if expr.template is not None else "" return f"{expr.name.lexeme}{template}{'?' if expr.optional else ''}" + + +class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None]): + def visit_base_type(self, node: p.BaseType) -> None: + self._write_line("BaseType") + with self._child_level(): + self._write_line(f"base: {node.base}") + self._write_optional_child("param", node.param) + constraint_str: str = "None" + if node.constraint is not None: + constraint_str = ast.unparse(node.constraint) + self._write_line(f"constraint: {constraint_str}", last=True) + + def visit_frame_column(self, node: p.FrameColumn) -> None: + self._write_line("FrameColumn") + with self._child_level(): + self._write_line(f"name: {node.name}") + self._write_optional_child("type", node.type, last=True) + + def visit_frame_type(self, node: p.FrameType) -> None: + self._write_line("FrameType") + with self._child_level(): + self._write_line("columns", last=True) + with self._child_level(): + for i, col in enumerate(node.columns): + self._idx = i + if i == len(node.columns) - 1: + self._mark_last() + col.accept(self) From 
3d599b34626985396eb562ad12aa9ea2585a6ca4 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 17:37:20 +0200 Subject: [PATCH 09/27] feat(cli): add option to run python parser --- midas/cli/main.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/midas/cli/main.py b/midas/cli/main.py index 278abd9..3c033f0 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -3,6 +3,9 @@ from typing import Optional, TextIO import click +from midas.ast.printer import PythonAstPrinter +from midas.parser.python import PythonParser + @click.group() def midas(): @@ -22,11 +25,28 @@ def utils(): @utils.command() @click.option("-o", "--output", type=click.File("w")) +@click.option("-p", "--parse", is_flag=True) @click.argument("file", type=click.File("r")) -def dump_ast(output: Optional[TextIO], file: TextIO): +def dump_ast(output: Optional[TextIO], parse: bool, file: TextIO): source: str = file.read() tree: ast.Module = ast.parse(source, filename=file.name) - dump: str = ast.dump(tree, indent=4) + dump: str + + if parse: + parser = PythonParser() + parser.visit(tree) + printer = PythonAstPrinter() + dump = "" + for name, annotation in parser.annotations: + dump += f"{name} = " + if annotation is None: + dump += "None" + else: + dump += printer.print(annotation) + dump += "\n" + else: + dump = ast.dump(tree, indent=4) + if output is None: click.echo(dump) else: From 832c350b6115b1b29a42c139b976245b7898fad0 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 17:38:13 +0200 Subject: [PATCH 10/27] fix: use generic Difference type in example --- examples/00_syntax_prototype/02_custom_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/00_syntax_prototype/02_custom_types.py b/examples/00_syntax_prototype/02_custom_types.py index 0297058..16bf442 100644 --- a/examples/00_syntax_prototype/02_custom_types.py +++ b/examples/00_syntax_prototype/02_custom_types.py @@ -21,7 +21,7 @@ lat + lon 
# Invalid operation # Registered operations are permitted lat1: Latitude = lat[0] lat2: Latitude = lat[1] -lat_diff: LatitudeDiff = lat2 - lat1 # Valid operation +lat_diff: Difference[Latitude] = lat2 - lat1 # Valid operation # In addition to the type, a column can have one or more constraints, either defined inline or in a separate file df2: Frame[ From 8d7c1154328ba452241bb8c2aad429a94b19a43e Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 18:46:06 +0200 Subject: [PATCH 11/27] feat(parser): parse type constraints in python --- midas/ast/printer.py | 14 +++++++++----- midas/ast/python.py | 13 ++++++++++++- midas/parser/python.py | 38 ++++++++++++++++++++++++++++++-------- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/midas/ast/printer.py b/midas/ast/printer.py index 513b823..e9033e2 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -355,11 +355,15 @@ class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None]): self._write_line("BaseType") with self._child_level(): self._write_line(f"base: {node.base}") - self._write_optional_child("param", node.param) - constraint_str: str = "None" - if node.constraint is not None: - constraint_str = ast.unparse(node.constraint) - self._write_line(f"constraint: {constraint_str}", last=True) + self._write_optional_child("param", node.param, last=True) + + def visit_constraint_type(self, node: p.ConstraintType) -> None: + self._write_line("ConstraintType") + with self._child_level(): + self._write_line("type") + with self._child_level(single=True): + node.type.accept(self) + self._write_line(f"constraint: {ast.unparse(node.constraint)}", last=True) def visit_frame_column(self, node: p.FrameColumn) -> None: self._write_line("FrameColumn") diff --git a/midas/ast/python.py b/midas/ast/python.py index 63307c4..8b7f03e 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -17,6 +17,9 @@ class MidasType(ABC): @abstractmethod def visit_base_type(self, node: BaseType) -> T: ... 
+ @abstractmethod + def visit_constraint_type(self, node: ConstraintType) -> T: ... + @abstractmethod def visit_frame_column(self, node: FrameColumn) -> T: ... @@ -28,12 +31,20 @@ class MidasType(ABC): class BaseType(MidasType): base: str param: Optional[MidasType] - constraint: Optional[ast.expr] = None def accept(self, visitor: MidasType.Visitor[T]) -> T: return visitor.visit_base_type(self) +@dataclass(frozen=True) +class ConstraintType(MidasType): + type: MidasType + constraint: ast.expr + + def accept(self, visitor: MidasType.Visitor[T]) -> T: + return visitor.visit_constraint_type(self) + + @dataclass(frozen=True) class FrameColumn(MidasType): name: Optional[str] diff --git a/midas/parser/python.py b/midas/parser/python.py index e55d21e..6139801 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -1,7 +1,7 @@ import ast from typing import Any, Optional -from midas.ast.python import BaseType, FrameColumn, FrameType, MidasType +from midas.ast.python import BaseType, ConstraintType, FrameColumn, FrameType, MidasType class InvalidSyntaxError(Exception): @@ -10,7 +10,9 @@ class InvalidSyntaxError(Exception): class UnsupportedSyntaxError(Exception): def __init__(self, expr: ast.expr) -> None: - super().__init__(f"Unsupported syntax: {ast.unparse(expr)}") + super().__init__( + f"Unsupported syntax at L{expr.lineno}:{expr.col_offset}: {ast.unparse(expr)}" + ) class PythonParser(ast.NodeVisitor): @@ -39,16 +41,32 @@ class PythonParser(ast.NodeVisitor): return self._parse_frame_type(schema) case ast.Subscript(value=ast.Name(id=name), slice=param): - return BaseType( - base=name, param=self._parse_type(param), constraint=None - ) + return BaseType(base=name, param=self._parse_type(param)) case ast.Name(id=name): - return BaseType(base=name, param=None, constraint=None) + return BaseType(base=name, param=None) case ast.BinOp(left=left_expr, op=ast.Add(), right=right_expr): - print("Constraints not implemented yet") - return None + left = 
self._parse_type(left_expr) + match left: + case None: + raise InvalidSyntaxError("") + + # If chained constraints, separate base type and rebuild constraint + case ConstraintType(type=left_type, constraint=left_constraint): + constraint = ast.BinOp( + left=left_constraint, + op=ast.Add(), + right=right_expr, + ) + ast.copy_location(constraint, type_expr) + return ConstraintType( + type=left_type, + constraint=constraint, + ) + + case _: + return ConstraintType(type=left, constraint=right_expr) case _: if root: @@ -62,8 +80,10 @@ class PythonParser(ast.NodeVisitor): case ast.Tuple(elts=cols): for col in cols: columns.append(self._parse_frame_column(col)) + case ast.Slice() | ast.Name(): columns.append(self._parse_frame_column(schema)) + case _: raise UnsupportedSyntaxError(schema) @@ -73,6 +93,7 @@ class PythonParser(ast.NodeVisitor): match column: case ast.Name(): return FrameColumn(name=None, type=self._parse_type(column)) + case ast.Slice(lower=ast.Name(id=name), upper=type_expr): if name == "_": name = None @@ -88,5 +109,6 @@ class PythonParser(ast.NodeVisitor): case _: raise UnsupportedSyntaxError(type_expr) return FrameColumn(name=name, type=type) + case _: raise UnsupportedSyntaxError(column) From 5aedddfabb3a8d018c2ead872f984eb11316ccff Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 19:32:15 +0200 Subject: [PATCH 12/27] feat(parser): parse functions in python --- examples/00_syntax_prototype/04_functions.py | 15 ++++++ midas/ast/printer.py | 39 ++++++++++++++- midas/ast/python.py | 42 ++++++++++++++-- midas/cli/main.py | 5 ++ midas/parser/python.py | 50 +++++++++++++++++++- 5 files changed, 143 insertions(+), 8 deletions(-) create mode 100644 examples/00_syntax_prototype/04_functions.py diff --git a/examples/00_syntax_prototype/04_functions.py b/examples/00_syntax_prototype/04_functions.py new file mode 100644 index 0000000..3b07899 --- /dev/null +++ b/examples/00_syntax_prototype/04_functions.py @@ -0,0 +1,15 @@ +# type: ignore +# 
ruff: disable[F821] +from __future__ import annotations + + +def func( + col1: Column[float + (0 <= _ <= 1)], + col2: Column[float + (0 <= _ <= 1)], +) -> Column[float + (0 <= _ <= 2)]: + result: Column[float + (0 <= _ <= 2)] = col1 + col2 + return result + + +def func2(a: int, /, b: float, *, c: str): + pass diff --git a/midas/ast/printer.py b/midas/ast/printer.py index e9033e2..b92e40f 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -350,7 +350,7 @@ class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]): return f"{expr.name.lexeme}{template}{'?' if expr.optional else ''}" -class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None]): +class PythonAstPrinter(AstPrinter, p.Expr.Visitor[None]): def visit_base_type(self, node: p.BaseType) -> None: self._write_line("BaseType") with self._child_level(): @@ -381,3 +381,40 @@ class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None]): if i == len(node.columns) - 1: self._mark_last() col.accept(self) + + def visit_function(self, node: p.Function) -> None: + self._write_line("Function") + with self._child_level(): + self._write_line(f"name: {node.name}") + + self._write_line("posonlyargs") + with self._child_level(): + for i, arg in enumerate(node.posonlyargs): + self._idx = i + if i == len(node.posonlyargs) - 1: + self._mark_last() + arg.accept(self) + + self._write_line("args") + with self._child_level(): + for i, arg in enumerate(node.args): + self._idx = i + if i == len(node.args) - 1: + self._mark_last() + arg.accept(self) + + self._write_line("kwonlyargs") + with self._child_level(): + for i, arg in enumerate(node.kwonlyargs): + self._idx = i + if i == len(node.kwonlyargs) - 1: + self._mark_last() + arg.accept(self) + + self._write_optional_child("returns", node.returns, last=True) + + def visit_function_argument(self, node: p.FunctionArgument) -> None: + self._write_line("FunctionArgument") + with self._child_level(): + self._write_line(f"name: {node.name}") + 
self._write_optional_child("type", node.type, last=True) diff --git a/midas/ast/python.py b/midas/ast/python.py index 8b7f03e..9350fd0 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -9,7 +9,7 @@ T = TypeVar("T") @dataclass(frozen=True) -class MidasType(ABC): +class Expr(ABC): @abstractmethod def accept(self, visitor: Visitor[T]) -> T: ... @@ -26,13 +26,24 @@ class MidasType(ABC): @abstractmethod def visit_frame_type(self, node: FrameType) -> T: ... + @abstractmethod + def visit_function(self, node: Function) -> T: ... + + @abstractmethod + def visit_function_argument(self, node: FunctionArgument) -> T: ... + + +@dataclass(frozen=True) +class MidasType(Expr): + pass + @dataclass(frozen=True) class BaseType(MidasType): base: str param: Optional[MidasType] - def accept(self, visitor: MidasType.Visitor[T]) -> T: + def accept(self, visitor: Expr.Visitor[T]) -> T: return visitor.visit_base_type(self) @@ -41,7 +52,7 @@ class ConstraintType(MidasType): type: MidasType constraint: ast.expr - def accept(self, visitor: MidasType.Visitor[T]) -> T: + def accept(self, visitor: Expr.Visitor[T]) -> T: return visitor.visit_constraint_type(self) @@ -50,7 +61,7 @@ class FrameColumn(MidasType): name: Optional[str] type: Optional[MidasType] - def accept(self, visitor: MidasType.Visitor[T]) -> T: + def accept(self, visitor: Expr.Visitor[T]) -> T: return visitor.visit_frame_column(self) @@ -58,5 +69,26 @@ class FrameColumn(MidasType): class FrameType(MidasType): columns: list[FrameColumn] - def accept(self, visitor: MidasType.Visitor[T]) -> T: + def accept(self, visitor: Expr.Visitor[T]) -> T: return visitor.visit_frame_type(self) + + +@dataclass(frozen=True) +class Function(Expr): + name: str + posonlyargs: list[FunctionArgument] + args: list[FunctionArgument] + kwonlyargs: list[FunctionArgument] + returns: Optional[MidasType] + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_function(self) + + +@dataclass(frozen=True) +class 
FunctionArgument(Expr): + name: Optional[str] + type: Optional[MidasType] + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_function_argument(self) diff --git a/midas/cli/main.py b/midas/cli/main.py index 3c033f0..65ed210 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -44,6 +44,11 @@ def dump_ast(output: Optional[TextIO], parse: bool, file: TextIO): else: dump += printer.print(annotation) dump += "\n" + + dump += "\n# Functions\n\n" + + for func in parser.functions: + dump += printer.print(func) + "\n" else: dump = ast.dump(tree, indent=4) diff --git a/midas/parser/python.py b/midas/parser/python.py index 6139801..dc24022 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -1,7 +1,15 @@ import ast from typing import Any, Optional -from midas.ast.python import BaseType, ConstraintType, FrameColumn, FrameType, MidasType +from midas.ast.python import ( + BaseType, + ConstraintType, + FrameColumn, + FrameType, + Function, + FunctionArgument, + MidasType, +) class InvalidSyntaxError(Exception): @@ -20,6 +28,7 @@ class PythonParser(ast.NodeVisitor): super().__init__() self.annotations: list[tuple[str, Optional[MidasType]]] = [] + self.functions: list[Function] = [] def visit_AnnAssign(self, node: ast.AnnAssign) -> Any: match node: @@ -33,6 +42,43 @@ class PythonParser(ast.NodeVisitor): case _: print(f"Unsupported annotation: {ast.unparse(node)}") + def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: + self.functions.append(self._parse_function(node)) + + # Call visit on children to process body + # TODO: scope the resulting nodes to the function + self.generic_visit(node) + + def _parse_function(self, node: ast.FunctionDef) -> Function: + match node: + case ast.FunctionDef( + name=name, + args=ast.arguments( + posonlyargs=posonlyargs, + args=args, + kwonlyargs=kwonlyargs, + ), + returns=returns, + ): + + def parse_args(args_list: list[ast.arg]) -> list[FunctionArgument]: + return 
[self._parse_function_argument(arg) for arg in args_list] + + return Function( + name=name, + posonlyargs=parse_args(posonlyargs), + args=parse_args(args), + kwonlyargs=parse_args(kwonlyargs), + returns=self._parse_type(returns) if returns is not None else None, + ) + + def _parse_function_argument(self, arg: ast.arg) -> FunctionArgument: + name: str = arg.arg + type: Optional[MidasType] = None + if arg.annotation is not None: + type = self._parse_type(arg.annotation) + return FunctionArgument(name=name, type=type) + def _parse_type( self, type_expr: ast.expr, root: bool = False ) -> Optional[MidasType]: @@ -50,7 +96,7 @@ class PythonParser(ast.NodeVisitor): left = self._parse_type(left_expr) match left: case None: - raise InvalidSyntaxError("") + raise InvalidSyntaxError() # If chained constraints, separate base type and rebuild constraint case ConstraintType(type=left_type, constraint=left_constraint): From d0c54db33a783404d71068332a9d1aa905499dad Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 22:11:44 +0200 Subject: [PATCH 13/27] feat(parser): store locations in parsed nodes --- midas/ast/python.py | 30 ++++++++++++++++++++++++++++-- midas/parser/python.py | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/midas/ast/python.py b/midas/ast/python.py index 9350fd0..878b8b8 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -3,13 +3,39 @@ from __future__ import annotations from abc import ABC, abstractmethod import ast from dataclasses import dataclass -from typing import Generic, Optional, TypeVar +from typing import Generic, Optional, Protocol, TypeVar T = TypeVar("T") -@dataclass(frozen=True) +class HasLocation(Protocol): + lineno: int + col_offset: int + end_lineno: Optional[int] + end_col_offset: Optional[int] + + +@dataclass(frozen=True, kw_only=True) +class Location: + lineno: int + col_offset: int + end_lineno: Optional[int] + end_col_offset: Optional[int] + + 
@staticmethod + def from_ast(obj: HasLocation) -> Location: + return Location( + lineno=obj.lineno, + col_offset=obj.col_offset, + end_lineno=obj.end_lineno, + end_col_offset=obj.end_col_offset, + ) + + +@dataclass(frozen=True, kw_only=True) class Expr(ABC): + location: Optional[Location] = None + @abstractmethod def accept(self, visitor: Visitor[T]) -> T: ... diff --git a/midas/parser/python.py b/midas/parser/python.py index dc24022..51d68ca 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -8,6 +8,7 @@ from midas.ast.python import ( FrameType, Function, FunctionArgument, + Location, MidasType, ) @@ -50,6 +51,7 @@ class PythonParser(ast.NodeVisitor): self.generic_visit(node) def _parse_function(self, node: ast.FunctionDef) -> Function: + loc: Location = Location.from_ast(node) match node: case ast.FunctionDef( name=name, @@ -65,6 +67,7 @@ class PythonParser(ast.NodeVisitor): return [self._parse_function_argument(arg) for arg in args_list] return Function( + location=loc, name=name, posonlyargs=parse_args(posonlyargs), args=parse_args(args), @@ -73,24 +76,38 @@ class PythonParser(ast.NodeVisitor): ) def _parse_function_argument(self, arg: ast.arg) -> FunctionArgument: + loc: Location = Location.from_ast(arg) name: str = arg.arg type: Optional[MidasType] = None if arg.annotation is not None: type = self._parse_type(arg.annotation) - return FunctionArgument(name=name, type=type) + return FunctionArgument( + location=loc, + name=name, + type=type, + ) def _parse_type( self, type_expr: ast.expr, root: bool = False ) -> Optional[MidasType]: + loc: Location = Location.from_ast(type_expr) match type_expr: case ast.Subscript(value=ast.Name(id="Frame"), slice=schema): return self._parse_frame_type(schema) case ast.Subscript(value=ast.Name(id=name), slice=param): - return BaseType(base=name, param=self._parse_type(param)) + return BaseType( + location=loc, + base=name, + param=self._parse_type(param), + ) case ast.Name(id=name): - return BaseType(base=name, 
param=None) + return BaseType( + location=loc, + base=name, + param=None, + ) case ast.BinOp(left=left_expr, op=ast.Add(), right=right_expr): left = self._parse_type(left_expr) @@ -107,12 +124,17 @@ class PythonParser(ast.NodeVisitor): ) ast.copy_location(constraint, type_expr) return ConstraintType( + location=loc, type=left_type, constraint=constraint, ) case _: - return ConstraintType(type=left, constraint=right_expr) + return ConstraintType( + location=loc, + type=left, + constraint=right_expr, + ) case _: if root: @@ -120,6 +142,7 @@ class PythonParser(ast.NodeVisitor): raise UnsupportedSyntaxError(type_expr) def _parse_frame_type(self, schema: ast.expr) -> FrameType: + loc: Location = Location.from_ast(schema) columns: list[FrameColumn] = [] match schema: @@ -133,12 +156,17 @@ class PythonParser(ast.NodeVisitor): case _: raise UnsupportedSyntaxError(schema) - return FrameType(columns=columns) + return FrameType(location=loc, columns=columns) def _parse_frame_column(self, column: ast.expr) -> FrameColumn: + loc: Location = Location.from_ast(column) match column: case ast.Name(): - return FrameColumn(name=None, type=self._parse_type(column)) + return FrameColumn( + location=loc, + name=None, + type=self._parse_type(column), + ) case ast.Slice(lower=ast.Name(id=name), upper=type_expr): if name == "_": @@ -154,7 +182,7 @@ class PythonParser(ast.NodeVisitor): type = self._parse_type(type_expr) case _: raise UnsupportedSyntaxError(type_expr) - return FrameColumn(name=name, type=type) + return FrameColumn(location=loc, name=name, type=type) case _: raise UnsupportedSyntaxError(column) From 9b59058881b4094989303fe05cb4a162bdf0904d Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Fri, 22 May 2026 22:15:26 +0200 Subject: [PATCH 14/27] feat(cli): add highlight command --- midas/cli/highlight.css | 81 +++++++++++++++++++++++++++ midas/cli/highlighter.py | 115 +++++++++++++++++++++++++++++++++++++++ midas/cli/main.py | 18 ++++++ 3 files changed, 214 insertions(+) create 
mode 100644 midas/cli/highlight.css create mode 100644 midas/cli/highlighter.py diff --git a/midas/cli/highlight.css b/midas/cli/highlight.css new file mode 100644 index 0000000..a2f378c --- /dev/null +++ b/midas/cli/highlight.css @@ -0,0 +1,81 @@ +html, +body { + margin: 0; + font-size: 14pt; +} + +* { + box-sizing: border-box; +} + +#code { + display: flex; + flex-direction: column; + font-family: monospace; + white-space: pre-wrap; +} + +.line { + display: flex; + + &:nth-child(odd) { + background-color: rgb(247, 247, 247); + } + + .no { + width: 4em; + text-align: right; + padding: 0.2em 0.4em; + border-right: solid black 1px; + flex-shrink: 0; + } + + .txt { + flex-grow: 1; + padding: 0.2em 0.8em; + } +} + +span { + --col: transparent; + --opacity: 0.1; + --border: 0px; + background-color: rgba(var(--col), var(--opacity)); + outline: solid rgb(var(--col)) var(--border); + outline-offset: 2px; + border-radius: 2px; + + &:hover:not(:has(*:hover)) { + --opacity: 0.8; + --border: 2px; + z-index: 10; + } + + &.base-type { + --col: 108, 233, 108; + } + + &.param { + --col: 103, 192, 224; + } + + &.constraint-type { + --col: 174, 200, 195; + } + + &.frame-column { + --col: 216, 231, 81; + } + + &.frame-type { + --col: 231, 46, 40; + } + + &.function { + --col: 215, 103, 224; + } + + &.argument { + --col: 103, 192, 224; + } +} \ No newline at end of file diff --git a/midas/cli/highlighter.py b/midas/cli/highlighter.py new file mode 100644 index 0000000..e302c3a --- /dev/null +++ b/midas/cli/highlighter.py @@ -0,0 +1,115 @@ +from pathlib import Path +from typing import TextIO + +from midas.ast.python import ( + BaseType, + ConstraintType, + Expr, + FrameColumn, + FrameType, + Function, + FunctionArgument, +) + + +class PythonHighlighter(Expr.Visitor[None]): + CSS_PATH: Path = Path(__file__).parent / "highlight.css" + + def __init__(self, source: str) -> None: + self.source: str = source + self.lines: list[str] = self.source.splitlines() + self.openings: dict[tuple[int, 
int], list[str]] = {} + self.closings: dict[tuple[int, int], list[str]] = {} + + def highlight(self, node: Expr): + node.accept(self) + + def dump(self, buf: TextIO): + css: str = self.CSS_PATH.read_text() + css = "\n".join((" " + line).rstrip() for line in css.splitlines()) + lines: list[str] = [ + "", + '', + "", + ' ', + ' ', + " Highlighted file", + " ", + "", + "", + '
', + ] + for l, line in enumerate(self.lines): + lineno: int = l + 1 + line_buf: str = ( + f'
{lineno}
' + ) + for c, char in enumerate(line): + pos: tuple[int, int] = (lineno, c) + closings: list[str] = self.closings.get(pos, []) + openings: list[str] = self.openings.get(pos, []) + line_buf += "".join(closings + openings) + line_buf += char + line_buf += "
" + lines.append(" " + line_buf) + lines.extend( + [ + "
", + "", + "", + ] + ) + + buf.write("\n".join(lines)) + + def wrap(self, node: Expr, cls: str): + if node.location is None: + return + if node.location.end_lineno is None or node.location.end_col_offset is None: + return + start_pos: tuple[int, int] = (node.location.lineno, node.location.col_offset) + end_pos: tuple[int, int] = ( + node.location.end_lineno, + node.location.end_col_offset, + ) + opening: str = f'' + closing: str = "" + self.openings.setdefault(start_pos, []).append(opening) + self.closings.setdefault(end_pos, []).insert(0, closing) + if start_pos[0] != end_pos[0]: + for l in range(start_pos[0], end_pos[0]): + c: int = len(self.lines[l - 1]) + self.closings.setdefault((l, c), []).insert(0, closing) + self.openings.setdefault((l + 1, 0), []).append(opening) + + def visit_base_type(self, node: BaseType) -> None: + self.wrap(node, "base-type") + if node.param is not None: + self.wrap(node.param, "param") + node.param.accept(self) + + def visit_constraint_type(self, node: ConstraintType) -> None: + self.wrap(node, "constraint-type") + node.type.accept(self) + + def visit_frame_column(self, node: FrameColumn) -> None: + self.wrap(node, "frame-column") + if node.type is not None: + node.type.accept(self) + + def visit_frame_type(self, node: FrameType) -> None: + self.wrap(node, "frame-type") + for column in node.columns: + column.accept(self) + + def visit_function(self, node: Function) -> None: + self.wrap(node, "function") + for arg in node.posonlyargs + node.args + node.kwonlyargs: + arg.accept(self) + + def visit_function_argument(self, node: FunctionArgument) -> None: + self.wrap(node, "argument") + if node.type is not None: + node.type.accept(self) diff --git a/midas/cli/main.py b/midas/cli/main.py index 65ed210..4dd79c0 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -4,6 +4,7 @@ from typing import Optional, TextIO import click from midas.ast.printer import PythonAstPrinter +from midas.cli.highlighter import PythonHighlighter from 
midas.parser.python import PythonParser @@ -56,3 +57,20 @@ def dump_ast(output: Optional[TextIO], parse: bool, file: TextIO): click.echo(dump) else: output.write(dump) + + +@utils.command() +@click.option("-o", "--output", type=click.File("w"), default="-") +@click.argument("file", type=click.File("r")) +def highlight(output: TextIO, file: TextIO): + source: str = file.read() + tree: ast.Module = ast.parse(source, filename=file.name) + parser = PythonParser() + parser.visit(tree) + highlighter: PythonHighlighter = PythonHighlighter(source) + for _, annotation in parser.annotations: + if annotation is not None: + highlighter.highlight(annotation) + for func in parser.functions: + highlighter.highlight(func) + highlighter.dump(output) From e94db2181fc26e772bf45799ef270200884c7355 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 12:14:14 +0200 Subject: [PATCH 15/27] feat(parser): add location to midas AST nodes --- midas/ast/location.py | 37 +++++++++++++ midas/ast/midas.py | 9 +++- midas/ast/python.py | 28 ++-------- midas/lexer/token.py | 23 +++++++++ midas/parser/midas.py | 115 ++++++++++++++++++++++++++++++++--------- midas/parser/python.py | 2 +- 6 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 midas/ast/location.py diff --git a/midas/ast/location.py b/midas/ast/location.py new file mode 100644 index 0000000..47fe360 --- /dev/null +++ b/midas/ast/location.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional, Protocol + + +class HasLocation(Protocol): + lineno: int + col_offset: int + end_lineno: Optional[int] + end_col_offset: Optional[int] + + +@dataclass(frozen=True, kw_only=True) +class Location: + lineno: int + col_offset: int + end_lineno: Optional[int] + end_col_offset: Optional[int] + + @staticmethod + def from_ast(obj: HasLocation) -> Location: + return Location( + lineno=obj.lineno, + col_offset=obj.col_offset, + end_lineno=obj.end_lineno, + 
end_col_offset=obj.end_col_offset, + ) + + @staticmethod + def span(start: Location, end: Location) -> Location: + return Location( + lineno=start.lineno, + col_offset=start.col_offset, + end_lineno=end.end_lineno, + end_col_offset=end.end_col_offset, + ) diff --git a/midas/ast/midas.py b/midas/ast/midas.py index 28a7819..1ff503d 100644 --- a/midas/ast/midas.py +++ b/midas/ast/midas.py @@ -9,6 +9,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any, Generic, Optional, TypeVar +from midas.ast.location import Location from midas.lexer.token import Token T = TypeVar("T") @@ -18,8 +19,10 @@ T = TypeVar("T") ############## -@dataclass(frozen=True) +@dataclass(frozen=True, kw_only=True) class Stmt(ABC): + location: Optional[Location] = None + @abstractmethod def accept(self, visitor: Visitor[T]) -> T: ... @@ -109,8 +112,10 @@ class PredicateStmt(Stmt): ############### -@dataclass(frozen=True) +@dataclass(frozen=True, kw_only=True) class Expr(ABC): + location: Optional[Location] = None + @abstractmethod def accept(self, visitor: Visitor[T]) -> T: ... 
diff --git a/midas/ast/python.py b/midas/ast/python.py index 878b8b8..c25b438 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -3,35 +3,13 @@ from __future__ import annotations from abc import ABC, abstractmethod import ast from dataclasses import dataclass -from typing import Generic, Optional, Protocol, TypeVar +from typing import Generic, Optional, TypeVar + +from midas.ast.location import Location T = TypeVar("T") -class HasLocation(Protocol): - lineno: int - col_offset: int - end_lineno: Optional[int] - end_col_offset: Optional[int] - - -@dataclass(frozen=True, kw_only=True) -class Location: - lineno: int - col_offset: int - end_lineno: Optional[int] - end_col_offset: Optional[int] - - @staticmethod - def from_ast(obj: HasLocation) -> Location: - return Location( - lineno=obj.lineno, - col_offset=obj.col_offset, - end_lineno=obj.end_lineno, - end_col_offset=obj.end_col_offset, - ) - - @dataclass(frozen=True, kw_only=True) class Expr(ABC): location: Optional[Location] = None diff --git a/midas/lexer/token.py b/midas/lexer/token.py index 76a0fb1..052d8a6 100644 --- a/midas/lexer/token.py +++ b/midas/lexer/token.py @@ -1,7 +1,10 @@ +from __future__ import annotations + from dataclasses import dataclass from enum import Enum, auto from typing import Any +from midas.ast.location import Location from midas.lexer.position import Position @@ -63,3 +66,23 @@ class Token: lexeme: str value: Any position: Position + + def get_location(self) -> Location: + lineno: int = self.position.line + col_offset: int = self.position.column - 1 + end_lineno = lineno + end_col_offset = col_offset + for c in self.lexeme: + end_col_offset += 1 + if c == "\n": + end_lineno += 1 + end_col_offset = 0 + return Location( + lineno=lineno, + col_offset=col_offset, + end_lineno=end_lineno, + end_col_offset=end_col_offset, + ) + + def location_to(self, to: Token) -> Location: + return Location.span(self.get_location(), to.get_location()) diff --git a/midas/parser/midas.py 
b/midas/parser/midas.py index a919994..4998c51 100644 --- a/midas/parser/midas.py +++ b/midas/parser/midas.py @@ -1,5 +1,6 @@ from typing import Optional +from midas.ast.location import Location from midas.ast.midas import ( BinaryExpr, ComplexTypeStmt, @@ -104,6 +105,7 @@ class MidasParser(Parser): Returns: TypeStmt: the parsed type declaration statement """ + keyword: Token = self.previous() name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") template: Optional[TemplateExpr] = None if self.check(TokenType.LEFT_BRACKET): @@ -116,11 +118,20 @@ class MidasParser(Parser): if self.match(TokenType.WHERE): constraint = self.constraint() return SimpleTypeStmt( - name=name, template=template, base=base, constraint=constraint + location=keyword.location_to(self.previous()), + name=name, + template=template, + base=base, + constraint=constraint, ) else: properties: list[PropertyStmt] = self.type_properties() - return ComplexTypeStmt(name=name, template=template, properties=properties) + return ComplexTypeStmt( + location=keyword.location_to(self.previous()), + name=name, + template=template, + properties=properties, + ) def template_expr(self) -> TemplateExpr: """Parse a generic template expression @@ -130,10 +141,14 @@ class MidasParser(Parser): Returns: TemplateExpr: the parsed template expression """ - self.consume(TokenType.LEFT_BRACKET, "Missing '[' before template expression") + left: Token = self.consume( + TokenType.LEFT_BRACKET, "Missing '[' before template expression" + ) type: TypeExpr = self.type_expr() - self.consume(TokenType.RIGHT_BRACKET, "Missing ']' after template expression") - return TemplateExpr(type=type) + right: Token = self.consume( + TokenType.RIGHT_BRACKET, "Missing ']' after template expression" + ) + return TemplateExpr(location=left.location_to(right), type=type) def type_expr(self) -> TypeExpr: """Parse a type expression @@ -149,7 +164,12 @@ class MidasParser(Parser): if self.check(TokenType.LEFT_BRACKET): template = 
self.template_expr() optional: bool = self.match(TokenType.QMARK) - return TypeExpr(name=name, template=template, optional=optional) + return TypeExpr( + location=name.location_to(self.previous()), + name=name, + template=template, + optional=optional, + ) def simple_type_expr(self) -> SimpleTypeExpr: """Parse a simple type expression @@ -161,7 +181,9 @@ class MidasParser(Parser): """ name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name") optional: bool = self.match(TokenType.QMARK) - return SimpleTypeExpr(name=name, optional=optional) + return SimpleTypeExpr( + location=name.location_to(self.previous()), name=name, optional=optional + ) def constraint(self) -> Expr: """Parse a constraint @@ -183,7 +205,12 @@ class MidasParser(Parser): while self.match(TokenType.AND): operator: Token = self.previous() right: Expr = self.equality() - expr = LogicalExpr(left=expr, operator=operator, right=right) + location: Optional[Location] = None + if expr.location and right.location: + location = Location.span(expr.location, right.location) + expr = LogicalExpr( + location=location, left=expr, operator=operator, right=right + ) return expr def equality(self) -> Expr: @@ -196,7 +223,12 @@ class MidasParser(Parser): while self.match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL): operator: Token = self.previous() right: Expr = self.comparison() - expr = BinaryExpr(left=expr, operator=operator, right=right) + location: Optional[Location] = None + if expr.location and right.location: + location = Location.span(expr.location, right.location) + expr = BinaryExpr( + location=location, left=expr, operator=operator, right=right + ) return expr def comparison(self) -> Expr: @@ -214,7 +246,12 @@ class MidasParser(Parser): ): operator: Token = self.previous() right: Expr = self.unary() - expr = BinaryExpr(left=expr, operator=operator, right=right) + location: Optional[Location] = None + if expr.location and right.location: + location = Location.span(expr.location, 
right.location) + expr = BinaryExpr( + location=location, left=expr, operator=operator, right=right + ) return expr def unary(self) -> Expr: @@ -226,7 +263,10 @@ class MidasParser(Parser): if self.match(TokenType.MINUS): operator: Token = self.previous() right: Expr = self.unary() - return UnaryExpr(operator=operator, right=right) + location: Optional[Location] = None + if right.location: + location = Location.span(operator.get_location(), right.location) + return UnaryExpr(location=location, operator=operator, right=right) return self.reference() def reference(self) -> Expr: @@ -240,7 +280,10 @@ class MidasParser(Parser): name: Token = self.consume( TokenType.IDENTIFIER, "Expected property name after '.'" ) - expr = GetExpr(expr=expr, name=name) + location: Optional[Location] = None + if expr.location: + location = Location.span(expr.location, name.get_location()) + expr = GetExpr(location=location, expr=expr, name=name) return expr def primary(self) -> Expr: @@ -251,26 +294,27 @@ class MidasParser(Parser): Returns: Expr: the parsed expression """ + token: Token = self.peek() if self.match(TokenType.FALSE): - return LiteralExpr(False) + return LiteralExpr(location=token.get_location(), value=False) if self.match(TokenType.TRUE): - return LiteralExpr(True) + return LiteralExpr(location=token.get_location(), value=True) if self.match(TokenType.NONE): - return LiteralExpr(None) + return LiteralExpr(location=token.get_location(), value=None) if self.match(TokenType.NUMBER): - return LiteralExpr(self.previous().value) + return LiteralExpr(location=token.get_location(), value=token.value) if self.match(TokenType.IDENTIFIER): - return VariableExpr(self.previous()) + return VariableExpr(location=token.get_location(), name=token) if self.match(TokenType.UNDERSCORE): - return WildcardExpr(self.previous()) + return WildcardExpr(location=token.get_location(), token=token) if self.match(TokenType.LEFT_PAREN): expr: Expr = self.constraint() - self.consume(TokenType.RIGHT_PAREN, 
"Unclosed parenthesis") - return GroupingExpr(expr) + right: Token = self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis") + return GroupingExpr(location=token.location_to(right), expr=expr) raise self.error(self.peek(), "Expected expression") @@ -304,7 +348,12 @@ class MidasParser(Parser): constraint: Optional[Expr] = None if self.match(TokenType.WHERE): constraint = self.constraint() - return PropertyStmt(name=name, type=type, constraint=constraint) + return PropertyStmt( + location=name.location_to(self.previous()), + name=name, + type=type, + constraint=constraint, + ) def extend_declaration(self) -> ExtendStmt: """Parse an extension definition @@ -314,13 +363,17 @@ class MidasParser(Parser): Returns: ExtendStmt: the parsed extension statement """ + keyword: Token = self.previous() type: TypeExpr = self.type_expr() self.consume(TokenType.LEFT_BRACE, "Expected '{' to start extend body") operations: list[OpStmt] = [] while not self.is_at_end() and not self.check(TokenType.RIGHT_BRACE): operations.append(self.op_declaration()) self.consume(TokenType.RIGHT_BRACE, "Unclosed extend body") - return ExtendStmt(type=type, operations=operations) + location: Optional[Location] = None + if type.location: + location = keyword.location_to(self.previous()) + return ExtendStmt(location=location, type=type, operations=operations) def op_declaration(self) -> OpStmt: """Parse an operation definition @@ -330,7 +383,7 @@ class MidasParser(Parser): Returns: OpStmt: the parsed operation statement """ - self.consume(TokenType.OP, "Expected 'op' keyword") + keyword: Token = self.consume(TokenType.OP, "Expected 'op' keyword") name: Token = self.consume(TokenType.IDENTIFIER, "Expected operation name") self.consume(TokenType.LEFT_PAREN, "Expected '(' before operand type") @@ -340,7 +393,12 @@ class MidasParser(Parser): self.consume(TokenType.ARROW, "Expected '->' before result type") result: TypeExpr = self.type_expr() - return OpStmt(name=name, operand=operand, result=result) + 
return OpStmt( + location=keyword.location_to(self.previous()), + name=name, + operand=operand, + result=result, + ) def predicate_declaration(self) -> PredicateStmt: """Parse a predicate declaration @@ -350,6 +408,7 @@ class MidasParser(Parser): Returns: PredicateStmt: the parsed predicate declaration statement """ + keyword: Token = self.previous() name: Token = self.consume(TokenType.IDENTIFIER, "Expected predicate name") self.consume(TokenType.LEFT_PAREN, "Expected '(' before predicate subject") subject: Token = self.consume(TokenType.IDENTIFIER, "Expected subject name") @@ -358,4 +417,10 @@ class MidasParser(Parser): self.consume(TokenType.RIGHT_PAREN, "Expected ')' after predicate subject") self.consume(TokenType.EQUAL, "Expected '=' after predicate subject") condition: Expr = self.constraint() - return PredicateStmt(name=name, subject=subject, type=type, condition=condition) + return PredicateStmt( + location=keyword.location_to(self.previous()), + name=name, + subject=subject, + type=type, + condition=condition, + ) diff --git a/midas/parser/python.py b/midas/parser/python.py index 51d68ca..6e0ffe1 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -1,6 +1,7 @@ import ast from typing import Any, Optional +from midas.ast.location import Location from midas.ast.python import ( BaseType, ConstraintType, @@ -8,7 +9,6 @@ from midas.ast.python import ( FrameType, Function, FunctionArgument, - Location, MidasType, ) From 0e0a1b26f2e8ca8bb62a656c282b4803b55d5b0b Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 12:14:55 +0200 Subject: [PATCH 16/27] feat(cli): add midas highlighter --- midas/cli/highlight.css | 28 ------- midas/cli/highlighter.py | 164 ++++++++++++++++++++++++++++++++------- midas/cli/hl_midas.css | 55 +++++++++++++ midas/cli/hl_python.css | 29 +++++++ midas/cli/main.py | 56 +++++++++++-- 5 files changed, 270 insertions(+), 62 deletions(-) create mode 100644 midas/cli/hl_midas.css create mode 100644 
midas/cli/hl_python.css diff --git a/midas/cli/highlight.css b/midas/cli/highlight.css index a2f378c..1abed08 100644 --- a/midas/cli/highlight.css +++ b/midas/cli/highlight.css @@ -50,32 +50,4 @@ span { --border: 2px; z-index: 10; } - - &.base-type { - --col: 108, 233, 108; - } - - &.param { - --col: 103, 192, 224; - } - - &.constraint-type { - --col: 174, 200, 195; - } - - &.frame-column { - --col: 216, 231, 81; - } - - &.frame-type { - --col: 231, 46, 40; - } - - &.function { - --col: 215, 103, 224; - } - - &.argument { - --col: 103, 192, 224; - } } \ No newline at end of file diff --git a/midas/cli/highlighter.py b/midas/cli/highlighter.py index e302c3a..051ae79 100644 --- a/midas/cli/highlighter.py +++ b/midas/cli/highlighter.py @@ -1,19 +1,29 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod from pathlib import Path -from typing import TextIO +from typing import Generic, Optional, Protocol, TextIO, TypeVar -from midas.ast.python import ( - BaseType, - ConstraintType, - Expr, - FrameColumn, - FrameType, - Function, - FunctionArgument, -) +from midas.ast.location import Location +import midas.ast.midas as m +import midas.ast.python as p + +H = TypeVar("H", bound="Highlighter", contravariant=True) -class PythonHighlighter(Expr.Visitor[None]): - CSS_PATH: Path = Path(__file__).parent / "highlight.css" +class Highlightable(Protocol, Generic[H]): + def accept(self, visitor: H): ... + + +class Locatable(Protocol): + @property + @abstractmethod + def location(self) -> Optional[Location]: ... 
+ + +class Highlighter(ABC): + BASE_CSS_PATH: Path = Path(__file__).parent / "highlight.css" + EXTRA_CSS_PATH: Optional[Path] = None def __init__(self, source: str) -> None: self.source: str = source @@ -21,12 +31,22 @@ class PythonHighlighter(Expr.Visitor[None]): self.openings: dict[tuple[int, int], list[str]] = {} self.closings: dict[tuple[int, int], list[str]] = {} - def highlight(self, node: Expr): - node.accept(self) + def format_css(self, path: Path) -> list[str]: + css: str = path.read_text() + css = "\n".join((" " + line).rstrip() for line in css.splitlines()) + return [ + " ", + ] def dump(self, buf: TextIO): - css: str = self.CSS_PATH.read_text() - css = "\n".join((" " + line).rstrip() for line in css.splitlines()) + base_css: list[str] = self.format_css(self.BASE_CSS_PATH) + extra_css: list[str] = ( + self.format_css(self.EXTRA_CSS_PATH) + if self.EXTRA_CSS_PATH is not None + else [] + ) lines: list[str] = [ "", '', @@ -34,9 +54,8 @@ class PythonHighlighter(Expr.Visitor[None]): ' ', ' ', " Highlighted file", - " ", + *base_css, + *extra_css, "", "", '
', @@ -64,7 +83,7 @@ class PythonHighlighter(Expr.Visitor[None]): buf.write("\n".join(lines)) - def wrap(self, node: Expr, cls: str): + def wrap(self, node: Locatable, cls: str): if node.location is None: return if node.location.end_lineno is None or node.location.end_col_offset is None: @@ -84,32 +103,125 @@ class PythonHighlighter(Expr.Visitor[None]): self.closings.setdefault((l, c), []).insert(0, closing) self.openings.setdefault((l + 1, 0), []).append(opening) - def visit_base_type(self, node: BaseType) -> None: + +class PythonHighlighter(Highlighter, p.Expr.Visitor[None]): + EXTRA_CSS_PATH: Optional[Path] = Path(__file__).parent / "hl_python.css" + + def highlight(self, node: Highlightable[PythonHighlighter]): + node.accept(self) + + def visit_base_type(self, node: p.BaseType) -> None: self.wrap(node, "base-type") if node.param is not None: self.wrap(node.param, "param") node.param.accept(self) - def visit_constraint_type(self, node: ConstraintType) -> None: + def visit_constraint_type(self, node: p.ConstraintType) -> None: self.wrap(node, "constraint-type") node.type.accept(self) - def visit_frame_column(self, node: FrameColumn) -> None: + def visit_frame_column(self, node: p.FrameColumn) -> None: self.wrap(node, "frame-column") if node.type is not None: node.type.accept(self) - def visit_frame_type(self, node: FrameType) -> None: + def visit_frame_type(self, node: p.FrameType) -> None: self.wrap(node, "frame-type") for column in node.columns: column.accept(self) - def visit_function(self, node: Function) -> None: + def visit_function(self, node: p.Function) -> None: self.wrap(node, "function") for arg in node.posonlyargs + node.args + node.kwonlyargs: arg.accept(self) - def visit_function_argument(self, node: FunctionArgument) -> None: + def visit_function_argument(self, node: p.FunctionArgument) -> None: self.wrap(node, "argument") if node.type is not None: node.type.accept(self) + + +class MidasHighlighter(Highlighter, m.Stmt.Visitor[None], 
m.Expr.Visitor[None]): + EXTRA_CSS_PATH: Optional[Path] = Path(__file__).parent / "hl_midas.css" + + def highlight(self, node: Highlightable[MidasHighlighter]): + node.accept(self) + + def visit_simple_type_stmt(self, stmt: m.SimpleTypeStmt) -> None: + self.wrap(stmt, "simple-type") + if stmt.template is not None: + stmt.template.accept(self) + stmt.base.accept(self) + if stmt.constraint is not None: + self.wrap(stmt.constraint, "constraint") + stmt.constraint.accept(self) + + def visit_complex_type_stmt(self, stmt: m.ComplexTypeStmt) -> None: + self.wrap(stmt, "complex-type") + if stmt.template is not None: + stmt.template.accept(self) + for prop in stmt.properties: + prop.accept(self) + + def visit_property_stmt(self, stmt: m.PropertyStmt) -> None: + self.wrap(stmt, "property") + stmt.type.accept(self) + if stmt.constraint is not None: + self.wrap(stmt.constraint, "constraint") + stmt.constraint.accept(self) + + def visit_extend_stmt(self, stmt: m.ExtendStmt) -> None: + self.wrap(stmt, "extend") + stmt.type.accept(self) + for op in stmt.operations: + op.accept(self) + + def visit_op_stmt(self, stmt: m.OpStmt) -> None: + self.wrap(stmt, "op") + stmt.operand.accept(self) + stmt.result.accept(self) + + def visit_predicate_stmt(self, stmt: m.PredicateStmt) -> None: + self.wrap(stmt, "predicate") + stmt.type.accept(self) + stmt.condition.accept(self) + + def visit_simple_type_expr(self, expr: m.SimpleTypeExpr) -> None: + self.wrap(expr, "simple-type-expr") + + def visit_logical_expr(self, expr: m.LogicalExpr) -> None: + self.wrap(expr, "logical-expr") + expr.left.accept(self) + expr.right.accept(self) + + def visit_binary_expr(self, expr: m.BinaryExpr) -> None: + self.wrap(expr, "binary-expr") + expr.left.accept(self) + expr.right.accept(self) + + def visit_unary_expr(self, expr: m.UnaryExpr) -> None: + self.wrap(expr, "unary-expr") + expr.right.accept(self) + + def visit_get_expr(self, expr: m.GetExpr) -> None: + self.wrap(expr, "get-expr") + expr.expr.accept(self) + 
+ def visit_variable_expr(self, expr: m.VariableExpr) -> None: + self.wrap(expr, "variable") + + def visit_grouping_expr(self, expr: m.GroupingExpr) -> None: + expr.expr.accept(self) + + def visit_literal_expr(self, expr: m.LiteralExpr) -> None: ... + + def visit_wildcard_expr(self, expr: m.WildcardExpr) -> None: ... + + def visit_template_expr(self, expr: m.TemplateExpr) -> None: + self.wrap(expr, "template") + expr.type.accept(self) + + def visit_type_expr(self, expr: m.TypeExpr) -> None: + self.wrap(expr, "type") + if expr.template is not None: + expr.template.accept(self) diff --git a/midas/cli/hl_midas.css b/midas/cli/hl_midas.css new file mode 100644 index 0000000..e8adef6 --- /dev/null +++ b/midas/cli/hl_midas.css @@ -0,0 +1,55 @@ +span { + &.comment { + --col: 200, 200, 200; + color: rgb(110, 110, 110); + font-style: italic; + } + + &.simple-type { + --col: 108, 233, 108; + } + + &.complex-type { + --col: 233, 206, 108; + } + + &.constraint { + --col: 233, 108, 108; + } + + &.property { + --col: 233, 108, 176; + } + + &.extend { + --col: 108, 197, 233; + } + + &.op { + --col: 108, 148, 233; + } + + &.predicate { + --col: 193, 108, 233; + } + + &.simple-type-expr { + --col: 150, 150, 150; + } + + &.logical-expr, + &.binary-expr, + &.unary-expr, + &.get-expr { + --col: 123, 215, 193; + } + + &.template { + --col: 163, 117, 71; + } + + &.type { + --col: 200, 200, 200; + font-weight: bold; + } +} \ No newline at end of file diff --git a/midas/cli/hl_python.css b/midas/cli/hl_python.css new file mode 100644 index 0000000..e6dc43b --- /dev/null +++ b/midas/cli/hl_python.css @@ -0,0 +1,29 @@ +span { + &.base-type { + --col: 108, 233, 108; + } + + &.param { + --col: 103, 192, 224; + } + + &.constraint-type { + --col: 174, 200, 195; + } + + &.frame-column { + --col: 216, 231, 81; + } + + &.frame-type { + --col: 231, 46, 40; + } + + &.function { + --col: 215, 103, 224; + } + + &.argument { + --col: 103, 192, 224; + } +} \ No newline at end of file diff --git 
a/midas/cli/main.py b/midas/cli/main.py index 4dd79c0..7319226 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -3,8 +3,13 @@ from typing import Optional, TextIO import click +from midas.ast.location import Location +import midas.ast.midas as m from midas.ast.printer import PythonAstPrinter -from midas.cli.highlighter import PythonHighlighter +from midas.cli.highlighter import Highlighter, MidasHighlighter, PythonHighlighter +from midas.lexer.midas import MidasLexer +from midas.lexer.token import Token, TokenType +from midas.parser.midas import MidasParser from midas.parser.python import PythonParser @@ -59,18 +64,53 @@ def dump_ast(output: Optional[TextIO], parse: bool, file: TextIO): output.write(dump) -@utils.command() -@click.option("-o", "--output", type=click.File("w"), default="-") -@click.argument("file", type=click.File("r")) -def highlight(output: TextIO, file: TextIO): - source: str = file.read() - tree: ast.Module = ast.parse(source, filename=file.name) +def highlight_python(source: str, path: str) -> Highlighter: + tree: ast.Module = ast.parse(source, filename=path) parser = PythonParser() parser.visit(tree) - highlighter: PythonHighlighter = PythonHighlighter(source) + highlighter = PythonHighlighter(source) for _, annotation in parser.annotations: if annotation is not None: highlighter.highlight(annotation) for func in parser.functions: highlighter.highlight(func) + return highlighter + + +def highlight_midas(source: str, path: str) -> Highlighter: + lexer = MidasLexer(source, file=path) + tokens: list[Token] = lexer.process() + parser = MidasParser(tokens) + stmts: list[m.Stmt] = parser.parse() + highlighter = MidasHighlighter(source) + + class LocatableToken: + def __init__(self, token: Token): + self.token: Token = token + + @property + def location(self) -> Location: + return self.token.get_location() + + for token in tokens: + if token.type == TokenType.COMMENT: + highlighter.wrap(LocatableToken(token), "comment") + for stmt in stmts: + 
highlighter.highlight(stmt) + return highlighter + + +@utils.command() +@click.option("-o", "--output", type=click.File("w"), default="-") +@click.argument("file", type=click.File("r")) +def highlight(output: TextIO, file: TextIO): + source: str = file.read() + highlighter: Highlighter + + if file.name.endswith(".py"): + highlighter = highlight_python(source, file.name) + elif file.name.endswith(".midas"): + highlighter = highlight_midas(source, file.name) + else: + raise ValueError("Unsupported file type") highlighter.dump(output) From a735113466a0b56d028dd1de9dfa585b1f0d3c73 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 12:46:04 +0200 Subject: [PATCH 17/27] fix(parser): update ast gen script --- gen/gen.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gen/gen.py b/gen/gen.py index 106dcc7..34781b3 100644 --- a/gen/gen.py +++ b/gen/gen.py @@ -14,7 +14,8 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any, Generic, Optional, TypeVar -from lexer.token import Token +from midas.ast.location import Location +from midas.lexer.token import Token T = TypeVar("T") @@ -23,8 +24,10 @@ T = TypeVar("T") ############## -@dataclass(frozen=True) +@dataclass(frozen=True, kw_only=True) class Stmt(ABC): + location: Optional[Location] = None + @abstractmethod def accept(self, visitor: Visitor[T]) -> T: ... @@ -40,8 +43,10 @@ class Stmt(ABC): ############### -@dataclass(frozen=True) +@dataclass(frozen=True, kw_only=True) class Expr(ABC): + location: Optional[Location] = None + @abstractmethod def accept(self, visitor: Visitor[T]) -> T: ... 
From 939e5af4ceb9eef87112abc17b1f1170c3e81aa2 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 20:38:38 +0200 Subject: [PATCH 18/27] refactor(parser): improve AST class generator make the generation script more flexible --- gen/gen.py | 144 +++++++++++++++++++++------------------ gen/{ast.py => midas.py} | 120 +++++++++++++++++++++----------- midas/ast/midas.py | 2 +- 3 files changed, 156 insertions(+), 110 deletions(-) rename gen/{ast.py => midas.py} (76%) diff --git a/gen/gen.py b/gen/gen.py index 34781b3..03f38d0 100644 --- a/gen/gen.py +++ b/gen/gen.py @@ -3,58 +3,34 @@ import re HEADER = '''""" This file was generated by a script. Any manual changes might be overwritten. -Please modify gen/ast.py instead and run gen/gen.py +Please modify {defs_path} instead and run {gen_path} """''' +SECTION_TEMPLATE = """{banner} + + +@dataclass(frozen=True, kw_only=True) +class {base}(ABC): + location: Optional[Location] = None + + @abstractmethod + def accept(self, visitor: Visitor[T]) -> T: ... + + class Visitor(ABC, Generic[T]): +{visitor_methods} + + +{classes}""" + TEMPLATE = """{header} from __future__ import annotations -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any, Generic, Optional, TypeVar - -from midas.ast.location import Location -from midas.lexer.token import Token +{imports} T = TypeVar("T") -############## -# Statements # -############## - - -@dataclass(frozen=True, kw_only=True) -class Stmt(ABC): - location: Optional[Location] = None - - @abstractmethod - def accept(self, visitor: Visitor[T]) -> T: ... - - class Visitor(ABC, Generic[T]): -{stmt_visitor_methods} - - -{statements} - - -############### -# Expressions # -############### - - -@dataclass(frozen=True, kw_only=True) -class Expr(ABC): - location: Optional[Location] = None - - @abstractmethod - def accept(self, visitor: Visitor[T]) -> T: ... 
- - class Visitor(ABC, Generic[T]): -{expr_visitor_methods} - - -{expressions} +{sections} """ VISITOR_METHOD_TEMPLATE = """ @@ -71,6 +47,16 @@ class {cls}({base}): return visitor.visit_{func_name}(self) """ +SECTION_REGEX = re.compile( + r"^###>\s*(?P<base>[^\n]*?)\s*\|\s*(?P<name>[^\n]*?)\s*?\n(?P<body>.*?)\n###<$", + re.MULTILINE | re.DOTALL, +) + +IMPORTS_REGEX = re.compile( + r"^###>\s*Imports\s*?\n(?P<body>.*?)\n###<$", + re.MULTILINE | re.DOTALL, +) + def snake_case(text: str) -> str: return re.sub(r"[A-Z]", lambda c: "_" + c.group().lower(), text).lower().strip("_") @@ -95,41 +81,63 @@ def make_class(name: str, cls: str, base: str): return cls_def.strip("\n") -def generate(src: str): - classes: list[str] = src.split("\n\n") - stmt_visitor_methods: list[str] = [] - expr_visitor_methods: list[str] = [] - statements: list[str] = [] - expressions: list[str] = [] +def make_banner(text: str) -> str: + middle: str = f"# {text} #" + rule: str = "#" * len(middle) + return "\n".join((rule, middle, rule)) - for cls in classes: + +def make_section(full_name: str, base: str, body: str) -> str: + visitor_methods: list[str] = [] + classes: list[str] = [] + definitions: list[str] = body.strip("\n").split("\n\n") + for cls in definitions: cls = cls.strip("\n") name: str = re.match("class (.*?):", cls).group(1) # type: ignore print(f"Processing {name}") - if name.endswith("Stmt"): - stmt_visitor_methods.append(make_visitor_method(name, "stmt")) - statements.append(make_class(name, cls, "Stmt")) - elif name.endswith("Expr"): - expr_visitor_methods.append(make_visitor_method(name, "expr")) - expressions.append(make_class(name, cls, "Expr")) + visitor_methods.append(make_visitor_method(name, base.lower())) + classes.append(make_class(name, cls, base)) - return TEMPLATE.format( - header=HEADER, - stmt_visitor_methods="\n\n".join(stmt_visitor_methods), - expr_visitor_methods="\n\n".join(expr_visitor_methods), - statements="\n\n\n".join(statements), - expressions="\n\n\n".join(expressions), + return
SECTION_TEMPLATE.format( + banner=make_banner(full_name), + base=base, + visitor_methods="\n\n".join(visitor_methods), + classes="\n\n\n".join(classes), ) +def generate(definitions_path: Path, out_path: Path): + root_dir: Path = Path(__file__).parent.parent + rel_path: Path = definitions_path.relative_to(root_dir) + src: str = definitions_path.read_text() + sections: list[str] = [] + + imports: str = "" + if m := IMPORTS_REGEX.search(src): + imports = m.group("body").strip("\n") + + for section_m in SECTION_REGEX.finditer(src): + full_name: str = section_m.group("name") + base: str = section_m.group("base") + body: str = section_m.group("body") + sections.append(make_section(full_name, base, body)) + + result: str = TEMPLATE.format( + header=HEADER.format( + defs_path=rel_path, + gen_path=Path(__file__).relative_to(root_dir), + ), + imports=imports, + sections="\n\n\n".join(sections), + ) + out_path.write_text(result) + + def main(): root: Path = Path(__file__).parent.parent - in_path: Path = root / "gen" / "ast.py" - out_path: Path = root / "midas" / "ast" / "midas.py" - - src: str = in_path.read_text() - generated: str = generate(src) - out_path.write_text(generated) + defs_dir: Path = root / "gen" + ast_dir: Path = root / "midas" / "ast" + generate(defs_dir / "midas.py", ast_dir / "midas.py") if __name__ == "__main__": diff --git a/gen/ast.py b/gen/midas.py similarity index 76% rename from gen/ast.py rename to gen/midas.py index 6fca631..7187554 100644 --- a/gen/ast.py +++ b/gen/midas.py @@ -1,72 +1,110 @@ +# type: ignore +# ruff: disable[F821, F401] + +###> Imports +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Generic, Optional, TypeVar + +from midas.ast.location import Location +from midas.lexer.token import Token + +###< + + +###> Stmt | Statements class SimpleTypeStmt: name: Token template: Optional[TemplateExpr] base: TypeExpr constraint: Optional[Expr] -class SimpleTypeExpr: - name: Token - optional: bool 
- -class LogicalExpr: - left: Expr - operator: Token - right: Expr - -class BinaryExpr: - left: Expr - operator: Token - right: Expr - -class UnaryExpr: - operator: Token - right: Expr - -class GetExpr: - expr: Expr - name: Token - -class VariableExpr: - name: Token - -class GroupingExpr: - expr: Expr - -class LiteralExpr: - value: Any - -class WildcardExpr: - token: Token - -class TemplateExpr: - type: TypeExpr - -class TypeExpr: - name: Token - template: Optional[TemplateExpr] - optional: bool class ComplexTypeStmt: name: Token template: Optional[TemplateExpr] properties: list[PropertyStmt] + class PropertyStmt: name: Token type: TypeExpr constraint: Optional[Expr] + class ExtendStmt: type: TypeExpr operations: list[OpStmt] + class OpStmt: name: Token operand: TypeExpr result: TypeExpr + class PredicateStmt: name: Token subject: Token type: TypeExpr condition: Expr + + +###< + + +###> Expr | Expressions +class SimpleTypeExpr: + name: Token + optional: bool + + +class LogicalExpr: + left: Expr + operator: Token + right: Expr + + +class BinaryExpr: + left: Expr + operator: Token + right: Expr + + +class UnaryExpr: + operator: Token + right: Expr + + +class GetExpr: + expr: Expr + name: Token + + +class VariableExpr: + name: Token + + +class GroupingExpr: + expr: Expr + + +class LiteralExpr: + value: Any + + +class WildcardExpr: + token: Token + + +class TemplateExpr: + type: TypeExpr + + +class TypeExpr: + name: Token + template: Optional[TemplateExpr] + optional: bool + + +###< diff --git a/midas/ast/midas.py b/midas/ast/midas.py index 1ff503d..9cea8c2 100644 --- a/midas/ast/midas.py +++ b/midas/ast/midas.py @@ -1,6 +1,6 @@ """ This file was generated by a script. Any manual changes might be overwritten. 
-Please modify gen/ast.py instead and run gen/gen.py +Please modify gen/midas.py instead and run gen/gen.py """ from __future__ import annotations From 0bbdf04621ce2e3c68bbc961266dfaaecdf66059 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 20:53:36 +0200 Subject: [PATCH 19/27] feat(parser): generate python AST classes use the generation script to create Python AST node classes, also distinguish between Midas type annotation nodes and statements --- gen/gen.py | 12 ++++--- gen/python.py | 53 ++++++++++++++++++++++++++++++ midas/ast/printer.py | 32 +++++++++--------- midas/ast/python.py | 74 ++++++++++++++++++++++++------------------ midas/parser/python.py | 7 ++-- 5 files changed, 122 insertions(+), 56 deletions(-) create mode 100644 gen/python.py diff --git a/gen/gen.py b/gen/gen.py index 03f38d0..75e6100 100644 --- a/gen/gen.py +++ b/gen/gen.py @@ -48,7 +48,7 @@ class {cls}({base}): """ SECTION_REGEX = re.compile( - r"^###>\s*(?P<base>[^\n]*?)\s*\|\s*(?P<name>[^\n]*?)\s*?\n(?P<body>.*?)\n###<$", + r"^###>\s*(?P<base>[^\n]*?)\s*\|\s*(?P<name>[^\n]*?)(\s*\|\s*(?P<param>[^\n]*?))?\s*?\n(?P<body>.*?)\n###<$", re.MULTILINE | re.DOTALL, ) @@ -87,15 +87,15 @@ def make_banner(text: str) -> str: return "\n".join((rule, middle, rule)) -def make_section(full_name: str, base: str, body: str) -> str: +def make_section(full_name: str, base: str, param: str, body: str) -> str: visitor_methods: list[str] = [] classes: list[str] = [] - definitions: list[str] = body.strip("\n").split("\n\n") + definitions: list[str] = body.strip("\n").split("\n\n\n") for cls in definitions: cls = cls.strip("\n") name: str = re.match("class (.*?):", cls).group(1) # type: ignore print(f"Processing {name}") - visitor_methods.append(make_visitor_method(name, base.lower())) + visitor_methods.append(make_visitor_method(name, param)) classes.append(make_class(name, cls, base)) return SECTION_TEMPLATE.format( @@ -119,8 +119,9 @@ def generate(definitions_path: Path, out_path: Path): for section_m in SECTION_REGEX.finditer(src):
full_name: str = section_m.group("name") base: str = section_m.group("base") + param: str = section_m.group("param") or base.lower() body: str = section_m.group("body") - sections.append(make_section(full_name, base, body)) + sections.append(make_section(full_name, base, param, body)) result: str = TEMPLATE.format( header=HEADER.format( @@ -138,6 +139,7 @@ def main(): defs_dir: Path = root / "gen" ast_dir: Path = root / "midas" / "ast" generate(defs_dir / "midas.py", ast_dir / "midas.py") + generate(defs_dir / "python.py", ast_dir / "python.py") if __name__ == "__main__": diff --git a/gen/python.py b/gen/python.py new file mode 100644 index 0000000..15df1d9 --- /dev/null +++ b/gen/python.py @@ -0,0 +1,53 @@ +# type: ignore +# ruff: disable[F821, F401] + +###> Imports +import ast +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Generic, Optional, TypeVar + +from midas.ast.location import Location + +###< + + +###> MidasType | Type annotations | node +class BaseType: + base: str + param: Optional[MidasType] + + +class ConstraintType: + type: MidasType + constraint: ast.expr + + +class FrameColumn: + name: Optional[str] + type: Optional[MidasType] + + +class FrameType: + columns: list[FrameColumn] + + +###< + + +###> Stmt | Statements +class Function: + name: str + posonlyargs: list[Argument] + args: list[Argument] + kwonlyargs: list[Argument] + returns: Optional[MidasType] + + @dataclass(frozen=True, kw_only=True) + class Argument: + location: Optional[Location] = None + name: Optional[str] + type: Optional[MidasType] + + +###< diff --git a/midas/ast/printer.py b/midas/ast/printer.py index b92e40f..2d38241 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -350,7 +350,7 @@ class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]): return f"{expr.name.lexeme}{template}{'?' 
if expr.optional else ''}" -class PythonAstPrinter(AstPrinter, p.Expr.Visitor[None]): +class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None], p.Stmt.Visitor[None]): def visit_base_type(self, node: p.BaseType) -> None: self._write_line("BaseType") with self._child_level(): @@ -382,39 +382,39 @@ class PythonAstPrinter(AstPrinter, p.Expr.Visitor[None]): self._mark_last() col.accept(self) - def visit_function(self, node: p.Function) -> None: + def visit_function(self, stmt: p.Function) -> None: self._write_line("Function") with self._child_level(): - self._write_line(f"name: {node.name}") + self._write_line(f"name: {stmt.name}") self._write_line("posonlyargs") with self._child_level(): - for i, arg in enumerate(node.posonlyargs): + for i, arg in enumerate(stmt.posonlyargs): self._idx = i - if i == len(node.posonlyargs) - 1: + if i == len(stmt.posonlyargs) - 1: self._mark_last() - arg.accept(self) + self._print_argument(arg) self._write_line("args") with self._child_level(): - for i, arg in enumerate(node.args): + for i, arg in enumerate(stmt.args): self._idx = i - if i == len(node.args) - 1: + if i == len(stmt.args) - 1: self._mark_last() - arg.accept(self) + self._print_argument(arg) self._write_line("kwonlyargs") with self._child_level(): - for i, arg in enumerate(node.kwonlyargs): + for i, arg in enumerate(stmt.kwonlyargs): self._idx = i - if i == len(node.kwonlyargs) - 1: + if i == len(stmt.kwonlyargs) - 1: self._mark_last() - arg.accept(self) + self._print_argument(arg) - self._write_optional_child("returns", node.returns, last=True) + self._write_optional_child("returns", stmt.returns, last=True) - def visit_function_argument(self, node: p.FunctionArgument) -> None: + def _print_argument(self, arg: p.Function.Argument) -> None: self._write_line("FunctionArgument") with self._child_level(): - self._write_line(f"name: {node.name}") - self._write_optional_child("type", node.type, last=True) + self._write_line(f"name: {arg.name}") + 
self._write_optional_child("type", arg.type, last=True) diff --git a/midas/ast/python.py b/midas/ast/python.py index c25b438..cd120ee 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -1,7 +1,12 @@ +""" +This file was generated by a script. Any manual changes might be overwritten. +Please modify gen/python.py instead and run gen/gen.py +""" + from __future__ import annotations -from abc import ABC, abstractmethod import ast +from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Generic, Optional, TypeVar @@ -9,9 +14,13 @@ from midas.ast.location import Location T = TypeVar("T") +#################### +# Type annotations # +#################### + @dataclass(frozen=True, kw_only=True) -class Expr(ABC): +class MidasType(ABC): location: Optional[Location] = None @abstractmethod @@ -30,24 +39,13 @@ class Expr(ABC): @abstractmethod def visit_frame_type(self, node: FrameType) -> T: ... - @abstractmethod - def visit_function(self, node: Function) -> T: ... - - @abstractmethod - def visit_function_argument(self, node: FunctionArgument) -> T: ... 
- - -@dataclass(frozen=True) -class MidasType(Expr): - pass - @dataclass(frozen=True) class BaseType(MidasType): base: str param: Optional[MidasType] - def accept(self, visitor: Expr.Visitor[T]) -> T: + def accept(self, visitor: MidasType.Visitor[T]) -> T: return visitor.visit_base_type(self) @@ -56,7 +54,7 @@ class ConstraintType(MidasType): type: MidasType constraint: ast.expr - def accept(self, visitor: Expr.Visitor[T]) -> T: + def accept(self, visitor: MidasType.Visitor[T]) -> T: return visitor.visit_constraint_type(self) @@ -65,7 +63,7 @@ class FrameColumn(MidasType): name: Optional[str] type: Optional[MidasType] - def accept(self, visitor: Expr.Visitor[T]) -> T: + def accept(self, visitor: MidasType.Visitor[T]) -> T: return visitor.visit_frame_column(self) @@ -73,26 +71,40 @@ class FrameColumn(MidasType): class FrameType(MidasType): columns: list[FrameColumn] - def accept(self, visitor: Expr.Visitor[T]) -> T: + def accept(self, visitor: MidasType.Visitor[T]) -> T: return visitor.visit_frame_type(self) +############## +# Statements # +############## + + +@dataclass(frozen=True, kw_only=True) +class Stmt(ABC): + location: Optional[Location] = None + + @abstractmethod + def accept(self, visitor: Visitor[T]) -> T: ... + + class Visitor(ABC, Generic[T]): + @abstractmethod + def visit_function(self, stmt: Function) -> T: ... 
+ + @dataclass(frozen=True) -class Function(Expr): +class Function(Stmt): name: str - posonlyargs: list[FunctionArgument] - args: list[FunctionArgument] - kwonlyargs: list[FunctionArgument] + posonlyargs: list[Argument] + args: list[Argument] + kwonlyargs: list[Argument] returns: Optional[MidasType] - def accept(self, visitor: Expr.Visitor[T]) -> T: + @dataclass(frozen=True, kw_only=True) + class Argument: + location: Optional[Location] = None + name: Optional[str] + type: Optional[MidasType] + + def accept(self, visitor: Stmt.Visitor[T]) -> T: return visitor.visit_function(self) - - -@dataclass(frozen=True) -class FunctionArgument(Expr): - name: Optional[str] - type: Optional[MidasType] - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_function_argument(self) diff --git a/midas/parser/python.py b/midas/parser/python.py index 6e0ffe1..082cab1 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -8,7 +8,6 @@ from midas.ast.python import ( FrameColumn, FrameType, Function, - FunctionArgument, MidasType, ) @@ -63,7 +62,7 @@ class PythonParser(ast.NodeVisitor): returns=returns, ): - def parse_args(args_list: list[ast.arg]) -> list[FunctionArgument]: + def parse_args(args_list: list[ast.arg]) -> list[Function.Argument]: return [self._parse_function_argument(arg) for arg in args_list] return Function( @@ -75,13 +74,13 @@ class PythonParser(ast.NodeVisitor): returns=self._parse_type(returns) if returns is not None else None, ) - def _parse_function_argument(self, arg: ast.arg) -> FunctionArgument: + def _parse_function_argument(self, arg: ast.arg) -> Function.Argument: loc: Location = Location.from_ast(arg) name: str = arg.arg type: Optional[MidasType] = None if arg.annotation is not None: type = self._parse_type(arg.annotation) - return FunctionArgument( + return Function.Argument( location=loc, name=name, type=type, From ecab1b74a451e0305d79f18e1b58875839cb4bcc Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 
21:39:20 +0200 Subject: [PATCH 20/27] feat(parser): add Python AST nodes --- gen/python.py | 61 +++++++++++++++++- midas/ast/python.py | 148 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 207 insertions(+), 2 deletions(-) diff --git a/gen/python.py b/gen/python.py index 15df1d9..33a01cc 100644 --- a/gen/python.py +++ b/gen/python.py @@ -5,7 +5,7 @@ import ast from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Generic, Optional, TypeVar +from typing import Any, Generic, Optional, TypeVar from midas.ast.location import Location @@ -36,6 +36,10 @@ class FrameType: ###> Stmt | Statements +class ExpressionStmt: + expr: Expr + + class Function: name: str posonlyargs: list[Argument] @@ -50,4 +54,59 @@ class Function: type: Optional[MidasType] +class TypeAssign: + name: str + type: MidasType + + +###< + + +###> Expr | Expressions +class AssignExpr: + name: str + value: Expr + + +class BinaryExpr: + left: Expr + operator: ast.operator + right: Expr + + +class UnaryExpr: + operator: ast.unaryop + right: Expr + + +class CallExpr: + callee: Expr + arguments: list[Expr] + + +class GetExpr: + object: Expr + name: str + + +class LiteralExpr: + value: Any + + +class VariableExpr: + name: str + + +class LogicalExpr: + left: Expr + operator: ast.boolop + right: Expr + + +class SetExpr: + object: Expr + name: str + value: Expr + + ###< diff --git a/midas/ast/python.py b/midas/ast/python.py index cd120ee..5e219c8 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -8,7 +8,7 @@ from __future__ import annotations import ast from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Generic, Optional, TypeVar +from typing import Any, Generic, Optional, TypeVar from midas.ast.location import Location @@ -88,9 +88,23 @@ class Stmt(ABC): def accept(self, visitor: Visitor[T]) -> T: ... 
class Visitor(ABC, Generic[T]): + @abstractmethod + def visit_expression_stmt(self, stmt: ExpressionStmt) -> T: ... + @abstractmethod def visit_function(self, stmt: Function) -> T: ... + @abstractmethod + def visit_type_assign(self, stmt: TypeAssign) -> T: ... + + +@dataclass(frozen=True) +class ExpressionStmt(Stmt): + expr: Expr + + def accept(self, visitor: Stmt.Visitor[T]) -> T: + return visitor.visit_expression_stmt(self) + @dataclass(frozen=True) class Function(Stmt): @@ -108,3 +122,135 @@ class Function(Stmt): def accept(self, visitor: Stmt.Visitor[T]) -> T: return visitor.visit_function(self) + + +@dataclass(frozen=True) +class TypeAssign(Stmt): + name: str + type: MidasType + + def accept(self, visitor: Stmt.Visitor[T]) -> T: + return visitor.visit_type_assign(self) + + +############### +# Expressions # +############### + + +@dataclass(frozen=True, kw_only=True) +class Expr(ABC): + location: Optional[Location] = None + + @abstractmethod + def accept(self, visitor: Visitor[T]) -> T: ... + + class Visitor(ABC, Generic[T]): + @abstractmethod + def visit_assign_expr(self, expr: AssignExpr) -> T: ... + + @abstractmethod + def visit_binary_expr(self, expr: BinaryExpr) -> T: ... + + @abstractmethod + def visit_unary_expr(self, expr: UnaryExpr) -> T: ... + + @abstractmethod + def visit_call_expr(self, expr: CallExpr) -> T: ... + + @abstractmethod + def visit_get_expr(self, expr: GetExpr) -> T: ... + + @abstractmethod + def visit_literal_expr(self, expr: LiteralExpr) -> T: ... + + @abstractmethod + def visit_variable_expr(self, expr: VariableExpr) -> T: ... + + @abstractmethod + def visit_logical_expr(self, expr: LogicalExpr) -> T: ... + + @abstractmethod + def visit_set_expr(self, expr: SetExpr) -> T: ... 
+ + +@dataclass(frozen=True) +class AssignExpr(Expr): + name: str + value: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_assign_expr(self) + + +@dataclass(frozen=True) +class BinaryExpr(Expr): + left: Expr + operator: ast.operator + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_binary_expr(self) + + +@dataclass(frozen=True) +class UnaryExpr(Expr): + operator: ast.unaryop + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_unary_expr(self) + + +@dataclass(frozen=True) +class CallExpr(Expr): + callee: Expr + arguments: list[Expr] + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_call_expr(self) + + +@dataclass(frozen=True) +class GetExpr(Expr): + object: Expr + name: str + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_get_expr(self) + + +@dataclass(frozen=True) +class LiteralExpr(Expr): + value: Any + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_literal_expr(self) + + +@dataclass(frozen=True) +class VariableExpr(Expr): + name: str + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_variable_expr(self) + + +@dataclass(frozen=True) +class LogicalExpr(Expr): + left: Expr + operator: ast.boolop + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_logical_expr(self) + + +@dataclass(frozen=True) +class SetExpr(Expr): + object: Expr + name: str + value: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_set_expr(self) From c64d626d1ccefdf2e66da9eaab2910e8b4f3be17 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 21:42:04 +0200 Subject: [PATCH 21/27] refactor(parser): remove inheritance from NodeVisitor remove the parent NodeVisitor class from PythonParser and implement all custom recursive methods instead --- midas/parser/python.py | 88 
+++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/midas/parser/python.py b/midas/parser/python.py index 082cab1..277a71c 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -1,14 +1,20 @@ import ast -from typing import Any, Optional +from typing import Optional from midas.ast.location import Location + from midas.ast.python import ( + AssignExpr, BaseType, ConstraintType, + Expr, + ExpressionStmt, FrameColumn, FrameType, Function, MidasType, + Stmt, + TypeAssign, ) @@ -23,33 +29,66 @@ class UnsupportedSyntaxError(Exception): ) -class PythonParser(ast.NodeVisitor): - def __init__(self) -> None: - super().__init__() +class PythonParser: + def parse_module(self, node: ast.Module) -> list[Stmt]: + statements: list[Stmt] = [] + for stmt in node.body: + parsed: None | Stmt | list[Stmt] = self.parse_stmt(stmt) + if isinstance(parsed, Stmt): + statements.append(parsed) + elif parsed is not None: + statements.extend(parsed) + return statements - self.annotations: list[tuple[str, Optional[MidasType]]] = [] - self.functions: list[Function] = [] - - def visit_AnnAssign(self, node: ast.AnnAssign) -> Any: + def parse_stmt(self, node: ast.stmt) -> None | Stmt | list[Stmt]: match node: - case ast.AnnAssign( - target=ast.Name(id=target), annotation=annotation, simple=1 - ): - self.annotations.append( - (target, self._parse_type(annotation, root=True)) - ) + case ast.AnnAssign(): + return self.parse_annotation_assign(node) + + case ast.FunctionDef(): + return self.parse_function(node) + case _: + print(f"Unsupported assignment: {ast.unparse(node)}") + return None + + def parse_annotation_assign(self, node: ast.AnnAssign) -> list[Stmt]: + statements: list[Stmt] = [] + loc: Location = Location.from_ast(node) + match node: + case ast.AnnAssign( + target=ast.Name(id=target), + annotation=annotation, + value=value, + simple=1, + ): + type = self._parse_type(annotation, root=True) + if type is not None: + 
statements.append( + TypeAssign( + location=loc, + name=target, + type=type, + ) + ) + + if value is not None: + parsed_value: Expr = self.parse_expr(value) + statements.append( + ExpressionStmt( + location=loc, + expr=AssignExpr( + location=loc, + name=target, + value=parsed_value, + ), + ) + ) case _: print(f"Unsupported annotation: {ast.unparse(node)}") + return statements - def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: - self.functions.append(self._parse_function(node)) - - # Call visit on children to process body - # TODO: scope the resulting nodes to the function - self.generic_visit(node) - - def _parse_function(self, node: ast.FunctionDef) -> Function: + def parse_function(self, node: ast.FunctionDef) -> Function: loc: Location = Location.from_ast(node) match node: case ast.FunctionDef( @@ -73,6 +112,8 @@ class PythonParser(ast.NodeVisitor): kwonlyargs=parse_args(kwonlyargs), returns=self._parse_type(returns) if returns is not None else None, ) + case _: + print(f"Unsupported function definition: {ast.unparse(node)}") def _parse_function_argument(self, arg: ast.arg) -> Function.Argument: loc: Location = Location.from_ast(arg) @@ -185,3 +226,6 @@ class PythonParser(ast.NodeVisitor): case _: raise UnsupportedSyntaxError(column) + + def parse_expr(self, node: ast.expr) -> Expr: + raise NotImplementedError() From 4d23e8840e0a4abdd341d0bd5737f470820a1c11 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 22:06:18 +0200 Subject: [PATCH 22/27] feat(parser): adapt AST printer with new nodes --- midas/ast/printer.py | 105 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/midas/ast/printer.py b/midas/ast/printer.py index 2d38241..f467a85 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -350,7 +350,12 @@ class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]): return f"{expr.name.lexeme}{template}{'?' 
if expr.optional else ''}" -class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None], p.Stmt.Visitor[None]): +class PythonAstPrinter( + AstPrinter, + p.MidasType.Visitor[None], + p.Stmt.Visitor[None], + p.Expr.Visitor[None], +): def visit_base_type(self, node: p.BaseType) -> None: self._write_line("BaseType") with self._child_level(): @@ -382,6 +387,9 @@ class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None], p.Stmt.Visitor[Non self._mark_last() col.accept(self) + def visit_expression_stmt(self, stmt: p.ExpressionStmt) -> None: + stmt.expr.accept(self) + def visit_function(self, stmt: p.Function) -> None: self._write_line("Function") with self._child_level(): @@ -418,3 +426,98 @@ class PythonAstPrinter(AstPrinter, p.MidasType.Visitor[None], p.Stmt.Visitor[Non with self._child_level(): self._write_line(f"name: {arg.name}") self._write_optional_child("type", arg.type, last=True) + + def visit_type_assign(self, stmt: p.TypeAssign) -> None: + self._write_line("TypeAssign") + with self._child_level(): + self._write_line(f"name: {stmt.name}") + self._write_line("type", last=True) + with self._child_level(single=True): + stmt.type.accept(self) + + def visit_assign_expr(self, expr: p.AssignExpr) -> None: + self._write_line("AssignExpr") + with self._child_level(): + self._write_line(f"name: {expr.name}") + self._write_line("value", last=True) + with self._child_level(single=True): + expr.value.accept(self) + + def visit_binary_expr(self, expr: p.BinaryExpr) -> None: + self._write_line("BinaryExpr") + with self._child_level(): + self._write_line("left") + with self._child_level(single=True): + expr.left.accept(self) + + self._write_line(f"operator: {expr.operator.__class__.__name__}") + + self._write_line("right", last=True) + with self._child_level(single=True): + expr.right.accept(self) + + def visit_unary_expr(self, expr: p.UnaryExpr) -> None: + self._write_line("UnaryExpr") + with self._child_level(): + self._write_line(f"operator: 
{expr.operator.__class__.__name__}") + + self._write_line("right", last=True) + with self._child_level(single=True): + expr.right.accept(self) + + def visit_call_expr(self, expr: p.CallExpr) -> None: + self._write_line("CallExpr") + with self._child_level(): + self._write_line("callee") + with self._child_level(single=True): + expr.callee.accept(self) + + self._write_line("arguments", last=True) + with self._child_level(): + for i, arg in enumerate(expr.arguments): + self._idx = i + if i == len(expr.arguments) - 1: + self._mark_last() + arg.accept(self) + + def visit_get_expr(self, expr: p.GetExpr) -> None: + self._write_line("GetExpr") + with self._child_level(): + self._write_line("object") + with self._child_level(single=True): + expr.object.accept(self) + self._write_line(f"name: {expr.name}", last=True) + + def visit_literal_expr(self, expr: p.LiteralExpr) -> None: + self._write_line("LiteralExpr") + with self._child_level(single=True): + self._write_line(f"value: {expr.value}") + + def visit_variable_expr(self, expr: p.VariableExpr) -> None: + self._write_line("VariableExpr") + with self._child_level(single=True): + self._write_line(f"name: {expr.name}") + + def visit_logical_expr(self, expr: p.LogicalExpr) -> None: + self._write_line("LogicalExpr") + with self._child_level(): + self._write_line("left") + with self._child_level(single=True): + expr.left.accept(self) + + self._write_line(f"operator: {expr.operator.__class__.__name__}") + + self._write_line("right", last=True) + with self._child_level(single=True): + expr.right.accept(self) + + def visit_set_expr(self, expr: p.SetExpr) -> None: + self._write_line("SetExpr") + with self._child_level(): + self._write_line("object") + with self._child_level(single=True): + expr.object.accept(self) + self._write_line(f"name: {expr.name}") + self._write_line("value", last=True) + with self._child_level(single=True): + expr.value.accept(self) From bbd0e3ae8d73cd034f6714af0639e29225cc8e5f Mon Sep 17 00:00:00 2001 
From: LordBaryhobal Date: Mon, 25 May 2026 22:14:44 +0200 Subject: [PATCH 23/27] feat(cli): update highlighter with new nodes --- midas/cli/highlighter.py | 47 ++++++++++++++++++++++++++++++++-------- midas/cli/main.py | 24 ++++++-------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/midas/cli/highlighter.py b/midas/cli/highlighter.py index 051ae79..45ed55c 100644 --- a/midas/cli/highlighter.py +++ b/midas/cli/highlighter.py @@ -104,7 +104,12 @@ class Highlighter(ABC): self.openings.setdefault((l + 1, 0), []).append(opening) -class PythonHighlighter(Highlighter, p.Expr.Visitor[None]): +class PythonHighlighter( + Highlighter, + p.MidasType.Visitor[None], + p.Stmt.Visitor[None], + p.Expr.Visitor[None], +): EXTRA_CSS_PATH: Optional[Path] = Path(__file__).parent / "hl_python.css" def highlight(self, node: Highlightable[PythonHighlighter]): @@ -130,15 +135,39 @@ class PythonHighlighter(Highlighter, p.Expr.Visitor[None]): for column in node.columns: column.accept(self) - def visit_function(self, node: p.Function) -> None: - self.wrap(node, "function") - for arg in node.posonlyargs + node.args + node.kwonlyargs: - arg.accept(self) + def visit_expression_stmt(self, stmt: p.ExpressionStmt) -> None: + stmt.expr.accept(self) - def visit_function_argument(self, node: p.FunctionArgument) -> None: - self.wrap(node, "argument") - if node.type is not None: - node.type.accept(self) + def visit_function(self, stmt: p.Function) -> None: + self.wrap(stmt, "function") + for arg in stmt.posonlyargs + stmt.args + stmt.kwonlyargs: + self._highlight_function_argument(arg) + + def _highlight_function_argument(self, arg: p.Function.Argument) -> None: + self.wrap(arg, "argument") + if arg.type is not None: + arg.type.accept(self) + + def visit_type_assign(self, stmt: p.TypeAssign) -> None: + stmt.type.accept(self) + + def visit_assign_expr(self, expr: p.AssignExpr) -> None: ... + + def visit_binary_expr(self, expr: p.BinaryExpr) -> None: ... 
+ + def visit_unary_expr(self, expr: p.UnaryExpr) -> None: ... + + def visit_call_expr(self, expr: p.CallExpr) -> None: ... + + def visit_get_expr(self, expr: p.GetExpr) -> None: ... + + def visit_literal_expr(self, expr: p.LiteralExpr) -> None: ... + + def visit_variable_expr(self, expr: p.VariableExpr) -> None: ... + + def visit_logical_expr(self, expr: p.LogicalExpr) -> None: ... + + def visit_set_expr(self, expr: p.SetExpr) -> None: ... class MidasHighlighter(Highlighter, m.Stmt.Visitor[None], m.Expr.Visitor[None]): diff --git a/midas/cli/main.py b/midas/cli/main.py index 7319226..fb0e716 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -5,6 +5,7 @@ import click from midas.ast.location import Location import midas.ast.midas as m +import midas.ast.python as p from midas.ast.printer import PythonAstPrinter from midas.cli.highlighter import Highlighter, MidasHighlighter, PythonHighlighter from midas.lexer.midas import MidasLexer @@ -40,21 +41,13 @@ def dump_ast(output: Optional[TextIO], parse: bool, file: TextIO): if parse: parser = PythonParser() - parser.visit(tree) + stmts: list[p.Stmt] = parser.parse_module(tree) printer = PythonAstPrinter() dump = "" - for name, annotation in parser.annotations: - dump += f"{name} = " - if annotation is None: - dump += "None" - else: - dump += printer.print(annotation) + for stmt in stmts: + dump += printer.print(stmt) dump += "\n" - dump += "\n# Functions\n\n" - - for func in parser.functions: - dump += printer.print(func) + "\n" else: dump = ast.dump(tree, indent=4) @@ -67,13 +60,10 @@ def dump_ast(output: Optional[TextIO], parse: bool, file: TextIO): def highlight_python(source: str, path: str) -> Highlighter: tree: ast.Module = ast.parse(source, filename=path) parser = PythonParser() - parser.visit(tree) + stmts: list[p.Stmt] = parser.parse_module(tree) highlighter = PythonHighlighter(source) - for _, annotation in parser.annotations: - if annotation is not None: - highlighter.highlight(annotation) - for func in 
parser.functions: - highlighter.highlight(func) + for stmt in stmts: + highlighter.highlight(stmt) return highlighter From 8a9b4f39893254369b3a0863d0d98b503b4d0fe1 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 22:43:38 +0200 Subject: [PATCH 24/27] feat(parser): parse assignments --- gen/python.py | 10 +++++----- midas/ast/printer.py | 14 ++++++++++---- midas/ast/python.py | 24 ++++++++++++------------ midas/cli/highlighter.py | 2 +- midas/parser/python.py | 36 +++++++++++++++++++++++++----------- 5 files changed, 53 insertions(+), 33 deletions(-) diff --git a/gen/python.py b/gen/python.py index 33a01cc..db12f42 100644 --- a/gen/python.py +++ b/gen/python.py @@ -59,15 +59,15 @@ class TypeAssign: type: MidasType +class AssignStmt: + targets: list[Expr] + value: Expr + + ###< ###> Expr | Expressions -class AssignExpr: - name: str - value: Expr - - class BinaryExpr: left: Expr operator: ast.operator diff --git a/midas/ast/printer.py b/midas/ast/printer.py index f467a85..9ac012f 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -435,13 +435,19 @@ class PythonAstPrinter( with self._child_level(single=True): stmt.type.accept(self) - def visit_assign_expr(self, expr: p.AssignExpr) -> None: - self._write_line("AssignExpr") + def visit_assign_stmt(self, stmt: p.AssignStmt) -> None: + self._write_line("AssignStmt") with self._child_level(): - self._write_line(f"name: {expr.name}") + self._write_line("targets") + with self._child_level(): + for i, target in enumerate(stmt.targets): + self._idx = i + if i == len(stmt.targets) - 1: + self._mark_last() + target.accept(self) self._write_line("value", last=True) with self._child_level(single=True): - expr.value.accept(self) + stmt.value.accept(self) def visit_binary_expr(self, expr: p.BinaryExpr) -> None: self._write_line("BinaryExpr") diff --git a/midas/ast/python.py b/midas/ast/python.py index 5e219c8..7ca700b 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -97,6 +97,9 @@ class 
Stmt(ABC): @abstractmethod def visit_type_assign(self, stmt: TypeAssign) -> T: ... + @abstractmethod + def visit_assign_stmt(self, stmt: AssignStmt) -> T: ... + @dataclass(frozen=True) class ExpressionStmt(Stmt): @@ -133,6 +136,15 @@ class TypeAssign(Stmt): return visitor.visit_type_assign(self) +@dataclass(frozen=True) +class AssignStmt(Stmt): + targets: list[Expr] + value: Expr + + def accept(self, visitor: Stmt.Visitor[T]) -> T: + return visitor.visit_assign_stmt(self) + + ############### # Expressions # ############### @@ -146,9 +158,6 @@ class Expr(ABC): def accept(self, visitor: Visitor[T]) -> T: ... class Visitor(ABC, Generic[T]): - @abstractmethod - def visit_assign_expr(self, expr: AssignExpr) -> T: ... - @abstractmethod def visit_binary_expr(self, expr: BinaryExpr) -> T: ... @@ -174,15 +183,6 @@ class Expr(ABC): def visit_set_expr(self, expr: SetExpr) -> T: ... -@dataclass(frozen=True) -class AssignExpr(Expr): - name: str - value: Expr - - def accept(self, visitor: Expr.Visitor[T]) -> T: - return visitor.visit_assign_expr(self) - - @dataclass(frozen=True) class BinaryExpr(Expr): left: Expr diff --git a/midas/cli/highlighter.py b/midas/cli/highlighter.py index 45ed55c..e9c3c4e 100644 --- a/midas/cli/highlighter.py +++ b/midas/cli/highlighter.py @@ -151,7 +151,7 @@ class PythonHighlighter( def visit_type_assign(self, stmt: p.TypeAssign) -> None: stmt.type.accept(self) - def visit_assign_expr(self, expr: p.AssignExpr) -> None: ... + def visit_assign_stmt(self, stmt: p.AssignStmt) -> None: ... def visit_binary_expr(self, expr: p.BinaryExpr) -> None: ... 
diff --git a/midas/parser/python.py b/midas/parser/python.py index 277a71c..95fe0c0 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -4,17 +4,17 @@ from typing import Optional from midas.ast.location import Location from midas.ast.python import ( - AssignExpr, + AssignStmt, BaseType, ConstraintType, Expr, - ExpressionStmt, FrameColumn, FrameType, Function, MidasType, Stmt, TypeAssign, + VariableExpr, ) @@ -45,11 +45,14 @@ class PythonParser: case ast.AnnAssign(): return self.parse_annotation_assign(node) + case ast.Assign(): + return self.parse_assign(node) + case ast.FunctionDef(): return self.parse_function(node) case _: - print(f"Unsupported assignment: {ast.unparse(node)}") + print(f"Unsupported statement: {ast.unparse(node)}") return None def parse_annotation_assign(self, node: ast.AnnAssign) -> list[Stmt]: @@ -73,21 +76,32 @@ class PythonParser: ) if value is not None: - parsed_value: Expr = self.parse_expr(value) statements.append( - ExpressionStmt( + AssignStmt( location=loc, - expr=AssignExpr( - location=loc, - name=target, - value=parsed_value, - ), - ) + targets=[ + VariableExpr( + location=Location.from_ast(node.target), name=target + ), + ], + value=self.parse_expr(value), + ), ) case _: print(f"Unsupported annotation: {ast.unparse(node)}") return statements + def parse_assign(self, node: ast.Assign) -> AssignStmt: + targets: list[Expr] = [] + for target in node.targets: + targets.append(self.parse_expr(target)) + value: Expr = self.parse_expr(node.value) + return AssignStmt( + location=Location.from_ast(node), + targets=targets, + value=value, + ) + def parse_function(self, node: ast.FunctionDef) -> Function: loc: Location = Location.from_ast(node) match node: From 0b3f33d7fe3ec47ab133a3eb22190a340eb2d7db Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 25 May 2026 23:17:52 +0200 Subject: [PATCH 25/27] feat(parser): parse python expressions --- gen/python.py | 6 +++ midas/ast/printer.py | 25 ++++++++- midas/ast/python.py | 14 
+++++ midas/cli/highlighter.py | 2 + midas/parser/python.py | 110 ++++++++++++++++++++++++++++++++++++--- 5 files changed, 150 insertions(+), 7 deletions(-) diff --git a/gen/python.py b/gen/python.py index db12f42..9bf984d 100644 --- a/gen/python.py +++ b/gen/python.py @@ -74,6 +74,12 @@ class BinaryExpr: right: Expr +class CompareExpr: + left: Expr + operator: ast.cmpop + right: Expr + + class UnaryExpr: operator: ast.unaryop right: Expr diff --git a/midas/ast/printer.py b/midas/ast/printer.py index 9ac012f..e3ecde9 100644 --- a/midas/ast/printer.py +++ b/midas/ast/printer.py @@ -462,6 +462,19 @@ class PythonAstPrinter( with self._child_level(single=True): expr.right.accept(self) + def visit_compare_expr(self, expr: p.CompareExpr) -> None: + self._write_line("CompareExpr") + with self._child_level(): + self._write_line("left") + with self._child_level(single=True): + expr.left.accept(self) + + self._write_line(f"operator: {expr.operator.__class__.__name__}") + + self._write_line("right", last=True) + with self._child_level(single=True): + expr.right.accept(self) + def visit_unary_expr(self, expr: p.UnaryExpr) -> None: self._write_line("UnaryExpr") with self._child_level(): @@ -478,7 +491,7 @@ class PythonAstPrinter( with self._child_level(single=True): expr.callee.accept(self) - self._write_line("arguments", last=True) + self._write_line("arguments") with self._child_level(): for i, arg in enumerate(expr.arguments): self._idx = i @@ -486,6 +499,16 @@ class PythonAstPrinter( self._mark_last() arg.accept(self) + self._write_line("keywords", last=True) + with self._child_level(): + for i, (name, arg) in enumerate(expr.keywords.items()): + self._idx = i + if i == len(expr.keywords) - 1: + self._mark_last() + self._write_line(name) + with self._child_level(single=True): + arg.accept(self) + def visit_get_expr(self, expr: p.GetExpr) -> None: self._write_line("GetExpr") with self._child_level(): diff --git a/midas/ast/python.py b/midas/ast/python.py index 
7ca700b..d4fc032 100644 --- a/midas/ast/python.py +++ b/midas/ast/python.py @@ -161,6 +161,9 @@ class Expr(ABC): @abstractmethod def visit_binary_expr(self, expr: BinaryExpr) -> T: ... + @abstractmethod + def visit_compare_expr(self, expr: CompareExpr) -> T: ... + @abstractmethod def visit_unary_expr(self, expr: UnaryExpr) -> T: ... @@ -193,6 +196,16 @@ class BinaryExpr(Expr): return visitor.visit_binary_expr(self) +@dataclass(frozen=True) +class CompareExpr(Expr): + left: Expr + operator: ast.cmpop + right: Expr + + def accept(self, visitor: Expr.Visitor[T]) -> T: + return visitor.visit_compare_expr(self) + + @dataclass(frozen=True) class UnaryExpr(Expr): operator: ast.unaryop @@ -206,6 +219,7 @@ class UnaryExpr(Expr): class CallExpr(Expr): callee: Expr arguments: list[Expr] + keywords: dict[str, Expr] def accept(self, visitor: Expr.Visitor[T]) -> T: return visitor.visit_call_expr(self) diff --git a/midas/cli/highlighter.py b/midas/cli/highlighter.py index e9c3c4e..f4801bb 100644 --- a/midas/cli/highlighter.py +++ b/midas/cli/highlighter.py @@ -155,6 +155,8 @@ class PythonHighlighter( def visit_binary_expr(self, expr: p.BinaryExpr) -> None: ... + def visit_compare_expr(self, expr: p.CompareExpr) -> None: ... + def visit_unary_expr(self, expr: p.UnaryExpr) -> None: ... def visit_call_expr(self, expr: p.CallExpr) -> None: ... 
diff --git a/midas/parser/python.py b/midas/parser/python.py index 95fe0c0..4b6a3f1 100644 --- a/midas/parser/python.py +++ b/midas/parser/python.py @@ -6,14 +6,22 @@ from midas.ast.location import Location from midas.ast.python import ( AssignStmt, BaseType, + BinaryExpr, + CallExpr, + CompareExpr, ConstraintType, Expr, + ExpressionStmt, FrameColumn, FrameType, Function, + GetExpr, + LiteralExpr, + LogicalExpr, MidasType, Stmt, TypeAssign, + UnaryExpr, VariableExpr, ) @@ -33,11 +41,15 @@ class PythonParser: def parse_module(self, node: ast.Module) -> list[Stmt]: statements: list[Stmt] = [] for stmt in node.body: - parsed: None | Stmt | list[Stmt] = self.parse_stmt(stmt) - if isinstance(parsed, Stmt): - statements.append(parsed) - elif parsed is not None: - statements.extend(parsed) + try: + parsed: None | Stmt | list[Stmt] = self.parse_stmt(stmt) + if isinstance(parsed, Stmt): + statements.append(parsed) + elif parsed is not None: + statements.extend(parsed) + except UnsupportedSyntaxError as e: + print(f"{e}, skipping") + continue return statements def parse_stmt(self, node: ast.stmt) -> None | Stmt | list[Stmt]: @@ -51,6 +63,9 @@ class PythonParser: case ast.FunctionDef(): return self.parse_function(node) + case ast.Expr(value=expr): + return ExpressionStmt(expr=self.parse_expr(expr)) + case _: print(f"Unsupported statement: {ast.unparse(node)}") return None @@ -242,4 +257,87 @@ class PythonParser: raise UnsupportedSyntaxError(column) def parse_expr(self, node: ast.expr) -> Expr: - raise NotImplementedError() + match node: + case ast.BoolOp(): + return self.parse_bool_op(node) + + case ast.BinOp(left=left, op=op, right=right): + return BinaryExpr( + left=self.parse_expr(left), + operator=op, + right=self.parse_expr(right), + ) + + case ast.UnaryOp(op=op, operand=right): + return UnaryExpr( + operator=op, + right=self.parse_expr(right), + ) + + case ast.Compare(): + return self.parse_compare(node) + + case ast.Call(): + return self.parse_call(node) + + case 
ast.Constant(value=value): + return LiteralExpr(value=value) + + case ast.Attribute(value=object, attr=name): + return GetExpr( + object=self.parse_expr(object), + name=name, + ) + + case ast.Name(id=name): + return VariableExpr(name=name) + + case _: + raise UnsupportedSyntaxError(node) + + def parse_bool_op(self, node: ast.BoolOp) -> LogicalExpr: + op: ast.boolop = node.op + values: list[ast.expr] = node.values + expr: LogicalExpr = LogicalExpr( + left=self.parse_expr(values[0]), + operator=op, + right=self.parse_expr(values[1]), + ) + for value in values[2:]: + expr = LogicalExpr( + left=expr, + operator=op, + right=self.parse_expr(value), + ) + return expr + + def parse_compare(self, node: ast.Compare) -> Expr: + ops: list[ast.cmpop] = node.ops + rights: list[Expr] = [self.parse_expr(expr) for expr in node.comparators] + expr: Expr = CompareExpr( + left=self.parse_expr(node.left), + operator=ops[0], + right=rights[0], + ) + for i, right in enumerate(rights[1:]): + expr = LogicalExpr( + left=expr, + operator=ast.And(), + right=CompareExpr( + left=rights[i], + operator=ops[i], + right=right, + ), + ) + return expr + + def parse_call(self, node: ast.Call) -> CallExpr: + return CallExpr( + callee=self.parse_expr(node.func), + arguments=[self.parse_expr(arg) for arg in node.args], + keywords={ + arg.arg: self.parse_expr(arg.value) + for arg in node.keywords + if arg.arg is not None # Should always be True, type checker happy + }, + ) From 170101aa37143b55e35b602178a74f2ef5858bc3 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Tue, 26 May 2026 10:12:59 +0200 Subject: [PATCH 26/27] fix(parser): add call keywords attribute in gen definition --- gen/python.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gen/python.py b/gen/python.py index 9bf984d..0aadd57 100644 --- a/gen/python.py +++ b/gen/python.py @@ -88,6 +88,7 @@ class UnaryExpr: class CallExpr: callee: Expr arguments: list[Expr] + keywords: dict[str, Expr] class GetExpr: From 
6524b3591a631a9f551cb0604dbec6a72634a616 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Tue, 26 May 2026 10:14:23 +0200 Subject: [PATCH 27/27] feat(cli): highlight midas keywords --- midas/cli/highlight.css | 4 ++++ midas/cli/main.py | 15 ++++++++++----- midas/lexer/keyword.py | 12 ------------ midas/lexer/midas.py | 3 +-- midas/lexer/token.py | 16 ++++++++++++++++ 5 files changed, 31 insertions(+), 19 deletions(-) delete mode 100644 midas/lexer/keyword.py diff --git a/midas/cli/highlight.css b/midas/cli/highlight.css index 1abed08..31f005d 100644 --- a/midas/cli/highlight.css +++ b/midas/cli/highlight.css @@ -50,4 +50,8 @@ span { --border: 2px; z-index: 10; } + + &.keyword { + color: rgb(211, 72, 9); + } } \ No newline at end of file diff --git a/midas/cli/main.py b/midas/cli/main.py index fb0e716..11d69e0 100644 --- a/midas/cli/main.py +++ b/midas/cli/main.py @@ -1,11 +1,12 @@ import ast +from dataclasses import dataclass from typing import Optional, TextIO import click -from midas.ast.location import Location import midas.ast.midas as m import midas.ast.python as p +from midas.ast.location import Location from midas.ast.printer import PythonAstPrinter from midas.cli.highlighter import Highlighter, MidasHighlighter, PythonHighlighter from midas.lexer.midas import MidasLexer @@ -73,20 +74,24 @@ def highlight_midas(source: str, path: str) -> Highlighter: parser = MidasParser(tokens) stmts: list[m.Stmt] = parser.parse() highlighter = MidasHighlighter(source) + for err in parser.errors: + print(err.get_report()) + @dataclass(frozen=True) class LocatableToken: - def __init__(self, token: Token): - self.token: Token = token + token: Token @property def location(self) -> Location: return self.token.get_location() + for stmt in stmts: + highlighter.highlight(stmt) for token in tokens: if token.type == TokenType.COMMENT: highlighter.wrap(LocatableToken(token), "comment") - for stmt in stmts: - highlighter.highlight(stmt) + elif token.is_keyword: + 
highlighter.wrap(LocatableToken(token), "keyword") return highlighter diff --git a/midas/lexer/keyword.py b/midas/lexer/keyword.py deleted file mode 100644 index 878f8cd..0000000 --- a/midas/lexer/keyword.py +++ /dev/null @@ -1,12 +0,0 @@ -from midas.lexer.token import TokenType - -KEYWORDS: dict[str, TokenType] = { - "type": TokenType.TYPE, - "op": TokenType.OP, - "predicate": TokenType.PREDICATE, - "extend": TokenType.EXTEND, - "where": TokenType.WHERE, - "true": TokenType.TRUE, - "false": TokenType.FALSE, - "none": TokenType.NONE, -} diff --git a/midas/lexer/midas.py b/midas/lexer/midas.py index fe521ce..acc97d6 100644 --- a/midas/lexer/midas.py +++ b/midas/lexer/midas.py @@ -1,6 +1,5 @@ from midas.lexer.base import Lexer -from midas.lexer.keyword import KEYWORDS -from midas.lexer.token import TokenType +from midas.lexer.token import KEYWORDS, TokenType class MidasLexer(Lexer): diff --git a/midas/lexer/token.py b/midas/lexer/token.py index 052d8a6..a518a8b 100644 --- a/midas/lexer/token.py +++ b/midas/lexer/token.py @@ -58,6 +58,18 @@ class TokenType(Enum): NEWLINE = auto() +KEYWORDS: dict[str, TokenType] = { + "type": TokenType.TYPE, + "op": TokenType.OP, + "predicate": TokenType.PREDICATE, + "extend": TokenType.EXTEND, + "where": TokenType.WHERE, + "true": TokenType.TRUE, + "false": TokenType.FALSE, + "none": TokenType.NONE, +} + + @dataclass(frozen=True) class Token: """A scanned token""" @@ -86,3 +98,7 @@ class Token: def location_to(self, to: Token) -> Location: return Location.span(self.get_location(), to.get_location()) + + @property + def is_keyword(self) -> bool: + return self.lexeme in KEYWORDS