38 Commits

Author SHA1 Message Date
16239db479 feat(gen): add tuple expr to generator 2026-06-29 22:44:39 +02:00
dc2134c87d tests: update with multi-parameter generics 2026-06-29 22:43:08 +02:00
89f3c945e4 fix: minor fixes 2026-06-29 22:41:47 +02:00
45f7d1be2b feat: add Python tuple expression 2026-06-29 14:35:31 +02:00
27f3fa7d1e feat: handle multi-parameter generic in Python 2026-06-29 14:24:38 +02:00
78eba39ae3 feat(checker): add len() 2026-06-29 14:02:52 +02:00
3b78b37306 fix(checker): allow some assignments to unknown 2026-06-29 14:02:29 +02:00
9e14b30bc9 feat(checker): add methods on str 2026-06-29 14:01:33 +02:00
a6a1075f91 feat(checker): type check tuple instantiation in Midas 2026-06-29 14:00:37 +02:00
11be47fce3 fix(parser): parse empty calls 2026-06-29 13:59:03 +02:00
2eeede9826 fix(gen): prevent empty loop for column asserts 2026-06-29 11:19:26 +02:00
f796f4c6fa fix(checker): allow iterating on unknown 2026-06-29 11:13:47 +02:00
c333735580 fix(checker): allow subtypes and unknown as if test 2026-06-29 11:06:35 +02:00
2416102494 feat(gen): assertions for column values 2026-06-29 11:05:59 +02:00
eb4971686a fix(checker): allow calling unknown method on dataframes 2026-06-29 11:01:53 +02:00
9f59366289 feat(gen): generate asserts for dataframes and columns 2026-06-26 14:56:15 +02:00
fd0b410d74 fix(checker): change heterogeneous errors to warnings 2026-06-26 11:55:31 +02:00
5b0c5c01ad feat(checker): add mean method on frames 2026-06-26 11:21:38 +02:00
43e40396a1 fix(checker): type check None literal 2026-06-26 11:21:17 +02:00
0d265ef24c feat(checker): lookup dunders on dataframes 2026-06-26 10:35:50 +02:00
88c56c9d15 tests: update with reordered argument typing 2026-06-26 10:28:12 +02:00
d1c217a335 refactor: use metaclass to collect frame methods 2026-06-25 22:31:59 +02:00
5b3e87afcb refactor: add MethodResolver class 2026-06-25 22:14:25 +02:00
894d5a7196 feat: add dummy classes for typing frames and columns 2026-06-25 21:35:47 +02:00
eb809c6341 fix(checker): improve heterogeneous error message 2026-06-25 21:35:19 +02:00
bd68d1003f feat(checker): lookup dataframe methods 2026-06-25 21:34:59 +02:00
72c9236650 feat(checker): defined add method of dataframes 2026-06-25 21:34:00 +02:00
90051c7981 feat(checker): add structural subtyping rule for dataframes 2026-06-25 21:09:14 +02:00
dd1e2e693c feat(cli): print context for multiline diagnostics 2026-06-25 16:32:15 +02:00
78e10e0895 feat(checker): process frame type definitions 2026-06-24 14:36:53 +02:00
c81e4a9560 feat(cli): add frame type to highlighter 2026-06-24 14:36:53 +02:00
6d0cf1a055 feat(parser): add frame type to midas syntax 2026-06-24 14:36:52 +02:00
cc5e7af143 feat(gen): add support for tuples and dataframes 2026-06-24 14:36:51 +02:00
3bdbc80079 feat(checker): handle setting dataframe column 2026-06-24 14:36:51 +02:00
c1b5284f72 feat(checker): type check subscript on dataframes 2026-06-24 14:36:28 +02:00
5e9ccd4e13 feat(types): add TupleType 2026-06-24 14:36:04 +02:00
cf083fc0c3 fix(types): add str methods to dataframe types 2026-06-24 14:35:31 +02:00
a80da5db2c feat(types): add DataFrameType and ColumnType 2026-06-24 14:35:30 +02:00
38 changed files with 1940 additions and 1862 deletions

View File

@@ -1,117 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="128"
height="128"
viewBox="0 0 128 128"
version="1.1"
id="svg1"
inkscape:export-filename="logo.png"
inkscape:export-xdpi="96"
inkscape:export-ydpi="96"
inkscape:version="1.4.4 (1:1.4.4+202605061436+dcaf3e7d9e)"
sodipodi:docname="logo.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview1"
pagecolor="#ffffff"
bordercolor="#000000"
borderopacity="0.25"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="mm"
showgrid="true"
inkscape:zoom="1.9332778"
inkscape:cx="-8.2760999"
inkscape:cy="112.2446"
inkscape:window-width="2584"
inkscape:window-height="1028"
inkscape:window-x="0"
inkscape:window-y="24"
inkscape:window-maximized="1"
inkscape:current-layer="layer1">
<inkscape:grid
id="grid1"
units="px"
originx="0"
originy="0"
spacingx="4"
spacingy="4"
empcolor="#0099e5"
empopacity="0.30196078"
color="#0099e5"
opacity="0.14901961"
empspacing="4"
enabled="true"
visible="true" />
</sodipodi:namedview>
<defs
id="defs1">
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4689"
id="linearGradient1478"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)"
x1="26.648937"
y1="20.603781"
x2="135.66525"
y2="114.39767" />
<linearGradient
id="linearGradient4689">
<stop
style="stop-color:#e1be1e;stop-opacity:1;"
offset="0"
id="stop4691" />
<stop
style="stop-color:#ffeb82;stop-opacity:1;"
offset="1"
id="stop4693" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4671"
id="linearGradient1475"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)"
x1="150.96111"
y1="192.35176"
x2="112.03144"
y2="137.27299" />
<linearGradient
id="linearGradient4671">
<stop
style="stop-color:#ffdc21;stop-opacity:1;"
offset="0"
id="stop4673" />
<stop
style="stop-color:#ffeb82;stop-opacity:1;"
offset="1"
id="stop4675" />
</linearGradient>
</defs>
<g
inkscape:label="Calque 1"
inkscape:groupmode="layer"
id="layer1">
<g
id="g1"
transform="translate(2.911719,3.414527)">
<path
style="fill:url(#linearGradient1478);fill-opacity:1"
d="m 60.510156,6.3979729 c -4.583653,0.021298 -8.960939,0.4122177 -12.8125,1.09375 C 36.35144,9.4962267 34.291407,13.691825 34.291406,21.429223 v 10.21875 h 26.8125 v 3.40625 h -26.8125 -10.0625 c -7.792459,0 -14.6157592,4.683717 -16.7500002,13.59375 -2.46182,10.212966 -2.5710151,16.586023 0,27.25 1.9059283,7.937852 6.4575432,13.593748 14.2500002,13.59375 h 9.21875 v -12.25 c 0,-8.849902 7.657144,-16.656248 16.75,-16.65625 h 26.78125 c 7.454951,0 13.406253,-6.138164 13.40625,-13.625 v -25.53125 c 0,-7.266339 -6.12998,-12.7247775 -13.40625,-13.9375001 -4.605987,-0.7667253 -9.385097,-1.1150483 -13.96875,-1.09375 z m -14.5,8.2187501 c 2.769547,0 5.03125,2.298646 5.03125,5.125 -2e-6,2.816336 -2.261703,5.09375 -5.03125,5.09375 -2.779476,-1e-6 -5.03125,-2.277415 -5.03125,-5.09375 -1e-6,-2.826353 2.251774,-5.125 5.03125,-5.125 z"
id="path1948" />
<path
style="fill:url(#linearGradient1475);fill-opacity:1"
d="m 91.228906,35.054223 v 11.90625 c 0,9.230755 -7.825895,16.999999 -16.75,17 h -26.78125 c -7.335833,0 -13.406249,6.278483 -13.40625,13.625 v 25.531247 c 0,7.26634 6.318588,11.54032 13.40625,13.625 8.487331,2.49561 16.626237,2.94663 26.78125,0 6.750155,-1.95439 13.406253,-5.88761 13.40625,-13.625 V 92.897973 h -26.78125 v -3.40625 h 26.78125 13.406254 c 7.79246,0 10.69625,-5.435408 13.40624,-13.59375 2.79933,-8.398886 2.68022,-16.475776 0,-27.25 -1.92578,-7.757441 -5.60387,-13.59375 -13.40624,-13.59375 z m -15.0625,64.65625 c 2.779478,3e-6 5.03125,2.277417 5.03125,5.093747 -2e-6,2.82635 -2.251775,5.125 -5.03125,5.125 -2.76955,0 -5.03125,-2.29865 -5.03125,-5.125 2e-6,-2.81633 2.261697,-5.093747 5.03125,-5.093747 z"
id="path1950" />
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 4.7 KiB

View File

@@ -1,697 +0,0 @@
//#import "@preview/codly:1.3.0": codly, codly-init
// Fix unaligned highlights in v0.15.0 ()
// See https://github.com/Dherse/codly/pull/132
#import "@local/codly:1.3.1": codly, codly-init
#import "@preview/codly-languages:0.1.10": codly-languages
#import "template.typ": TODO, project
#import "@preview/gentle-clues:1.3.1" as gc
#let midas-version = toml("../pyproject.toml").project.version
#let head-ref = read("../.git/HEAD").split(":").at(1).trim()
#let commit-hash = read("../.git/" + head-ref).slice(0, 8)
#show: project.with(
title: [Midas User Manual],
author: "Louis Heredero",
version: midas-version,
hash: commit-hash,
icon-path: path("../assets/icon.svg"),
)
#show: codly-init
#codly(
languages: codly-languages
+ (
midas: (
name: "Midas",
color: rgb("#eedd47"),
icon: box(
image(
"../assets/icon.svg",
height: 130%,
fit: "contain",
),
),
),
),
)
= Introduction
Python is a very popular programming language, especially in data science.
However, it has been designed for simplicity, distancing itself from typed languages such as Java or C to embrace dynamic typing.
What this means is that in Python, type checks are deferred to runtime when operations are concretely executed.
For developers, it might seem like a great way of simplifying the language and making it very flexible, but it does come with a cost.
Indeed, type errors are very easy to make in Python. While passing an integer where a string is expected might not be an issue in some cases, it is the sort of mistake that can cause crashes or incorrect results without a clear diagnostic to help the user fix it.
Fortunately, developers using IDEs or properly configured text editors can benefit from external type checkers such as MyPy which will perform static type analysis of their Python code. Some can also be configured to be very strict, forcing the user to make the whole code typeable statically, thus avoiding any runtime type errors.
This is not the end of the problem though. Some parts of a program, especially in data related fields, may not be available at "compile-time". For example, a dataset can be loaded from an external file, or data can be fetched from an API, with no guarantees of having the expected format when analyzing the code statically.
In turn, that can cause a range of loud and silent errors at runtime. A malformed number will probably crash the program when trying to convert it, but a NaN in a series of values might just produce wrong results without any exception. Combine this with often long-running data-processing pipelines and this is how developers can waste hours of precious computation time.
Midas is a type system which can be used on top of Python to provide better type checking capabilities and gradual typing.
It aims at providing optional but strict type annotations and casting operations which can produce runtime assertions. It also allows the user to define dependent types with value constraints that are translated into runtime checks.
= Installation
Midas comes as a very light Python package that you can install on your system in a few simple steps.
== Requirements
Below are the requirements for installing Midas. All Python dependencies will be installed by `uv` during the installation process described in @install-steps.
- Python 3.11+
- `uv`
== Steps <install-steps>
1. Clone the repository
```bash
git clone https://git.kb28.ch/HEL/midas.git
```
2. Navigate inside the directory
```bash
cd midas
```
3. Install Midas as a tool in your local user space
```bash
uv tool install .
```
And that's it! You can now use Midas commands anywhere, like this:
```bash
midas --help
```
= Quick Start
This chapter will give you the keys to quickly start using Midas in your project.
== Defining custom types
To begin with, you might want to define some custom types for your project, to avoid handling anonymous float values everywhere. To do so, create a `*.midas` file in your project, and write some definitions for your types. See @midas-ref for more information on syntax and features.
@qs-midas shows a simple example of what it might look like.
#codly(header: [types.midas])
#figure(
```midas
type Meter = float
extend Meter {
def __add__: fn(Meter, /) -> Meter
def __sub__: fn(Meter, /) -> Meter
}
type Coordinate = object
extend Coordinate {
prop x: Meter
prop y: Meter
}
```,
caption: [Example Midas type definitions],
) <qs-midas>
You can check for any syntax error using the following command:
```bash
midas validate types.midas
```
When you are happy with your definitions, you can generate Python stubs to use in your source code. This allows other type checkers like MyPy to recognize your custom types and avoid reporting them as undefined. It can also help catch some type errors in your IDE.
```bash
midas stubs types.midas -o stubs.pyi
```
This command will generate a file as shown in @qs-stubs, providing stub classes to represent the type lattice including methods and properties.
#codly(header: [stubs.pyi])
#figure(
```pyi
from __future__ import annotations
class Meter(float):
def __add__(self, _0: Meter, /) -> Meter: ...
def __sub__(self, _0: Meter, /) -> Meter: ...
class Coordinate(object):
x: Meter
y: Meter
```,
caption: [Generated stubs from example definitions of @qs-midas],
) <qs-stubs>
== Using Midas in Python
You can now write your Python program as you would normally. You can import your custom types from the generated stubs file and use them in type annotations.
You can also import the `cast` and `unsafe_cast` functions from `midas.typing` to explicitly cast a value to a specific type (see @cast for more information).
An example Python script is shown in @qs-python, demonstrating how you can use custom types in type annotations. Notice the comments describing errors that will be caught by the type checker in @qs-type-checking.
#codly(header: [script.py])
#figure(
```python
from lib import load_coordinate
from midas.typing import cast
from stubs import Coordinate, Meter
p1 = cast(Coordinate, load_coordinate(0))
p2 = cast(Coordinate, load_coordinate(1))
diff_x = p2.x - p1.x
diff_y = p2.y - p1.y
dist = diff_x + diff_y
p2.x += cast(Meter, 1)
p2.y = True # invalid, wrong type
p2.z = 3 # invalid, no property 'z' on Coordinate
p2.x.a = 3 # invalid, no properties on Meter
```,
caption: [Example Python script],
) <qs-python>
== Type checking <qs-type-checking>
Now that you have defined some types and written a script, you can run the type checker with the following command. You can also skip this step and directly run the compilation command in @qs-compilation.
```bash
midas check -t types.midas script.py
```
== Compiling <qs-compilation>
The final step is to compile your code. This step will produce a runnable Python script, including runtime assertions generated by `cast` expressions.
```bash
midas compile -t types.midas script.py
python3 build/midas/script.py
```
= Midas Language Reference <midas-ref>
In this chapter, you will find a complete reference for the Midas definition language.
A `*.midas` file contains a number of statements, which can be:
- *`type`* statements (see @type-stmt): to define a new type
- *`extend`* statements (see @extend-stmt): to define members of a type
- *`predicate`* statements (see @predicate-stmt): to define named predicates that can be used in constraint types
== Type Statement <type-stmt>
A *`type`* statement lets you define a new type. It requires a unique name and base type.
The simplest form of a *`type`* statement is:
#figure(
```midas
type MyType = float
```,
caption: [Simple `type` statement declaring a new type "`MyType`" as a subtype of `float`],
) <midas-simple-alias>
This statement defines a new type called `MyType` which is a subtype of `float`. `MyType` is a `float` but a `float` is not necessarily `MyType`.
=== Builtin / base types
A number of base types are provided out of the box, which can be used to derive other types.
They correspond to Python's builtin types:
```py object```,
```py str```,
```py float```,
```py int```,
```py bool```,
```py list```,
```py dict```,
```py None```.
Some differences are to be noted however.
1. ```py bool``` is not a subtype of ```py int```
2. ```py list``` are homogeneous, i.e. all items must be of the same type
3. ```py dict``` keys and values are homogeneous, i.e. all keys must be of the same type and all values must be of the same type (can be different from keys).
=== Function types
A function type is written in a similar notation to Python function definitions:
#figure(
```midas
type Repeater = fn(text: str, count: int) -> str
```,
caption: [Simple function type definition],
)
Midas supports positional-only, keyword-only and mixed arguments (using the `/` and `*` separators). You may omit the name of positional-only arguments. The return type is required.
Optional parameters can be indicated by adding a question mark (`?`) after their type:
#figure(
```midas
type Repeater = fn(text: str, count: int, *, sep: str?) -> str
```,
caption: [Function type definition with an optional keyword-only parameter],
)
#gc.warning[
Sink arguments (`*args`, `**kwargs`) are not currently supported.
]
=== Constraint types
A useful feature provided by Midas is the possibility to combine types with custom value constraints. For example, you might want to define a type for positive amounts of money:
#figure(
```midas
type Money = float
type Income = Money where _ >= 0
```,
caption: [Simple constraint type definition],
)
Constraints can be combined with any type using the `where` keyword, followed by a constraint expression (see @constraint-expr).
=== Generic types
For more complex types, you might want to use type parameters. For example, to define a container, we might write:
#figure(
```midas
type Container[T] = object
```,
caption: [Simple generic container type definition],
)
To better refine a generic type, you can also bound type parameters using the following syntax:
#figure(
```midas
type Container[T <: float] = object
```,
caption: [Generic container type definition with a bound],
)
This can be read as "`Container` is a generic type which takes one type parameter `T` that must be a subtype of `float`".
You can use a generic type, i.e. instantiate it, by using a similar syntax with concrete types as arguments:
#figure(
```midas
type MyContainer = Container[MyType]
```,
caption: [Application of a generic type],
)
Generic types can also take multiple parameters, which are then separated by commas:
#figure(
```midas
type ZipCodeRegistry = dict[int, str]
```,
caption: [Application of a multi-parameter generic type],
)
The _body_ of a generic type, i.e. the right-hand side of the definition, can contain or even be equal to any number of its parameters.#footnote[The latter is not something that is expressible in standard Python, yet it brings a semantic distinction on top of structurally equivalent values.] For example, the following is a valid type statement:
#figure(
```midas
type Price[T <: Currency] = T where _ > 0
```,
caption: [Type parameters in a generic type's body],
)
#pagebreak()
== Extend Statement <extend-stmt>
Type statements allow you to define new types, kind of like type aliases. However, a type might have properties or methods of its own. These might override those of the parent type or be brand new members.
This is where the `extend` statement comes into play. It allows defining members on a given type. Members can either be properties (`prop`) or methods (`def`). The only difference between the two is that methods must be functions and can be overloaded.
Here is a simple example showing how to define a property and a method on a custom type:
#figure(
```midas
type MyType = float
extend MyType {
prop norm: float
def double: fn() -> MyType
}
```,
caption: [Simple `extend` statement defining a property and a method],
)
An `extend` statement can appear anywhere after the type it extends has been defined.
You may want to override Python's dunder methods to implement type checking for some basic operators, like `__add__` for the `+` operator.
#figure(
```midas
type Money = float
extend Money {
def __add__: fn(Money, /) -> Money
def __mul__: fn(float, /) -> Money
}
```,
caption: [Simple `extend` statement overriding some dunder methods],
)
When extending a generic type, you must specify the whole type, including its parameter(s):
#figure(
```midas
type Container[T <: float] = object
extend Container[T <: float] {
prop content: T
def set_content: fn(content: T) -> None
}
```,
caption: [Generic `extend` statement using type parameters in the declared members],
)
#pagebreak()
== Predicate Statement <predicate-stmt>
A *`predicate`* statement lets you define a named constraint expression, like a function, which can then be used in other constraint expressions (either in other predicate statements or in constraint types). See @constraint-expr for more information about the syntax of constraint expressions.
The left-hand side of a predicate statement is written as a function signature, without a return type. The right-hand side is a constraint expression. For example:
#figure(
```midas
predicate is_positive(v: float) = v >= 0
```,
caption: [Simple `predicate` statement defining an `is_positive` predicate],
)
The left-hand side can also be curried to allow partial application. For example:
#figure(
```midas
predicate in_range(mn: float, mx: float)(v: float) = mn <= v & v <= mx
predicate is_ratio = in_range(0.0, 1.0)
```,
caption: [Curried `predicate` statement and partial application],
) <midas-predicate-partial>
Notice that the second predicate statement doesn't take any parameters. This is simply a partial application of another predicate, kind of like an alias. You can use it in other expressions to finalize the call:
#figure(
```midas
type Efficiency = float where is_ratio(_)
```,
caption: [Constraint type definition using the partially applied predicate from @midas-predicate-partial],
)
Of course you can also directly call `in_range`:
#figure(
```midas
type Efficiency = float where in_range(0.0, 1.0)(_)
```,
caption: [Full call of curried predicate from @midas-predicate-partial],
)
When compiled, named predicates are translated to Python functions which are used in runtime assertions. Only predicates that are referenced are compiled.
#pagebreak()
== Constraint Expressions <constraint-expr>
*Constraint expressions* are Python-like expressions which can appear in *`predicate`* statements or in constraint types.
They can contain comparisons, simple computations, logical operations and must evaluate to a boolean value.
Context is quite restricted inside these expressions. You can only reference some builtin functions, such as type constructors (`float(...)`, `str(...)`, etc.), parameters of predicate statements, and named predicates. In constraint types, the special variable `_` can be used to reference the value targeted by the type. For example:
#figure(
```midas
predicate not_nan(v: float) = v != float("nan")
type RealFloat = float where not_nan(_)
```,
caption: [Example constraint expressions],
) <ex-constraint-expr>
In the predicate statement (@ex-constraint-expr:1), we reference the parameter `v` and the builtin `float` function.
In the constraint type definition (@ex-constraint-expr:2), we then reference the named predicate `not_nan`, passing the value targeted by the type itself (`_`).
= Supported Python Syntax <python-ref>
Midas integrates naturally in Python via type annotations. Through generated stubs, even other type checkers can detect your custom types (see @cmd-stubs).
It has been designed to leave the user free to type as much or as little of their code as they wish, while being strict about the parts that are annotated. By default, any untyped Python expression is assigned `UnknownType`.
Any operation is permitted on `UnknownType` and will result in `UnknownType` values.
The moment an expression can be typed, whether thanks to an annotation or a literal value, the type checker kicks in and will validate your statements.
Because Python is a very flexible language with many features, some expressions and statements might be more complex to properly type check, thus only a subset of the Python language is fully supported. This chapter lists all supported features of Python and how they affect type checking.
Some examples are presented in the following sections in the form of code blocks. Highlights in the code blocks indicate the type assigned to each expression by the type checker. Some types may be omitted for readability. For example:
#codly(
highlights: (
(
line: 1,
start: 5,
fill: green,
tag: [_int_],
),
(
line: 2,
start: 7,
end: 7,
fill: green,
tag: [_int_],
),
),
)
```python
v = 3
print(v)
```
== Literals
Literal Python values are type checked using builtin types. Lists and dictionaries of literals are also typed like literals. This does not include comprehension lists/dicts (```py [. for . in .]```), nor formatted strings (```py f"..."```). @supported-literals shows the list of supported literal values and their types.
#let supported-literals = table(
columns: 2,
table.header[*Example value*][*Judged Type*],
```py 42```, ```py int```,
```py 3.14```, ```py float```,
```py True```, ```py bool```,
```py "Midas"```, ```py str```,
```py None```, ```py None```,
```py [1, 2, 3]```, ```py list[int]```,
```py {1: "One", 2: "Two"}```, ```py dict[int, str]```,
```py ("1", 1, True)```, ```py tuple[str, int, bool]```,
)
#figure(
supported-literals,
caption: [Supported literal values and their judged types],
) <supported-literals>
== Assignments
Variable assignments allow assigning a new value to a variable. For the type checker, this implies two things:
1. If the variable was not already declared in the current scope, it is declared at that point with the type of the right-hand side expression
2. If the variable was already declared, the type of the right-hand side expression is checked against the declared type of the variable. Only a subtype of the variable's type can be assigned to it
Once a variable has been given a type, it cannot be changed in the same scope.
The walrus operator (```py :=```) is not currently supported.
A simple annotation declaration, without assigning a value, is enough to declare a variable. For example:
#figure(
```python
var: float
```,
caption: [Bare Python variable annotation without assignment],
)
Because unpacking is not supported, assigning to multiple values is also not handled by the type checker.
== Arithmetic
- All basic binary operators are supported, through dunder methods.
- All comparison operators except ```py in``` are supported.
- All unary operators are supported (`+`, `-`, `~`).
- All logical operators are supported (```py and```, ```py or```, ```py not```).
== Ternary operator
The ternary operator ```py . if . else .``` is supported. As for `if` statements (see @if-else), the test expression must be a boolean. Additionally, both branches must be of the same type.
For example:
#codly(
highlights: (
(
line: 1,
start: 10,
end: 44,
tag: [_str_],
fill: blue,
),
(
line: 1,
start: 11,
end: 16,
tag: [_str_],
fill: green,
),
(
line: 1,
start: 39,
end: 43,
tag: [_str_],
fill: green,
),
(
line: 1,
start: 21,
end: 32,
tag: [_bool_],
fill: green,
),
),
)
#figure(
```python
parity = ("even" if num % 2 == 0 else "odd")
```,
caption: [Typing of ternary operator],
)
== Control flow
Some control flow features are supported. Given the limited scope of this project, not all constructs are supported. The following are those currently handled and type checked by Midas.
=== `if` / `elif` / `else` <if-else>
Conditional statements are checked relatively strictly by Midas. The test expression, i.e. what comes after the ```py if``` keyword, must be a boolean. While Python allows introducing and leaking new variables from inside an ```py if``` statement, Midas will strictly forbid leaks by restraining bindings to the scope they are defined in. For example, the following Python code will not compile with Midas:
#figure(
```python
age = 22
if age >= 18:
msg = "You're an adult"
else:
msg = "You're still a child"
print(msg) # -> unknown variable 'msg'
```,
caption: [`if`/`else` statement cannot leak variables],
)
=== `for` loops
Simple forms of `for` loops can be used, that is using a single variable and iterating over an object implementing the `__getitem__` method. Like above in @if-else, leaking variables from inside the loop is ignored.
The `for`-`else` statements are not supported. `while` loops are also not supported.
== Functions
You can define functions as usual and the type checker will do its best to type it. Apart from argument sinks (`*args`, `**kwargs`), all forms of parameter specifications are supported (positional-only, keyword-only, mixed, optional).
As for the rest of your code, type annotations are optional, but recommended. If you omit the return type hint, the type checker will try to infer it from the function body and its return statements. If you did specify a return type, all return paths must return values that are subtypes of the type hint.
#codly(
highlights: (
(
line: 2,
start: 12,
end: 16,
tag: [_float_],
fill: green,
),
(
line: 2,
start: 12,
tag: [_float_],
fill: blue,
),
(
line: 3,
start: 10,
end: 15,
tag: [_(value: float) -> float_],
fill: green,
),
(
line: 3,
start: 17,
end: 19,
tag: [_float_],
fill: green,
),
(
line: 3,
start: 10,
tag: [_float_],
fill: blue,
),
),
)
#figure(
```python
def double(value: float) -> float:
return value * 2
result = double(4.0)
```,
caption: [Typing of function's body and call],
)
Anonymous functions (```py lambda```) are not yet supported.
== Casts <cast>
#gc.info[
The functions discussed in this section are provided by the `midas.typing` submodule. You can import them in your script like so:
#figure(
```python
from midas.typing import cast, unsafe_cast
```,
caption: [Importing cast functions],
)
]
Sometimes, you may want to use a value whose type is not known to the type checker in a place where it expects a particular type. In that case, if you do know that the runtime type will correspond to what is expected, you can use a `cast` expression.
Similar to the `cast` function from the `typing` package of Python's Standard Library, it allows telling the type checker that a value has a given type. While `typing`'s function doesn't have any runtime side-effect, Midas' will generate runtime assertions, ensuring that your statement is true when running the code. What cannot be checked statically is checked at runtime.
In the following example, a runtime check would be generated to ensure that the value is indeed a `float` and that it satisfies the type's constraint (i.e. `>= 0`):
#codly(
highlights: (
(
line: 1,
start: 35,
end: 47,
tag: [_UnknownType_],
fill: red,
),
(
line: 2,
start: 7,
end: 17,
tag: [_PositiveFloat_],
fill: green,
),
),
)
#figure(
```python
typed_value = cast(PositiveFloat, unknown_value)
print(typed_value)
```,
caption: [Typing of `cast` expression],
)
There may be some cases where the cost of checking a value at runtime is simply not worth the safety, for example when dealing with a big dataset. If you wish to do so, you can use `unsafe_cast` which will only tell the type checker the type of the value, without generating a runtime assertion. This maps to the default behavior of `typing`'s own `cast` function.
If the value passed to `cast` or `unsafe_cast` is a literal (e.g. an integer, a string, a list of literals, etc.), the assertion is evaluated _at compile-time_ and no runtime assertion is generated.
= Commands <commands>
#TODO
== Type Checking (`check`) <cmd-check>
== Compiling (`compile`) <cmd-compile>
== Formatting (`format`) <cmd-format>
== Highlighting (`highlight`) <cmd-highlight>
== Dumping the AST (`parse`) <cmd-parse>
== Dumping the Registry (`dump-registry`) <cmd-registry>
== Generating Stubs (`stubs`) <cmd-stubs>
== Showing Type Judgements (`types`) <cmd-types>
== Validating Definitions (`validate`) <cmd-validate>

View File

@@ -1,176 +0,0 @@
%YAML 1.2
---
name: Midas
file_extensions:
- midas
scope: source.midas
variables:
identifier: "[a-zA-Z_][a-zA-Z0-9_]*"
contexts:
main:
- include: comments
- include: keywords
- include: types
comments:
- match: "//"
scope: punctuation.definition.comment.midas
push:
- meta_scope: comment.line.midas
- match: $
pop: true
- match: /\*
scope: punctuation.definition.comment.midas
push:
- meta_scope: comment.block.midas
- match: \*/
pop: true
keywords:
- match: \btype\b
scope: keyword.declaration.midas
push: type-stmt
- match: \bextend\b
scope: keyword.declaration.midas
push: extend-stmt
- match: \bpredicate\b
scope: keyword.declaration.midas
push: predicate-stmt
type-stmt:
- include: comments
- match: "{{identifier}}"
scope: entity.name.type
- match: \[
push: type-params
- match: "="
scope: keyword.operator.equal.midas
push: type-expr
- match: $
pop: true
type-expr:
- include: comments
- match: \b(fn)\s*(\()
captures:
1: keyword.other.midas
2: punctuation.section.group.begin
push: fn-params
- match: \b(where)\b
scope: keyword.other.midas
set: constraint
- match: "{{identifier}}"
scope: entity.name.type
- match: $
pop: 2
fn-params:
- match: "({{identifier}})(:)"
captures:
1: variable.parameter.midas
2: punctuation.separator.annotation.midas
push:
- include: type-expr
- match: \?
scope: keyword.operator.qmark.midas
- match: "(?=,)"
scope: punctuation.separator.midas
pop: true
- match: '(?=\))'
pop: true
- include: type-expr
- match: '\)'
set:
- match: "->"
scope: keyword.operator.arrow.midas
set: type-expr
constraint:
- include: comments
- match: $
pop: 2
- match: \d+(\.\d+)?
scope: constant.numeric.midas
- match: \b(true|false|none)\b
scope: constant.language.midas
- match: (<=|>=|<|>|==|!=|&)
scope: keyword.operator
- match: _
scope: variable.language.midas
- match: '{{identifier}}(?=\s*\()'
scope: variable.function.midas
- match: "{{identifier}}"
scope: variable.other.readwrite.midas
type-params:
- include: comments
- match: "<:"
scope: keyword.operator.subtype.midas
- match: "[a-zA-Z][a-zA-Z_0-9]*"
scope: entity.name.type
- match: "]"
pop: true
extend-stmt:
- include: comments
- match: "{{identifier}}"
scope: entity.name.type
- match: \[
push: type-params
- match: \{
scope: punctuation.section.block.begin
push: extend-body
extend-body:
- include: comments
- include: member-stmt
- match: \}
scope: punctuation.section.block.end
pop: 2
member-stmt:
- match: \b(prop|def)\b
scope: keyword.other.midas
push:
- match: "{{identifier}}"
scope: variable.other.member
- match: ":"
push: type-expr
- match: $
pop: true
predicate-stmt:
- include: comments
- match: "{{identifier}}"
scope: entity.name.function.midas
- match: '\('
push: predicate-params
- match: "="
scope: keyword.operator.equal.midas
set: constraint
- match: $
pop: true
predicate-params:
- match: "({{identifier}})(:)"
captures:
1: variable.parameter.midas
2: punctuation.separator.annotation.midas
push:
- include: type-expr
- match: "(?=,)"
scope: punctuation.separator.midas
pop: true
- match: '(?=\))'
pop: true
- match: '\)'
pop: true

View File

@@ -1,143 +0,0 @@
#import "@preview/modpattern:0.2.0": modpattern
#let TODO = block(
width: 6em,
height: 3em,
stroke: red,
fill: modpattern(
size: (10pt, 10pt),
line(
start: (0%, 0%),
end: (100%, 100%),
stroke: gray.transparentize(60%) + 2pt,
),
),
align(
center + horizon,
text(fill: red, size: 1.5em)[*TODO*],
),
)
#let _render-header(version, hash) = {
let last-heading = query(heading.where(level: 1).before(here())).last(default: none)
let next-heading = query(heading.where(level: 1).after(here())).first(default: none)
let current-heading = if next-heading != none and next-heading.location().page() == here().page() {
next-heading
} else if last-heading != none {
last-heading
} else { none }
let chapter = if current-heading != none {
let body = current-heading.body
if current-heading.numbering != none {
let num = counter(heading).display(current-heading.numbering, at: current-heading.location())
body = [#num #body]
}
body
} else []
grid(
columns: (1fr, auto, 1fr),
align: (left, center, right),
document.title, [v#version - #hash], chapter,
)
}
#let _unshift-prefix(prefix, content) = context {
pad(left: -measure(prefix).width, prefix + content)
}
#let project(
title: none,
author: none,
version: "0.0.1",
hash: "abcdefgh",
icon-path: none,
doc,
) = {
assert(title != none, message: "Please provide a title")
set document(
title: title,
author: author,
)
set text(
font: "Source Sans 3",
)
set raw(syntaxes: path("midas.sublime-syntax"))
let front-page() = {
align(center)[
#{
set text(size: 1.5em)
std.title()
}
v#version - #hash
#if icon-path != none {
v(1cm)
image(icon-path)
}
]
pagebreak()
}
let outlines() = {
outline()
pagebreak()
outline(
title: [List of Listings],
target: figure.where(kind: raw),
)
outline(
title: [List of Tables],
target: figure.where(kind: table),
)
}
let main() = {
// Adapted from https://github.com/hei-templates/hei-synd-thesis/blob/7d2b941197babae0bf3afd4e5914754e09a64001/lib/template-thesis.typ#L242-L261
show heading.where(level: 1): it => {
pagebreak()
set text(size: 1.5em)
set block(above: 1.2em, below: 1.2em)
if it.numbering != none {
let num = numbering(it.numbering, ..counter(heading).at(it.location()))
let prefix = num + h(1em)
_unshift-prefix(prefix, it.body)
} else {
it
}
}
show heading.where(level: 2): it => {
if it.numbering != none {
let num = numbering(it.numbering, ..counter(heading).at(it.location()))
_unshift-prefix(num + h(0.8em), it.body)
} else {
it
}
}
set page(
header: context _render-header(version, hash),
footer: context if page.numbering != none {
align(center, counter(page).display(page.numbering, both: true))
},
numbering: "1 / 1",
)
show heading: set heading(numbering: "I.1.")
counter(page).update(1)
doc
}
front-page()
outlines()
main()
}

View File

@@ -152,4 +152,14 @@ class FunctionType:
required: bool
class FrameType:
columns: list[Column]
@dataclass(frozen=True, kw_only=True)
class Column:
location: Optional[Location] = None
name: Token
type: Type
###<

View File

@@ -15,7 +15,7 @@ from midas.ast.location import Location
###> MidasType | Type annotations | node
class BaseType:
base: str
param: Optional[MidasType]
args: tuple[MidasType, ...]
class ConstraintType:
@@ -174,6 +174,10 @@ class SliceExpr:
step: Optional[Expr]
class TupleExpr:
items: tuple[Expr, ...]
class RawExpr:
expr: ast.expr

View File

@@ -253,6 +253,9 @@ class Type(ABC):
@abstractmethod
def visit_function_type(self, type: FunctionType) -> T: ...
@abstractmethod
def visit_frame_type(self, type: FrameType) -> T: ...
@dataclass(frozen=True)
class NamedType(Type):
@@ -311,3 +314,17 @@ class FunctionType(Type):
def accept(self, visitor: Type.Visitor[T]) -> T:
return visitor.visit_function_type(self)
@dataclass(frozen=True)
class FrameType(Type):
columns: list[Column]
@dataclass(frozen=True, kw_only=True)
class Column:
location: Optional[Location] = None
name: Token
type: Type
def accept(self, visitor: Type.Visitor[T]) -> T:
return visitor.visit_frame_type(self)

View File

@@ -350,6 +350,25 @@ class MidasAstPrinter(
arg.type.accept(self)
self._write_line(f"required: {arg.required}", last=True)
def visit_frame_type(self, type: m.FrameType) -> None:
self._write_line("FrameType")
with self._child_level(single=True):
self._write_line("columns")
with self._child_level():
for i, column in enumerate(type.columns):
self._idx = i
if i == len(type.columns) - 1:
self._mark_last()
self._print_frame_column(column)
def _print_frame_column(self, column: m.FrameType.Column) -> None:
self._write_line("Column")
with self._child_level():
self._write_line(f'name: "{column.name.lexeme}"')
self._write_line("type")
with self._child_level(single=True):
column.type.accept(self)
class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str], m.Type.Visitor[str]):
def __init__(self, indent: int = 4):
@@ -502,6 +521,23 @@ class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str], m.Type.Visitor[str]
res += "?"
return res
def visit_frame_type(self, type: m.FrameType) -> str:
res: str = self.indented("Frame[")
if len(type.columns) != 0:
res += "\n"
self.level += 1
columns: list[str] = []
for column in type.columns:
columns.append(self.indented(self._print_frame_column(column)))
res += ",\n".join(columns)
self.level -= 1
res += "\n"
res += "]"
return res
def _print_frame_column(self, column: m.FrameType.Column) -> str:
return f"{column.name.lexeme}: {column.type.accept(self)}"
class PythonAstPrinter(
AstPrinter,
@@ -513,7 +549,13 @@ class PythonAstPrinter(
self._write_line("BaseType")
with self._child_level():
self._write_line(f"base: {node.base}")
self._write_optional_child("param", node.param, last=True)
self._write_line("args:", last=True)
with self._child_level():
for i, arg in enumerate(node.args):
self._idx = i
if i == len(node.args) - 1:
self._mark_last()
arg.accept(self)
def visit_constraint_type(self, node: p.ConstraintType) -> None:
self._write_line("ConstraintType")
@@ -826,6 +868,17 @@ class PythonAstPrinter(
self._write_optional_child("upper", expr.upper)
self._write_optional_child("step", expr.step, last=True)
def visit_tuple_expr(self, expr: p.TupleExpr) -> None:
self._write_line("TupleExpr")
with self._child_level():
self._write_line("items", last=True)
with self._child_level():
for i, item in enumerate(expr.items):
self._idx = i
if i == len(expr.items) - 1:
self._mark_last()
item.accept(self)
def visit_raw_expr(self, expr: p.RawExpr) -> None:
self._write_line("RawExpr")
with self._child_level(single=True):

View File

@@ -44,7 +44,7 @@ class MidasType(ABC):
@dataclass(frozen=True)
class BaseType(MidasType):
base: str
param: Optional[MidasType]
args: tuple[MidasType, ...]
def accept(self, visitor: MidasType.Visitor[T]) -> T:
return visitor.visit_base_type(self)
@@ -268,6 +268,9 @@ class Expr(ABC):
@abstractmethod
def visit_slice_expr(self, expr: SliceExpr) -> T: ...
@abstractmethod
def visit_tuple_expr(self, expr: TupleExpr) -> T: ...
@abstractmethod
def visit_raw_expr(self, expr: RawExpr) -> T: ...
@@ -402,6 +405,14 @@ class SliceExpr(Expr):
return visitor.visit_slice_expr(self)
@dataclass(frozen=True)
class TupleExpr(Expr):
items: tuple[Expr, ...]
def accept(self, visitor: Expr.Visitor[T]) -> T:
return visitor.visit_tuple_expr(self)
@dataclass(frozen=True)
class RawExpr(Expr):
expr: ast.expr

View File

@@ -179,3 +179,99 @@ extend dict[K, V] {
// def __ior__: fn(value: Iterable[tuple[K, V]], /) -> dict[K, V]
}
extend str {
def capitalize: fn() -> str
def casefold: fn() -> str
def center: fn(width: int, fillchar: str?, /) -> str
def count: fn(sub: str, start: None?, end: None?, /) -> int
def count: fn(sub: str, start: int, end: None?, /) -> int
def count: fn(sub: str, start: None, end: int, /) -> int
def count: fn(sub: str, start: int, end: int, /) -> int
def encode: fn(encoding: str?, errors: str?) -> bytes
def endswith: fn(suffix: str, start: None?, end: None?, /) -> bool
def endswith: fn(suffix: str, start: int, end: None?, /) -> bool
def endswith: fn(suffix: str, start: None, end: int, /) -> bool
def endswith: fn(suffix: str, start: int, end: int, /) -> bool
def expandtabs: fn(tabsize: int?) -> str
def find: fn(sub: str, start: None?, end: None?, /) -> int
def find: fn(sub: str, start: int, end: None?, /) -> int
def find: fn(sub: str, start: None, end: int, /) -> int
def find: fn(sub: str, start: int, end: int, /) -> int
// def format: fn(*args: object, **kwargs: object) -> str
// def format_map: fn(mapping: _FormatMapMapping, /) -> str
def index: fn(sub: str, start: None?, end: None?, /) -> int
def index: fn(sub: str, start: int, end: None?, /) -> int
def index: fn(sub: str, start: None, end: int, /) -> int
def index: fn(sub: str, start: int, end: int, /) -> int
def isalnum: fn() -> bool
def isalpha: fn() -> bool
def isascii: fn() -> bool
def isdecimal: fn() -> bool
def isdigit: fn() -> bool
def isidentifier: fn() -> bool
def islower: fn() -> bool
def isnumeric: fn() -> bool
def isprintable: fn() -> bool
def isspace: fn() -> bool
def istitle: fn() -> bool
def isupper: fn() -> bool
def join: fn(iterable: list[str], /) -> str // TODO: use Iterable
def ljust: fn(width: int, fillchar: str?, /) -> str
def lower: fn() -> str
def lstrip: fn(chars: None?, /) -> str
def lstrip: fn(chars: str, /) -> str
def partition: fn(sep: str, /) -> tuple[str, str, str]
def replace: fn(old: str, new: str, count: int?, /) -> str
def removeprefix: fn(prefix: str, /) -> str
def removesuffix: fn(suffix: str, /) -> str
def rfind: fn(sub: str, start: None?, end: None?, /) -> int
def rfind: fn(sub: str, start: int, end: None?, /) -> int
def rfind: fn(sub: str, start: None, end: int, /) -> int
def rfind: fn(sub: str, start: int, end: int, /) -> int
def rindex: fn(sub: str, start: None?, end: None?, /) -> int
def rindex: fn(sub: str, start: int, end: None?, /) -> int
def rindex: fn(sub: str, start: None, end: int, /) -> int
def rindex: fn(sub: str, start: int, end: int, /) -> int
def rjust: fn(width: int, fillchar: str?, /) -> str
def rpartition: fn(sep: str, /) -> tuple[str, str, str]
def rsplit: fn(sep: None?, maxsplit: int?) -> list[str]
def rsplit: fn(sep: str, maxsplit: int?) -> list[str]
def rstrip: fn(chars: None?, /) -> str
def rstrip: fn(chars: str, /) -> str
def split: fn(sep: None?, maxsplit: int?) -> list[str]
def split: fn(sep: str, maxsplit: int?) -> list[str]
def splitlines: fn(keepends: bool?) -> list[str]
def startswith: fn(prefix: str, start: None?, end: None?, /) -> bool
def startswith: fn(prefix: str, start: int, end: None?, /) -> bool
def startswith: fn(prefix: str, start: None, end: int, /) -> bool
def startswith: fn(prefix: str, start: int, end: int, /) -> bool
def strip: fn(chars: None?, /) -> str
def strip: fn(chars: str, /) -> str
def swapcase: fn() -> str
def title: fn() -> str
// def translate: fn(table: _TranslateTable, /) -> str
def upper: fn() -> str
def zfill: fn(width: int, /) -> str
def __add__: fn(value: str, /) -> str
// Incompatible with Sequence.__contains__
def __contains__: fn(key: str, /) -> bool
def __eq__: fn(value: object, /) -> bool
def __ge__: fn(value: str, /) -> bool
def __getitem__: fn(key: slice, /) -> str
def __getitem__: fn(key: int, /) -> str
def __gt__: fn(value: str, /) -> bool
def __hash__: fn() -> int
// def __iter__: fn() -> Iterator[str]
def __le__: fn(value: str, /) -> bool
def __len__: fn() -> int
def __lt__: fn(value: str, /) -> bool
def __mod__: fn(value: Any, /) -> str
def __mul__: fn(value: int, /) -> str
def __ne__: fn(value: object, /) -> bool
def __rmul__: fn(value: int, /) -> str
def __getnewargs__: fn() -> tuple[str]
def __format__: fn(format_spec: str, /) -> str
}

View File

@@ -15,7 +15,7 @@ if TYPE_CHECKING:
BUILTIN_SUBTYPES: dict[str, set[str]] = {
"object": {"float", "list", "dict", "str"},
"object": {"float", "list", "dict", "str", "bytes", "tuple"},
"float": {"int"},
"int": {"bool"},
}
@@ -26,12 +26,15 @@ def define_builtins(reg: TypesRegistry):
any = reg.define_type("Any", TopType())
unit = reg.define_type("None", UnitType())
object = reg.define_type("object", BaseType(name="object"))
bytes = reg.define_type("bytes", BaseType(name="bytes"))
bool = reg.define_type("bool", BaseType(name="bool"))
int = reg.define_type("int", BaseType(name="int"))
float = reg.define_type("float", BaseType(name="float"))
str = reg.define_type("str", BaseType(name="str"))
slice = reg.define_type("slice", BaseType(name="slice"))
tuple = reg.define_type("tuple", BaseType(name="tuple"))
list = reg.define_type(
"list",
GenericType(

View File

@@ -0,0 +1,198 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Callable, Optional
from midas.ast.location import Location
from midas.checker.registry import TypesRegistry
from midas.checker.reporter import FileReporter
from midas.checker.types import (
ColumnType,
DataFrameType,
Function,
OverloadedFunction,
TopType,
Type,
UnknownType,
unfold_type,
)
if TYPE_CHECKING:
from midas.checker.python import PythonTyper, TypedExpr
def frame_method(*names: str):
    """Mark a function as the handler for one or more dataframe method names.

    The returned decorator stores the names on the function as
    ``__method_names__`` and returns the function itself unchanged, so that
    ``_MethodRegistryMeta`` can pick it up when the enclosing class is created.

    Args:
        *names: Method names the function should be registered under. When
            omitted, the function's own ``__name__`` is used.

    Returns:
        A decorator that tags the function and returns it.
    """
    # NOTE: this is a plain module-level function; it must not be wrapped in
    # ``staticmethod`` (such objects are only callable since Python 3.10 and
    # the wrapper serves no purpose outside a class body).

    def wrapper(func):
        names_: tuple[str, ...] = names
        if len(names_) == 0:
            # Default to the function's own name when none was given.
            names_ = (func.__name__,)
        setattr(func, "__method_names__", names_)
        return func

    return wrapper
# Immutable, keyword-only bundle describing one method call on a dataframe.
# FrameManager.call builds it and MethodRegistry.call hands it to the handler.
@dataclass(frozen=True, kw_only=True)
class Call:
# Source location of the call; used when reporting diagnostics.
location: Location
# Type of the dataframe the method is called on.
frame: DataFrameType
# Positional arguments; handlers read the argument type from element [1]
# of each entry.
positional: list[TypedExpr]
# Keyword arguments, keyed by parameter name.
keywords: dict[str, TypedExpr]
class _MethodRegistryMeta(type):
_methods: dict[str, Callable[..., Type]] = {}
def __new__(
cls,
name: str,
bases: tuple[type, ...],
namespace: dict[str, Any],
):
new_class = super().__new__(cls, name, bases, namespace)
new_class._methods = {}
for attr in namespace.values():
if callable(attr) and hasattr(attr, "__method_names__"):
for name in attr.__method_names__: # type: ignore
new_class._methods[name] = attr # type: ignore
return new_class
class MethodRegistry(metaclass=_MethodRegistryMeta):
    """Computes the result type of method calls made on dataframes.

    Handlers are the methods below decorated with ``@frame_method``; the
    metaclass gathers them into ``_methods`` under every name they declare.
    """

    def __init__(self, typer: PythonTyper) -> None:
        self.typer: PythonTyper = typer

    @property
    def reporter(self) -> FileReporter:
        """Reporter of the owning typer."""
        return self.typer.reporter

    @property
    def types(self) -> TypesRegistry:
        """Types registry of the owning typer."""
        return self.typer.types

    def call(
        self,
        method: str,
        call: Call,
    ) -> Type:
        """Dispatch ``method`` to its handler.

        Emits a warning and returns ``UnknownType`` when no handler is
        registered under that name.
        """
        handler: Optional[Callable[..., Type]] = self._methods.get(method)
        if handler is not None:
            return handler(self, call)
        self.reporter.warning(call.location, f"Unknown method {method}")
        return UnknownType()

    @frame_method("add", "__add__")
    def add(
        self,
        call: Call,
    ) -> Type:
        """Type ``frame.add(other)`` / ``frame + other``.

        A column keeps its type only when the other frame has a column of the
        same name with an equivalent type; every other column of either frame
        ends up as a column of ``UnknownType``.
        """
        # TODO: support add with scalar, sequence, Series, dict
        # TODO: check operation exists on inner column types
        rhs_frame: Optional[DataFrameType] = None
        rhs_by_name: dict[str, DataFrameType.Column] = {}
        if call.positional:
            rhs: Type = unfold_type(call.positional[0][1])
            if isinstance(rhs, DataFrameType):
                rhs_frame = rhs
                rhs_by_name = {
                    col.name: col for col in rhs.columns if col.name is not None
                }

        lhs_names: set[str] = set()
        merged: list[DataFrameType.Column] = []
        for lhs_col in call.frame.columns:
            if lhs_col.name is not None:
                lhs_names.add(lhs_col.name)

            lhs_type: Type = lhs_col.type
            result_type: Type = ColumnType(type=UnknownType())
            if lhs_col.name in rhs_by_name:
                rhs_type: Type = rhs_by_name[lhs_col.name].type
                if self.types.are_equivalent(rhs_type, lhs_type):
                    result_type = lhs_type

            merged.append(
                DataFrameType.Column(
                    index=lhs_col.index,
                    name=lhs_col.name,
                    type=result_type,
                )
            )

        if rhs_frame is not None:
            # Columns that only exist in the other frame are appended at the end.
            rhs_only = [col for col in rhs_frame.columns if col.name not in lhs_names]
            for position, rhs_col in enumerate(rhs_only, start=len(merged)):
                merged.append(
                    DataFrameType.Column(
                        index=position,
                        name=rhs_col.name,
                        type=ColumnType(type=UnknownType()),
                    )
                )

        other_param = Function.Argument(
            pos=0,
            name="other",
            type=DataFrameType(columns=[]),
            required=True,
        )
        signature = Function(
            args=[other_param],
            returns=DataFrameType(columns=merged),
        )

        result = self.typer._get_call_result(
            location=call.location,
            callee=signature,
            positional=call.positional,
            keywords=call.keywords,
        )
        return result or UnknownType()

    @frame_method()
    def mean(self, call: Call) -> Type:
        """Type ``frame.mean(...)`` through two overloads on ``axis``.

        An optional ``int`` axis yields a column of ``TopType``; a required
        ``None`` axis yields ``TopType``.
        """

        def axis_param(type_name: str, required: bool):
            return Function.Argument(
                pos=0,
                name="axis",
                type=self.types.get_type(type_name),
                required=required,
            )

        with_axis = Function(
            kw_args=[axis_param("int", False)],
            returns=ColumnType(type=TopType()),
        )
        without_axis = Function(
            kw_args=[axis_param("None", True)],
            returns=TopType(),
        )
        overload = OverloadedFunction(overloads=[with_axis, without_axis])

        result = self.typer._get_call_result(
            location=call.location,
            callee=overload,
            positional=call.positional,
            keywords=call.keywords,
        )
        return result or UnknownType()

154
midas/checker/frames.py Normal file
View File

@@ -0,0 +1,154 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Optional, TypeGuard, cast
import midas.ast.python as p
from midas.ast.location import Location
from midas.checker.frame_methods import Call, MethodRegistry
from midas.checker.reporter import FileReporter
from midas.checker.types import ColumnType, DataFrameType, TupleType, Type, UnknownType
if TYPE_CHECKING:
from midas.checker.python import PythonTyper, TypedExpr
def is_list_of_literals(exprs: list[p.Expr]) -> TypeGuard[list[p.LiteralExpr]]:
return all(isinstance(expr, p.LiteralExpr) for expr in exprs)
class FrameManager:
def __init__(self, typer: PythonTyper) -> None:
self.typer: PythonTyper = typer
self.method_resolver: MethodRegistry = MethodRegistry(self.typer)
def assign(
self,
reporter: FileReporter,
location: Location,
frame: DataFrameType,
index: p.Expr,
value_type: Type,
) -> Type:
match index:
case p.LiteralExpr(value=str() as name):
return self.assign_column(reporter, location, frame, name, value_type)
case p.ListExpr(items=indices) if is_list_of_literals(indices) and all(
isinstance(idx, str) for idx in indices
):
raise NotImplementedError
case _:
reporter.error(location, f"Invalid index type {index} on {frame}")
return UnknownType()
def assign_column(
self,
reporter: FileReporter,
location: Location,
frame: DataFrameType,
name: str,
type: Type,
) -> Type:
if not isinstance(type, ColumnType):
reporter.error(
location,
f"Cannot assign {type} to dataframe column. Must be a ColumnType",
)
return frame
return self._set_column(frame, name, type)
def get(
self,
reporter: FileReporter,
location: Location,
frame: DataFrameType,
index: p.Expr,
) -> Type:
match index:
case p.LiteralExpr(value=str() as name):
column: Optional[ColumnType] = FrameManager._get_column(frame, name)
if column is None:
reporter.error(location, f"Unknown column '{name}' on {frame}")
return UnknownType()
return column
case p.ListExpr(items=indices) if is_list_of_literals(indices) and all(
isinstance(index.value, str) for index in indices
):
names: list[str] = [cast(str, index.value) for index in indices]
columns: list[ColumnType] = []
for name in names:
column: Optional[ColumnType] = FrameManager._get_column(frame, name)
if column is None:
reporter.error(location, f"Unknown column '{name}' on {frame}")
return UnknownType()
columns.append(column)
return TupleType(items=tuple(columns))
case _:
reporter.error(location, f"Invalid index type {index} on {frame}")
return UnknownType()
@classmethod
def _set_column(
cls, frame: DataFrameType, name: str, column: ColumnType
) -> DataFrameType:
new_columns: list[DataFrameType.Column] = []
index: int = len(frame.columns)
replace: bool = False
for i, col in enumerate(frame.columns):
if col.name == name:
index = i
replace = True
# TODO: check column type here to prevent changing it
new_columns.append(col)
new_col: DataFrameType.Column = DataFrameType.Column(
index=index,
name=name,
type=column,
)
if replace:
new_columns[index] = new_col
else:
new_columns.append(new_col)
return DataFrameType(columns=new_columns)
@classmethod
def _set_columns(
cls, frame: DataFrameType, names: list[str], columns: list[ColumnType]
) -> DataFrameType:
for name, col in zip(names, columns):
frame = cls._set_column(frame, name, col)
return frame
@classmethod
def _get_column(cls, frame: DataFrameType, name: str) -> Optional[ColumnType]:
for col in frame.columns:
if col.name == name:
return col.type
return None
@classmethod
def _get_columns(
cls, frame: DataFrameType, names: list[str]
) -> list[Optional[ColumnType]]:
return [cls._get_column(frame, name) for name in names]
def call(
self,
method: str,
location: Location,
frame: DataFrameType,
positional: list[TypedExpr],
keywords: dict[str, TypedExpr],
) -> Type:
call: Call = Call(
location=location,
frame=frame,
positional=positional,
keywords=keywords,
)
return self.method_resolver.call(method, call)

View File

@@ -14,8 +14,10 @@ from midas.checker.reporter import FileReporter, Reporter
from midas.checker.types import (
AliasType,
AppliedType,
ColumnType,
ComplexType,
ConstraintType,
DataFrameType,
ExtensionType,
Function,
GenericType,
@@ -401,6 +403,18 @@ class MidasTyper(m.Stmt.Visitor[None], m.Expr.Visitor[Type], m.Type.Visitor[Type
kw=[process_arg(arg, i + n_pos + n_mixed) for i, arg in enumerate(spec.kw)],
)
def visit_frame_type(self, type: m.FrameType) -> Type:
def process_column(i: int, col: m.FrameType.Column) -> DataFrameType.Column:
return DataFrameType.Column(
index=i,
name=col.name.lexeme,
type=ColumnType(type=col.type.accept(self)),
)
return DataFrameType(
columns=[process_column(i, col) for i, col in enumerate(type.columns)]
)
def _resolve_type_params(self, params: list[m.TypeParam]):
vars: list[TypeVar] = []
for param in params:

View File

@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Callable, Optional
from typing import Any, Callable, Optional
from midas.checker.environment import Environment
from midas.checker.registry import TypesRegistry
@@ -17,7 +17,7 @@ class Preamble(Environment):
def __init__(self, types: TypesRegistry) -> None:
super().__init__()
self._types: TypesRegistry = types
self._python_funcs: dict[str, Callable] = {}
self._python_funcs: dict[str, Callable[..., Any]] = {}
self._def_type_constructor("object", object)
self._def_type_constructor("float", float)
@@ -34,7 +34,7 @@ class Preamble(Environment):
# TODO: use sink
self._def_function(
name="print",
pos=[Param("object", TopType())],
pos=[Param("object", TopType(), required=False)],
returns=UnitType(),
py_function=print,
)
@@ -64,11 +64,18 @@ class Preamble(Environment):
pos=[Param("prompt", TopType(), required=False)],
returns=self._types.get_type("str"),
)
self._def_function(
name="len",
pos=[Param("object", TopType())],
returns=self._types.get_type("int"),
)
def _list_of(self, item_type: Type) -> Type:
return self._types.apply_generic(self._types.get_type("list"), [item_type])
def _def_type_constructor(self, name: str, py_function: Optional[Callable] = None):
def _def_type_constructor(
self, name: str, py_function: Optional[Callable[..., Any]] = None
):
# TODO: more specific arg types
self._def_function(
name=name,
@@ -121,7 +128,7 @@ class Preamble(Environment):
kw: list[Param] = [],
returns: Type = UnitType(),
type_vars: list[TypeVar] = [],
py_function: Optional[Callable] = None,
py_function: Optional[Callable[..., Any]] = None,
):
function: Type = self._make_function(
name=name,
@@ -135,5 +142,5 @@ class Preamble(Environment):
if py_function is not None:
self._python_funcs[name] = py_function
def get_py_func(self, name: str) -> Optional[Callable]:
def get_py_func(self, name: str) -> Optional[Callable[..., Any]]:
return self._python_funcs.get(name)

View File

@@ -8,6 +8,7 @@ from midas.ast.location import Location
from midas.ast.printer import MidasPrinter
from midas.checker.environment import Environment
from midas.checker.evaluator import Evaluator
from midas.checker.frames import FrameManager
from midas.checker.operators import (
PY_COMPARATOR_METHODS,
PY_OPERATOR_METHODS,
@@ -21,10 +22,13 @@ from midas.checker.types import (
AliasType,
AppliedType,
BaseType,
ColumnType,
ConstraintType,
DataFrameType,
Function,
GenericType,
OverloadedFunction,
TupleType,
Type,
TypeVar,
UnitType,
@@ -43,6 +47,10 @@ class ReturnException(Exception):
pass
class UndefinedMethodException(Exception):
pass
@dataclass(frozen=True, kw_only=True)
class MappedArgument:
expr: p.Expr
@@ -71,6 +79,7 @@ class PythonTyper(
self.logger: logging.Logger = logging.getLogger("PythonTyper")
self.reporter: FileReporter = reporter.for_file(None)
self.types: TypesRegistry = types
self.frame_mgr: FrameManager = FrameManager(self)
self.global_env: Environment = Preamble(self.types)
self.env: Environment = self.global_env
self.locals: dict[p.Expr, int] = {}
@@ -190,6 +199,36 @@ class PythonTyper(
return self.env.get_at(distance, name)
return self.global_env.get(name)
def call_method(
self,
location: Location,
obj: Type,
method_name: str,
positional: list[TypedExpr],
keywords: dict[str, TypedExpr],
) -> Optional[Type]:
unfolded: Type = unfold_type(obj)
match unfolded:
case DataFrameType():
return self.frame_mgr.call(
method=method_name,
location=location,
frame=unfolded,
positional=positional,
keywords=keywords,
)
method: Optional[Type] = self.types.lookup_member(obj, method_name)
if method is None:
raise UndefinedMethodException
return self._get_call_result(
location,
method,
positional,
keywords,
)
def is_subtype(self, type1: Type, type2: Type) -> bool:
return self.types.is_subtype(type1, type2)
@@ -319,9 +358,15 @@ class PythonTyper(
case p.VariableExpr():
self._assign_var(location, target, value_type)
# Allow any kind of object because we disallow creating new attributes
case p.GetExpr(object=object, name=name):
self._assign_attr(location, object, name, value_type)
# Only support variable expressions because modifying
# the underlying value would require reference types
case p.SubscriptExpr(object=p.VariableExpr() as var, index=index):
self._assign_sub(location, var, index, value_type)
case _:
if not isinstance(target, p.VariableExpr):
self.logger.warning(f"Unsupported assignment to {target}")
@@ -360,6 +405,30 @@ class PythonTyper(
f"Cannot assign {value_type} to member '{object_type}.{name}' of type {member}",
)
def _assign_sub(
self,
location: Location,
var: p.VariableExpr,
index: p.Expr,
value_type: Type,
):
var_type: Type = self.type_of(var)
unfolded_type: Type = unfold_type(var_type)
# TODO: what happens if type is an alias of a dataframe type
match unfolded_type:
case DataFrameType() as frame:
new_type: Type = self.frame_mgr.assign(
self.reporter, location, frame, index, value_type
)
self.env.assign(var.name, new_type)
case UnknownType():
return
case _:
self.reporter.error(
location,
f"Cannot assign {value_type} to index {index} of {var_type}",
)
def visit_return_stmt(self, stmt: p.ReturnStmt) -> None:
type: Type = self.type_of(stmt.value) if stmt.value is not None else UnitType()
self.env.return_types.append(type)
@@ -373,8 +442,10 @@ class PythonTyper(
# print(m) # <- m is still defined
test_type: Type = self.type_of(stmt.test)
# TODO Allow subtypes or any type
if test_type != self.types.get_type("bool"):
if (
not self.types.is_subtype(test_type, self.types.get_type("bool"))
and test_type != UnknownType()
):
self.reporter.error(
stmt.test.location, f"If test must be a boolean, got {test_type}"
)
@@ -390,13 +461,16 @@ class PythonTyper(
pass
def visit_for_stmt(self, stmt: p.ForStmt) -> None:
item_type: Optional[Type] = self._get_iterator_type(stmt.iterator)
if item_type is None:
iterator_type: Type = self.compute_type(stmt.iterator)
self.reporter.error(
stmt.iterator.location, f"{iterator_type} is not iterable"
)
item_type = UnknownType()
item_type: Type = UnknownType()
iterator_type: Type = self.type_of(stmt.iterator)
if iterator_type != UnknownType():
maybe_item_type = self._get_iterator_type(stmt.iterator, iterator_type)
if maybe_item_type is None:
self.reporter.error(
stmt.iterator.location, f"{iterator_type} is not iterable"
)
else:
item_type = maybe_item_type
self._assign(stmt.location, stmt.target, item_type)
self.judge(stmt.target, item_type)
@@ -436,20 +510,16 @@ class PythonTyper(
left: Type = self.type_of(left_expr)
right: Type = self.type_of(right_expr)
operation: Optional[Type] = self.types.lookup_member(left, method)
if operation is None:
result: Optional[Type]
try:
result = self.call_method(location, left, method, [(right_expr, right)], {})
except UndefinedMethodException:
self.reporter.error(
location,
f"Undefined operation {method} between {left} and {right}",
)
return UnknownType()
result: Optional[Type] = self._get_call_result(
location,
operation,
[(right_expr, right)],
{},
)
return result or UnknownType()
def visit_unary_expr(self, expr: p.UnaryExpr) -> Type:
@@ -462,20 +532,17 @@ class PythonTyper(
return UnknownType()
operand: Type = self.type_of(expr.right)
operation: Optional[Type] = self.types.lookup_member(operand, method)
if operation is None:
result: Optional[Type]
try:
result = self.call_method(expr.location, operand, method, [], {})
except UndefinedMethodException:
self.reporter.error(
expr.location,
f"Undefined operation {method} for {operand}",
)
return UnknownType()
result: Optional[Type] = self._get_call_result(
expr.location,
operation,
[],
{},
)
return result or UnknownType()
def visit_call_expr(self, expr: p.CallExpr) -> Type:
@@ -483,13 +550,27 @@ class PythonTyper(
case p.VariableExpr(name="TypeVar"):
return self.define_typevar(expr) or UnknownType()
callee: Type = self.type_of(expr.callee)
positional: list[TypedExpr] = [
(arg, self.type_of(arg)) for arg in expr.arguments
]
keywords: dict[str, TypedExpr] = {
name: (arg, self.type_of(arg)) for name, arg in expr.keywords.items()
}
match expr.callee:
case p.GetExpr(object=obj, name=method):
obj_type: Type = self.type_of(obj)
unfolded: Type = unfold_type(obj_type)
if isinstance(unfolded, DataFrameType):
return self.frame_mgr.call(
method,
expr.location,
unfolded,
positional,
keywords,
)
callee: Type = self.type_of(expr.callee)
return (
self._get_call_result(
location=expr.location,
@@ -504,7 +585,7 @@ class PythonTyper(
object: Type = self.type_of(expr.object)
member: Optional[Type] = self.types.lookup_member(object, expr.name)
if member is None:
self.reporter.error(
self.reporter.warning(
expr.location, f"Unknown member '{expr.name}' of {object}"
)
return UnknownType()
@@ -521,6 +602,8 @@ class PythonTyper(
return self.types.get_type("float")
case str():
return self.types.get_type("str")
case None:
return self.types.get_type("None")
case _:
self.reporter.warning(expr.location, f"Unknown literal {expr}")
return UnknownType()
@@ -563,7 +646,10 @@ class PythonTyper(
test_type: Type = self.type_of(expr.test)
# TODO Allow subtypes or any type
if test_type != self.types.get_type("bool"):
if (
not self.is_subtype(test_type, self.types.get_type("bool"))
and test_type != UnknownType()
):
self.reporter.error(
expr.test.location, f"If test must be a boolean, got {test_type}"
)
@@ -592,9 +678,9 @@ class PythonTyper(
if len(item_types) == 1:
item_type: Type = item_types[0]
return self.types.apply_generic(list_type, [item_type])
self.reporter.error(
self.reporter.warning(
expr.location,
f"Heterogeneous list items: {item_types}",
f"Heterogeneous list items: [{', '.join(map(str, item_types))}]",
)
return self.types.apply_generic(list_type, [UnknownType()])
@@ -624,22 +710,29 @@ class PythonTyper(
if len(key_types) == 1:
key_type = key_types[0]
else:
self.reporter.error(
self.reporter.warning(
expr.location,
f"Heterogeneous dict keys: {key_types}",
f"Heterogeneous dict keys: [{', '.join(map(str, key_types))}]",
)
if len(value_types) == 1:
value_type = value_types[0]
else:
self.reporter.error(
self.reporter.warning(
expr.location,
f"Heterogeneous dict values: {value_types}",
f"Heterogeneous dict values: [{', '.join(map(str, value_types))}]",
)
return self.types.apply_generic(dict_type, [key_type, value_type])
def visit_subscript_expr(self, expr: p.SubscriptExpr) -> Type:
object: Type = self.type_of(expr.object)
unfolded: Type = unfold_type(object)
match unfolded:
case TupleType():
return self._visit_tuple_subscript(unfolded, expr)
case DataFrameType():
return self._visit_frame_subscript(unfolded, expr)
operation: Optional[Type] = self.types.lookup_member(object, "__getitem__")
if operation is None:
self.reporter.error(
@@ -657,6 +750,11 @@ class PythonTyper(
def visit_slice_expr(self, expr: p.SliceExpr) -> Type:
return self.types.get_type("slice")
def visit_tuple_expr(self, expr: p.TupleExpr) -> Type:
    """Type a tuple expression

    Every item is typed in order with `type_of`, and the resulting types are
    packed into a `TupleType` holding one entry per item
    """
    item_types: list[Type] = [self.type_of(element) for element in expr.items]
    return TupleType(items=tuple(item_types))
def visit_raw_expr(self, expr: p.RawExpr) -> Type:
return UnknownType()
@@ -668,22 +766,35 @@ class PythonTyper(
self.reporter.warning(node.location, f"Unknown type '{node.base}'")
return UnknownType()
if node.param is not None:
param: Type = self.resolve_type_expr(node.param)
return self.types.apply_generic(base, [param])
if len(node.args) != 0:
args: list[Type] = [self.resolve_type_expr(arg) for arg in node.args]
return self.types.apply_generic(base, args)
return base
def visit_constraint_type(self, node: p.ConstraintType) -> Type:
self.reporter.warning(node.location, "ConstraintType not yet supported")
return UnknownType()
def visit_frame_column(self, node: p.FrameColumn) -> Type:
self.reporter.warning(node.location, "FrameColumn not yet supported")
return UnknownType()
def visit_frame_column(self, node: p.FrameColumn) -> ColumnType:
return ColumnType(
type=(
self.resolve_type_expr(node.type)
if node.type is not None
else UnknownType()
)
)
def visit_frame_type(self, node: p.FrameType) -> Type:
self.reporter.warning(node.location, "FrameType not yet supported")
return UnknownType()
return DataFrameType(
columns=[
DataFrameType.Column(
index=i,
name=column.name,
type=self.visit_frame_column(column),
)
for i, column in enumerate(node.columns)
]
)
def _get_call_result(
self,
@@ -1055,9 +1166,8 @@ class PythonTyper(
return False
return True
def _get_iterator_type(self, expr: p.Expr) -> Optional[Type]:
def _get_iterator_type(self, expr: p.Expr, type: Type) -> Optional[Type]:
# TODO: lookup __iter__
type: Type = self.type_of(expr)
getitem: Optional[Type] = self.types.lookup_member(type, "__getitem__")
if getitem is None:
return None
@@ -1123,7 +1233,7 @@ class PythonTyper(
node: ast.Expression = ast.parse(value, mode="eval")
return parser._parse_type(node.body)
case p.VariableExpr(name=name):
return p.BaseType(location=location, base=name, param=None)
return p.BaseType(location=location, base=name, args=())
case _:
raise NotImplementedError
@@ -1211,8 +1321,34 @@ class PythonTyper(
return False
return True
case DataFrameType() | ColumnType():
self.reporter.error(
expr.location, f"Cannot cast {lit_value!r} to {target_type}"
)
return False
case _:
self.reporter.info(
expr.location, f"Cannot evaluate cast to {target_type} statically"
)
return False
def _visit_tuple_subscript(self, tup: TupleType, expr: p.SubscriptExpr) -> Type:
match expr.index:
case p.LiteralExpr(value=int() as index):
if index < 0 or index >= len(tup.items):
self.reporter.error(
expr.location, f"Index {index} out of range for tuple {tup}"
)
return UnknownType()
return tup.items[index]
case _:
self.reporter.error(
expr.location, f"Invalid index type {expr.index} on {tup}"
)
return UnknownType()
def _visit_frame_subscript(
self, frame: DataFrameType, expr: p.SubscriptExpr
) -> Type:
return self.frame_mgr.get(self.reporter, expr.location, frame, expr.index)

View File

@@ -8,14 +8,17 @@ from midas.checker.types import (
AliasType,
AppliedType,
BaseType,
ColumnType,
ComplexType,
ConstraintType,
DataFrameType,
ExtensionType,
Function,
GenericType,
OverloadedFunction,
Predicate,
TopType,
TupleType,
Type,
TypeVar,
UnknownType,
@@ -157,6 +160,24 @@ class TypesRegistry:
return False
return True
case (DataFrameType(columns=columns1), DataFrameType(columns=columns2)):
# TODO: check order?
by_name1: dict[str, DataFrameType.Column] = {
col.name: col for col in columns1 if col.name is not None
}
for col2 in columns2:
if col2.name not in by_name1:
return False
if not self.is_subtype(by_name1[col2.name].type, col2.type):
return False
return True
case (ColumnType(type=inner1), ColumnType(type=inner2)):
# TODO: invariant, replace ColumnType with simple GenericType
if not self.are_equivalent(inner1, inner2):
return False
return True
case (Function(), Function()):
return self.is_func_subtype(type1, type2)
@@ -187,6 +208,9 @@ class TypesRegistry:
return False
def are_equivalent(self, type1: Type, type2: Type) -> bool:
    """Check whether two types are subtypes of each other

    The second direction is only checked when the first one holds
    """
    if not self.is_subtype(type1, type2):
        return False
    return self.is_subtype(type2, type1)
# TODO: verify the logic in here
def is_func_subtype(self, func1: Function, func2: Function) -> bool:
"""Check whether a function is a subtype of another
@@ -323,6 +347,9 @@ class TypesRegistry:
body=substitute_typevars(body, substitutions),
)
case BaseType(name="tuple"):
return TupleType(items=tuple(args))
case _:
raise ValueError(f"{type} is not a generic type")

View File

@@ -128,6 +128,10 @@ class Resolver(p.Stmt.Visitor[None], p.Expr.Visitor[None]):
case p.GetExpr():
target.accept(self)
case p.SubscriptExpr():
target.accept(self)
case _:
raise Exception(f"Unsupported assignment to {target}")
@@ -232,5 +236,9 @@ class Resolver(p.Stmt.Visitor[None], p.Expr.Visitor[None]):
if expr.step is not None:
self.resolve(expr.step)
def visit_tuple_expr(self, expr: p.TupleExpr) -> None:
    """Resolve each item of a tuple expression, from left to right"""
    for element in expr.items:
        self.resolve(element)
def visit_raw_expr(self, expr: p.RawExpr) -> None:
pass

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Optional, assert_never
from typing import Optional, assert_never, cast
import midas.ast.midas as m
from midas.ast.printer import MidasPrinter
@@ -156,6 +156,37 @@ class ConstraintType:
return f"{self.type} where {printer.print(self.constraint)}"
@dataclass(frozen=True, kw_only=True)
class TupleType:
items: tuple[Type, ...]
def __str__(self) -> str:
return f"({', '.join(map(str, self.items))})"
@dataclass(frozen=True, kw_only=True)
class ColumnType:
type: Type
def __str__(self) -> str:
return f"Column[{self.type}]"
@dataclass(frozen=True, kw_only=True)
class DataFrameType:
columns: list[Column]
def __str__(self) -> str:
schema: list[str] = [f"{col.name}: {col.type}" for col in self.columns]
return f"Frame[{', '.join(schema)}]"
@dataclass(frozen=True, kw_only=True)
class Column:
index: int
name: Optional[str]
type: ColumnType
def substitute_typevars(type: Type, substitutions: dict[str, Type]) -> Type:
def sub_argument(arg: Function.Argument):
return Function.Argument(
@@ -165,6 +196,13 @@ def substitute_typevars(type: Type, substitutions: dict[str, Type]) -> Type:
required=arg.required,
)
def sub_column(col: DataFrameType.Column):
return DataFrameType.Column(
index=col.index,
name=col.name,
type=cast(ColumnType, substitute_typevars(col.type, substitutions)),
)
match type:
case TopType():
return type
@@ -250,10 +288,26 @@ def substitute_typevars(type: Type, substitutions: dict[str, Type]) -> Type:
body=substitute_typevars(body, substitutions),
)
case TupleType(items=items):
return TupleType(
items=tuple(substitute_typevars(item, substitutions) for item in items),
)
case ColumnType(type=items_type):
return ColumnType(
type=substitute_typevars(items_type, substitutions),
)
case DataFrameType(columns=columns):
return DataFrameType(
columns=list(map(sub_column, columns)),
)
case UnknownType() | UnitType():
return type
case TopType() | GenericType():
raise NotImplementedError(f"Unsupported type {type}")
# Ensure exhaustiveness
@@ -317,6 +371,15 @@ def to_annotation(type: Type) -> str:
case ConstraintType():
return str(type)
case TupleType(items=items):
return f"Tuple[{', '.join(map(to_annotation, items))}]"
case ColumnType():
return "pd.Series"
case DataFrameType():
return "pd.DataFrame"
case _:
assert_never(type)
@@ -342,4 +405,7 @@ Type = (
| GenericType
| AppliedType
| ConstraintType
| TupleType
| ColumnType
| DataFrameType
)

View File

@@ -134,9 +134,9 @@ class PythonHighlighter(
def visit_base_type(self, node: p.BaseType) -> None:
self.wrap(node, "base-type")
if node.param is not None:
self.wrap(node.param, "param")
node.param.accept(self)
for arg in node.args:
self.wrap(arg, "arg")
arg.accept(self)
def visit_constraint_type(self, node: p.ConstraintType) -> None:
self.wrap(node, "constraint-type")
@@ -247,6 +247,10 @@ class PythonHighlighter(
if expr.step is not None:
expr.step.accept(self)
def visit_tuple_expr(self, expr: p.TupleExpr) -> None:
    """Visit each item of a tuple expression, from left to right"""
    for element in expr.items:
        element.accept(self)
def visit_raw_expr(self, expr: p.RawExpr) -> None: ...
def visit_raw_stmt(self, stmt: p.RawStmt) -> None: ...
@@ -350,6 +354,14 @@ class MidasHighlighter(
for param in spec.pos + spec.mixed + spec.kw:
param.type.accept(self)
def visit_frame_type(self, type: m.FrameType) -> None:
    """Wrap a frame type as "frame", then handle each of its columns in order"""
    self.wrap(type, "frame")
    columns = type.columns
    for entry in columns:
        self._visit_frame_column(entry)
def _visit_frame_column(self, column: m.FrameType.Column) -> None:
self.wrap(column, "column")
class DiagnosticsHighlighter(Highlighter):
EXTRA_CSS_PATH: Optional[Path] = Path(__file__).parent / "hl_diagnostic.css"

View File

@@ -3,7 +3,7 @@ span {
--col: 108, 233, 108;
}
&.param {
&.arg {
--col: 103, 192, 224;
}

View File

@@ -68,7 +68,7 @@ class DiagnosticPrinter:
loc: Location = diagnostic.location
if loc.lineno != loc.end_lineno:
print(diagnostic)
self.print_multiline(lines, diagnostic, indent)
return
start_offset: int = loc.col_offset
@@ -95,3 +95,27 @@ class DiagnosticPrinter:
print(indent_str + before + subject + after)
print(indent_str + cursor)
print()
def print_multiline(
    self, all_lines: list[str], diagnostic: Diagnostic, indent: int = 4
):
    """Print a diagnostic whose location spans several source lines

    The covered source lines are printed indented, with the span colored from
    the start column on the first line up to the end column on the last line,
    followed by the diagnostic message in the same color

    Args:
        all_lines: every line of the source file
        diagnostic: the diagnostic to print
        indent: number of spaces printed before each source line
    """
    loc: Location = diagnostic.location

    # NOTE(review): assumes end_lineno may be None like end_col_offset - confirm.
    # A missing end line is treated as a single-line span rather than letting
    # the slice run to the end of the file
    end_lineno: int = loc.lineno if loc.end_lineno is None else loc.end_lineno
    lines: list[str] = all_lines[loc.lineno - 1 : end_lineno]
    if len(lines) == 0:
        # The location lies outside of the source, nothing can be highlighted
        print(diagnostic)
        return

    start_offset: int = loc.col_offset
    # Only fall back when the end column is really missing: 0 is a valid end
    # column, and the fallback must be relative to the last line
    end_offset: int = (
        len(lines[-1]) if loc.end_col_offset is None else loc.end_col_offset
    )
    indent_str: str = " " * indent

    color: int = self.COLORS.get(diagnostic.type, Ansi.WHITE)

    highlighted: list[str] = list(lines)
    # Close the colored span first, so that when the slice holds a single line
    # the start offset still points at the right character
    last: str = highlighted[-1]
    highlighted[-1] = last[:end_offset] + (Ansi.RESET + last[end_offset:])
    first: str = highlighted[0]
    highlighted[0] = first[:start_offset] + (Ansi.FG(color) + first[start_offset:])

    res: str = "\n".join(indent_str + line for line in highlighted)

    print(diagnostic.location_str + ":")
    print(res)
    print()
    print(Ansi.FG(color) + diagnostic.message + Ansi.RESET)
    print()

View File

@@ -1,4 +1,5 @@
import ast
import logging
import shutil
from dataclasses import dataclass, field
from pathlib import Path
@@ -13,13 +14,16 @@ from midas.checker.types import (
AliasType,
AppliedType,
BaseType,
ColumnType,
ComplexType,
ConstraintType,
DataFrameType,
ExtensionType,
Function,
GenericType,
OverloadedFunction,
TopType,
TupleType,
Type,
TypeVar,
UnitType,
@@ -31,15 +35,19 @@ from midas.utils import TypedAST
@dataclass
class Scope:
pre_assertions: list[ast.stmt] = field(default_factory=list)
aliases: list[str] = field(default_factory=list)
pre_assertions: list[ast.stmt] = field(default_factory=list[ast.stmt])
aliases: list[str] = field(default_factory=list[str])
class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
IS_DATAFRAME_FUNC = "__midas_is_dataframe__"
IS_COLUMN_FUNC = "__midas_is_column__"
def __init__(self, workdir: Path, types: TypesRegistry) -> None:
self.workdir: Path = workdir.resolve()
self.build_dir: Path = self.workdir / "build" / "midas"
self.rel_src_path: Path = Path()
self.logger: logging.Logger = logging.getLogger("Generator")
self._typed_ast: TypedAST = TypedAST(
stmts=[],
@@ -53,12 +61,24 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
self._constraint_generator: ConstraintGenerator = ConstraintGenerator(types)
self._constraints: list[tuple[m.Expr, ast.expr]] = []
self.define_is_dataframe: bool = False
self.define_is_column: bool = False
def generate_ast(self, typed_ast: TypedAST, src_path: Path) -> ast.AST:
self.rel_src_path = src_path.resolve().relative_to(self.workdir)
self._typed_ast = typed_ast
body: list[ast.stmt] = self._visit_body(typed_ast.stmts)
predicates: list[ast.stmt] = self._constraint_generator.get_definitions()
module = ast.Module(body=predicates + body, type_ignores=[])
body = predicates + body
if self.define_is_dataframe:
body = [self._is_dataframe_definition()] + body
if self.define_is_column:
body = [self._is_column_definition()] + body
module = ast.Module(body=body, type_ignores=[])
module = ast.fix_missing_locations(module)
return module
@@ -139,7 +159,9 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
alias: ast.expr = self._make_alias(expr2)
type: Type = self._get_expr_type(expr)
self._make_cast_asserts(expr.location, alias, type)
asserts: list[ast.stmt] = self._make_cast_asserts(expr.location, alias, type)
for assert_ in asserts:
self._add_assert(assert_)
return alias
@@ -174,6 +196,11 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
step=expr.step.accept(self) if expr.step is not None else None,
)
def visit_tuple_expr(self, expr: p.TupleExpr) -> ast.expr:
    """Generate a Python tuple node whose elements are the generated items"""
    elements: list[ast.expr] = []
    for item in expr.items:
        elements.append(item.accept(self))
    return ast.Tuple(elts=elements)
def visit_raw_expr(self, expr: p.RawExpr) -> ast.expr:
return expr.expr
@@ -274,63 +301,156 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
)
return alias
def _add_assert(self, expr: ast.expr, message: str | ast.expr):
def _build_assert(self, expr: ast.expr, message: str | ast.expr) -> ast.stmt:
if isinstance(message, str):
message = ast.Constant(value=message)
self._scopes[-1].pre_assertions.append(
ast.Assert(
test=expr,
msg=message,
)
return ast.Assert(
test=expr,
msg=message,
)
def _add_assert(self, assertion: ast.stmt):
self._scopes[-1].pre_assertions.append(assertion)
def _get_expr_type(self, query: p.Expr) -> Type:
for expr, type in self._typed_ast.judgements:
if expr == query:
return type
raise RuntimeError(f"Cannot get type judgement for {query}")
def _make_cast_asserts(self, src_location: Location, expr: ast.expr, type: Type):
def _make_cast_asserts(
self, src_location: Location, expr: ast.expr, type: Type
) -> list[ast.stmt]:
match type:
case UnknownType():
pass
return []
case BaseType(name=name):
self._add_assert(
ast.Call(
func=ast.Name(id="isinstance"),
args=[expr, ast.Name(id=name)],
keywords=[],
),
self._make_cast_assert_message(src_location, expr, type),
)
return [
self._build_assert(
ast.Call(
func=ast.Name(id="isinstance"),
args=[expr, ast.Name(id=name)],
keywords=[],
),
self._make_cast_assert_message(src_location, expr, type),
)
]
case AliasType(type=base):
self._make_cast_asserts(src_location, expr, base)
return self._make_cast_asserts(src_location, expr, base)
case UnitType():
self._add_assert(
ast.Compare(
left=expr,
ops=[ast.Is()],
comparators=[
ast.Constant(value=None),
],
return [
self._build_assert(
ast.Compare(
left=expr,
ops=[ast.Is()],
comparators=[
ast.Constant(value=None),
],
),
self._make_cast_assert_message(src_location, expr, type),
),
self._make_cast_assert_message(src_location, expr, type),
)
]
case AppliedType(body=body):
self._make_cast_asserts(src_location, expr, body)
return self._make_cast_asserts(src_location, expr, body)
case ConstraintType(type=base, constraint=constraint):
self._make_cast_asserts(src_location, expr, base)
self._make_constraint_assert(src_location, expr, constraint)
asserts: list[ast.stmt] = self._make_cast_asserts(
src_location, expr, base
)
asserts.append(
self._make_constraint_assert(src_location, expr, constraint)
)
return asserts
case TypeVar(bound=bound):
# TODO: check with type from arguments / use call-site context
if bound is not None:
self._make_cast_asserts(src_location, expr, bound)
if bound is None:
return []
return self._make_cast_asserts(src_location, expr, bound)
case TupleType(items=items):
asserts: list[ast.stmt] = [
self._build_assert(
ast.Call(
func=ast.Name(id="isinstance"),
args=[expr, ast.Name(id="tuple")],
keywords=[],
),
self._make_cast_assert_message(src_location, expr, type),
),
]
assert isinstance(expr, ast.Tuple)
for item, item_type in zip(expr.elts, items):
asserts.extend(
self._make_cast_asserts(src_location, item, item_type)
)
return asserts
case DataFrameType(columns=columns):
self.define_is_dataframe = True
asserts: list[ast.stmt] = [
self._build_assert(
ast.Call(
func=ast.Name(id=self.IS_DATAFRAME_FUNC),
args=[expr],
keywords=[],
),
self._make_cast_assert_message(
src_location, expr, type, ": Not a dataframe"
),
),
]
for column in columns:
asserts.append(
self._build_assert(
ast.Compare(
left=ast.Constant(value=column.name),
ops=[ast.In()],
comparators=[expr],
),
self._make_cast_assert_message(
src_location,
expr,
type,
f": Missing column {column.name}",
),
)
)
asserts.extend(
self._make_cast_asserts(
src_location,
ast.Subscript(
value=expr, slice=ast.Constant(value=column.name)
),
column.type,
)
)
return asserts
case ColumnType():
self.define_is_column = True
asserts: list[ast.stmt] = [
self._build_assert(
ast.Call(
func=ast.Name(id=self.IS_COLUMN_FUNC),
args=[expr],
keywords=[],
),
self._make_cast_assert_message(
src_location, expr, type, ": Not a column"
),
),
]
inner_assert: Optional[ast.stmt] = self._make_column_inner_assert(
src_location, expr, type
)
if inner_assert is not None:
asserts.append(inner_assert)
return asserts
case (
TopType()
@@ -340,14 +460,19 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
| ExtensionType()
| GenericType()
):
raise NotImplementedError(f"Can't make assertion for type {type}")
self.logger.warning(f"Can't make assertion for type {type}")
return []
# Ensure exhaustiveness
case _:
assert_never(type)
def _make_cast_assert_message(
self, location: Location, expr: ast.expr, type: Type
self,
location: Location,
expr: ast.expr,
type: Type,
extra: Optional[str] = None,
) -> ast.expr:
loc_str: str = f"{self.rel_src_path}:L{location.lineno}:{location.col_offset+1}"
# f"file.py:L1:1: CastError: Cannot cast {type(expr).__name__} to Type"
@@ -365,15 +490,15 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
),
conversion=-1,
),
ast.Constant(f" to {type}"),
ast.Constant(f" to {type}{extra or ''}"),
]
)
def _make_constraint_assert(
self, src_location: Location, expr: ast.expr, constraint: m.Expr
):
) -> ast.stmt:
test_func: ast.expr = self._get_constraint(constraint)
self._add_assert(
return self._build_assert(
ast.Call(
func=test_func,
args=[expr],
@@ -401,3 +526,90 @@ class Generator(p.Stmt.Visitor[ast.stmt], p.Expr.Visitor[ast.expr]):
constraint: ast.expr = self._constraint_generator.generate(expr)
self._constraints.append((expr, constraint))
return constraint
def _is_dataframe_definition(self) -> ast.stmt:
"""
def IS_DATAFRAME_FUNC(obj) -> bool:
import pandas as pd
return isinstance(obj, pd.DataFrame)
"""
return ast.FunctionDef(
name=self.IS_DATAFRAME_FUNC,
args=ast.arguments(
posonlyargs=[ast.arg(arg="obj")],
args=[],
kwonlyargs=[],
defaults=[],
kw_defaults=[],
),
body=[
ast.Import(names=[ast.alias(name="pandas", asname="pd")]),
ast.Return(
value=ast.Call(
func=ast.Name(id="isinstance"),
args=[
ast.Name(id="obj"),
ast.Attribute(
value=ast.Name(id="pd"),
attr="DataFrame",
),
],
keywords=[],
)
),
],
decorator_list=[],
returns=ast.Name(id="bool"),
)
def _is_column_definition(self) -> ast.stmt:
"""
def IS_COLUMN_FUNC(obj) -> bool:
import pandas as pd
return isinstance(obj, pd.Series)
"""
return ast.FunctionDef(
name=self.IS_COLUMN_FUNC,
args=ast.arguments(
posonlyargs=[ast.arg(arg="obj")],
args=[],
kwonlyargs=[],
defaults=[],
kw_defaults=[],
),
body=[
ast.Import(names=[ast.alias(name="pandas", asname="pd")]),
ast.Return(
value=ast.Call(
func=ast.Name(id="isinstance"),
args=[
ast.Name(id="obj"),
ast.Attribute(
value=ast.Name(id="pd"),
attr="Series",
),
],
keywords=[],
)
),
],
decorator_list=[],
returns=ast.Name(id="bool"),
)
def _make_column_inner_assert(
self, src_location: Location, column: ast.expr, type: ColumnType
) -> Optional[ast.stmt]:
# TODO: improve message, maybe chain contexts
col: ast.expr = ast.Name(id="col")
body: list[ast.stmt] = self._make_cast_asserts(src_location, col, type.type)
if len(body) == 0:
return None
return ast.For(
target=col,
iter=column,
body=body,
orelse=[],
)

View File

@@ -7,13 +7,16 @@ from midas.checker.types import (
AliasType,
AppliedType,
BaseType,
ColumnType,
ComplexType,
ConstraintType,
DataFrameType,
ExtensionType,
Function,
GenericType,
OverloadedFunction,
TopType,
TupleType,
Type,
TypeVar,
UnitType,
@@ -30,6 +33,7 @@ class StubsGenerator:
self.types: TypesRegistry = types
self.stubs: list[ast.stmt] = []
self.typing_imports: set[str] = set()
self.import_pandas: bool = False
self.protocol_idx: int = 0
self.stub_idx: int = 0
self.type_var_idx: int = 0
@@ -38,6 +42,7 @@ class StubsGenerator:
def generate_stubs(self) -> ast.Module:
self.stubs = []
self.typing_imports = set()
self.import_pandas = False
for name, type in self.types._types.items():
# Skip builtin types, not just based on name so the user can override
# TODO: check if added members on builtin type
@@ -53,7 +58,7 @@ class StubsGenerator:
continue
self.generate_stub(name, type)
imports = [
imports: list[ast.stmt] = [
ast.ImportFrom(
module="__future__",
names=[ast.alias(name="annotations")],
@@ -70,6 +75,17 @@ class StubsGenerator:
level=0,
)
)
if self.import_pandas:
imports.append(
ast.Import(
names=[
ast.alias(
name="pandas",
asname="pd",
)
],
)
)
return ast.Module(body=imports + self.stubs, type_ignores=[])
def generate_stub(self, name: str, type: Type):
@@ -231,6 +247,31 @@ class StubsGenerator:
case ConstraintType():
return self.dump_type(type.type)
case TupleType(items=items):
return ast.Subscript(
value=ast.Name(id="tuple"),
slice=ast.Tuple(
elts=[self.dump_type(item) for item in items],
),
)
case ColumnType(type=inner):
self.import_pandas = True
return ast.Subscript(
value=ast.Attribute(
value=ast.Name(id="pd"),
attr="Series",
),
slice=self.dump_type(inner),
)
case DataFrameType():
self.import_pandas = True
return ast.Attribute(
value=ast.Name(id="pd"),
attr="DataFrame",
)
case _:
assert_never(type)

View File

@@ -9,6 +9,7 @@ from midas.ast.midas import (
Expr,
ExtendStmt,
ExtensionType,
FrameType,
FunctionType,
GenericType,
GetExpr,
@@ -204,8 +205,10 @@ class MidasParser(Parser):
return self.generic_type()
def generic_type(self) -> Type:
type: Type = self.named_type()
type: NamedType = self.named_type()
if self.check(TokenType.LEFT_BRACKET):
if type.name.lexeme == "Frame":
return self.frame_type()
args: list[Type] = self.type_args()
return GenericType(
location=Location.span(type.location, self.previous().get_location()),
@@ -224,7 +227,7 @@ class MidasParser(Parser):
self.consume(TokenType.RIGHT_BRACKET, "Missing ']' after generic arguments")
return args
def named_type(self) -> Type:
def named_type(self) -> NamedType:
name: Token = self.consume_identifier("Expected type name")
return NamedType(
location=name.get_location(),
@@ -259,6 +262,32 @@ class MidasParser(Parser):
members=members,
)
def frame_type(self) -> FrameType:
    """Parse a frame schema

    The schema is a bracketed, comma-separated list of `name: type` columns;
    a trailing comma before the closing bracket is accepted. The token
    preceding the opening bracket is used as the start of the frame location
    """
    keyword: Token = self.previous()
    self.consume(TokenType.LEFT_BRACKET, "Expected '[' to start frame schema")

    columns: list[FrameType.Column] = []
    while True:
        # Stop on the closing bracket (or when running out of tokens)
        if self.check(TokenType.RIGHT_BRACKET) or self.is_at_end():
            break

        column_name: Token = self.advance()
        self.consume(TokenType.COLON, "Expected ':' between column name and type")
        column_type: Type = self.type_expr()
        column = FrameType.Column(
            location=column_name.location_to(self.previous()),
            name=column_name,
            type=column_type,
        )
        columns.append(column)

        # Without a separator, no other column can follow
        if not self.match(TokenType.COMMA):
            break

    self.consume(TokenType.RIGHT_BRACKET, "Unclosed frame schema")
    return FrameType(
        location=keyword.location_to(self.previous()),
        columns=columns,
    )
def constraint(self) -> Expr:
"""Parse a constraint
@@ -348,7 +377,7 @@ class MidasParser(Parser):
pos_args: list[Expr] = []
kw_args: dict[str, Expr] = {}
keywords: bool = False
while not self.match(TokenType.RIGHT_PAREN):
while not self.check(TokenType.RIGHT_PAREN):
if self.check_identifier() and self.check_next(TokenType.EQUAL):
keywords = True
keyword: Token = self.advance()

View File

@@ -30,6 +30,7 @@ from midas.ast.python import (
Stmt,
SubscriptExpr,
TernaryExpr,
TupleExpr,
TypeAssign,
UnaryExpr,
VariableExpr,
@@ -300,26 +301,28 @@ class PythonParser:
case ast.Subscript(value=ast.Name(id="Frame"), slice=schema):
return self._parse_frame_type(schema)
case ast.Subscript(value=ast.Name(id=name), slice=param):
case ast.Subscript(value=ast.Name(id=name), slice=arg):
args: tuple[MidasType, ...] = (
tuple(self._parse_type(a) for a in arg.elts)
if isinstance(arg, ast.Tuple)
else (self._parse_type(arg),)
)
return BaseType(
location=loc,
base=name,
param=self._parse_type(param),
args=args,
)
case ast.Name(id=name):
return BaseType(
location=loc,
base=name,
param=None,
args=(),
)
case ast.BinOp(left=left_expr, op=ast.Add(), right=right_expr):
left = self._parse_type(left_expr)
match left:
case None:
raise InvalidSyntaxError()
# If chained constraints, separate base type and rebuild constraint
case ConstraintType(type=left_type, constraint=left_constraint):
constraint = ast.BinOp(
@@ -345,7 +348,7 @@ class PythonParser:
return BaseType(
location=loc,
base="None",
param=None,
args=(),
)
case _:
@@ -477,6 +480,12 @@ class PythonParser:
step=self.parse_expr(step) if step is not None else None,
)
case ast.Tuple(elts=items):
return TupleExpr(
location=location,
items=tuple(self.parse_expr(item) for item in items),
)
case _:
print(f"Unsupported expression: {ast.unparse(node)}")
return RawExpr(location=location, expr=node)

View File

@@ -1,3 +1,4 @@
from typing import Generic, TypeVar
from typing import cast as typing_cast
cast = typing_cast
@@ -32,3 +33,20 @@ This operation is unsound, use at your own risk!
_**Internal Python documentation**_
"""
T = TypeVar("T")
class Frame(Generic[T]):
"""A `Frame` is the abstract type implemented by `DataFrame`
A frame contains any number of named columns (see :class:`Column`)
"""
class Column(Generic[T]):
    """A `Column` is the abstract type implemented by `Series`

    A column contains any number of values of the same type
    """

View File

@@ -4,7 +4,35 @@
"type": "Warning",
"location": {
"start": [
6,
8,
12
],
"end": [
8,
43
]
},
"message": "ConstraintType not yet supported"
},
{
"type": "Warning",
"location": {
"start": [
10,
10
],
"end": [
10,
18
]
},
"message": "Unknown type 'datetime'"
},
{
"type": "Warning",
"location": {
"start": [
13,
4
],
"end": [
@@ -12,7 +40,7 @@
5
]
},
"message": "FrameType not yet supported"
"message": "Unknown type '_'"
}
],
"judgments": []

View File

@@ -328,6 +328,19 @@
},
"type": {}
},
{
"location": {
"from": "L6:9",
"to": "L6:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L6:5",
@@ -373,19 +386,6 @@
}
}
},
{
"location": {
"from": "L6:9",
"to": "L6:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L6:5",
@@ -407,6 +407,32 @@
},
"type": {}
},
{
"location": {
"from": "L7:9",
"to": "L7:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L7:12",
"to": "L7:15"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L7:5",
@@ -452,32 +478,6 @@
}
}
},
{
"location": {
"from": "L7:9",
"to": "L7:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L7:12",
"to": "L7:15"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L7:5",
@@ -503,6 +503,32 @@
},
"type": {}
},
{
"location": {
"from": "L8:9",
"to": "L8:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L8:14",
"to": "L8:17"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L8:5",
@@ -548,32 +574,6 @@
}
}
},
{
"location": {
"from": "L8:9",
"to": "L8:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L8:14",
"to": "L8:17"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L8:5",
@@ -600,6 +600,45 @@
},
"type": {}
},
{
"location": {
"from": "L9:9",
"to": "L9:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L9:12",
"to": "L9:15"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L9:17",
"to": "L9:23"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L9:5",
@@ -645,45 +684,6 @@
}
}
},
{
"location": {
"from": "L9:9",
"to": "L9:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L9:12",
"to": "L9:15"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L9:17",
"to": "L9:23"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L9:5",
@@ -713,6 +713,45 @@
},
"type": {}
},
{
"location": {
"from": "L10:9",
"to": "L10:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L10:12",
"to": "L10:15"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L10:19",
"to": "L10:22"
},
"expr": {
"_type": "LiteralExpr",
"value": 3.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L10:5",
@@ -758,45 +797,6 @@
}
}
},
{
"location": {
"from": "L10:9",
"to": "L10:10"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L10:12",
"to": "L10:15"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L10:19",
"to": "L10:22"
},
"expr": {
"_type": "LiteralExpr",
"value": 3.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L10:5",
@@ -827,6 +827,19 @@
},
"type": {}
},
{
"location": {
"from": "L11:11",
"to": "L11:12"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L11:5",
@@ -872,19 +885,6 @@
}
}
},
{
"location": {
"from": "L11:11",
"to": "L11:12"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L11:5",
@@ -906,6 +906,19 @@
},
"type": {}
},
{
"location": {
"from": "L12:11",
"to": "L12:17"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L12:5",
@@ -951,19 +964,6 @@
}
}
},
{
"location": {
"from": "L12:11",
"to": "L12:17"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L12:5",
@@ -985,6 +985,45 @@
},
"type": {}
},
{
"location": {
"from": "L14:10",
"to": "L14:11"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L14:13",
"to": "L14:16"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L14:20",
"to": "L14:26"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L14:6",
@@ -1030,45 +1069,6 @@
}
}
},
{
"location": {
"from": "L14:10",
"to": "L14:11"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L14:13",
"to": "L14:16"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L14:20",
"to": "L14:26"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L14:6",
@@ -1101,6 +1101,45 @@
"name": "bool"
}
},
{
"location": {
"from": "L15:10",
"to": "L15:11"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L15:15",
"to": "L15:18"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L15:22",
"to": "L15:28"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L15:6",
@@ -1146,45 +1185,6 @@
}
}
},
{
"location": {
"from": "L15:10",
"to": "L15:11"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L15:15",
"to": "L15:18"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L15:22",
"to": "L15:28"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L15:6",
@@ -1217,6 +1217,45 @@
"name": "bool"
}
},
{
"location": {
"from": "L16:10",
"to": "L16:11"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L16:15",
"to": "L16:21"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L16:25",
"to": "L16:28"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L16:6",
@@ -1262,45 +1301,6 @@
}
}
},
{
"location": {
"from": "L16:10",
"to": "L16:11"
},
"expr": {
"_type": "LiteralExpr",
"value": 1
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L16:15",
"to": "L16:21"
},
"expr": {
"_type": "LiteralExpr",
"value": "test"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L16:25",
"to": "L16:28"
},
"expr": {
"_type": "LiteralExpr",
"value": 2.0
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L16:6",
@@ -1333,6 +1333,45 @@
"name": "bool"
}
},
{
"location": {
"from": "L18:10",
"to": "L18:13"
},
"expr": {
"_type": "LiteralExpr",
"value": "a"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L18:15",
"to": "L18:16"
},
"expr": {
"_type": "LiteralExpr",
"value": 3
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L18:20",
"to": "L18:25"
},
"expr": {
"_type": "LiteralExpr",
"value": false
},
"type": {
"name": "bool"
}
},
{
"location": {
"from": "L18:6",
@@ -1378,45 +1417,6 @@
}
}
},
{
"location": {
"from": "L18:10",
"to": "L18:13"
},
"expr": {
"_type": "LiteralExpr",
"value": "a"
},
"type": {
"name": "str"
}
},
{
"location": {
"from": "L18:15",
"to": "L18:16"
},
"expr": {
"_type": "LiteralExpr",
"value": 3
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L18:20",
"to": "L18:25"
},
"expr": {
"_type": "LiteralExpr",
"value": false
},
"type": {
"name": "bool"
}
},
{
"location": {
"from": "L18:6",

View File

@@ -24,7 +24,7 @@
"type": {
"_type": "BaseType",
"base": "Meter",
"param": null
"args": []
},
"expr": {
"_type": "LiteralExpr",
@@ -62,7 +62,7 @@
"type": {
"_type": "BaseType",
"base": "Second",
"param": null
"args": []
},
"expr": {
"_type": "LiteralExpr",

View File

@@ -100,6 +100,32 @@
"name": "float"
}
},
{
"location": {
"from": "L11:13",
"to": "L11:15"
},
"expr": {
"_type": "VariableExpr",
"name": "v1"
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L11:17",
"to": "L11:19"
},
"expr": {
"_type": "VariableExpr",
"name": "v2"
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L11:5",
@@ -135,32 +161,6 @@
}
}
},
{
"location": {
"from": "L11:13",
"to": "L11:15"
},
"expr": {
"_type": "VariableExpr",
"name": "v1"
},
"type": {
"name": "int"
}
},
{
"location": {
"from": "L11:17",
"to": "L11:19"
},
"expr": {
"_type": "VariableExpr",
"name": "v2"
},
"type": {
"name": "float"
}
},
{
"location": {
"from": "L11:5",

View File

@@ -72,29 +72,6 @@
}
],
"judgments": [
{
"location": {
"from": "L26:0",
"to": "L26:5"
},
"expr": {
"_type": "VariableExpr",
"name": "print"
},
"type": {
"pos_args": [
{
"pos": 0,
"name": "object",
"type": {},
"required": true
}
],
"args": [],
"kw_args": [],
"returns": {}
}
},
{
"location": {
"from": "L27:4",
@@ -325,6 +302,29 @@
}
}
},
{
"location": {
"from": "L26:0",
"to": "L26:5"
},
"expr": {
"_type": "VariableExpr",
"name": "print"
},
"type": {
"pos_args": [
{
"pos": 0,
"name": "object",
"type": {},
"required": false
}
],
"args": [],
"kw_args": [],
"returns": {}
}
},
{
"location": {
"from": "L26:0",

View File

@@ -63,31 +63,6 @@
"name": "float"
}
},
{
"location": {
"from": "L6:11",
"to": "L6:15"
},
"expr": {
"_type": "VariableExpr",
"name": "bool"
},
"type": {
"pos_args": [
{
"pos": 0,
"name": "object",
"type": {},
"required": false
}
],
"args": [],
"kw_args": [],
"returns": {
"name": "bool"
}
}
},
{
"location": {
"from": "L6:16",
@@ -135,6 +110,31 @@
"name": "int"
}
},
{
"location": {
"from": "L6:11",
"to": "L6:15"
},
"expr": {
"_type": "VariableExpr",
"name": "bool"
},
"type": {
"pos_args": [
{
"pos": 0,
"name": "object",
"type": {},
"required": false
}
],
"args": [],
"kw_args": [],
"returns": {
"name": "bool"
}
}
},
{
"location": {
"from": "L6:11",
@@ -367,6 +367,54 @@
}
}
},
{
"location": {
"from": "L12:21",
"to": "L12:27"
},
"expr": {
"_type": "VariableExpr",
"name": "double"
},
"type": {
"pos_args": [],
"args": [
{
"pos": 0,
"name": "value",
"type": {
"name": "float"
},
"required": true
}
],
"kw_args": [],
"returns": {
"name": "float"
}
}
},
{
"location": {
"from": "L12:29",
"to": "L12:35"
},
"expr": {
"_type": "VariableExpr",
"name": "floats"
},
"type": {
"name": "list",
"args": [
{
"name": "float"
}
],
"body": {
"name": "list"
}
}
},
{
"location": {
"from": "L12:17",
@@ -455,54 +503,6 @@
}
}
},
{
"location": {
"from": "L12:21",
"to": "L12:27"
},
"expr": {
"_type": "VariableExpr",
"name": "double"
},
"type": {
"pos_args": [],
"args": [
{
"pos": 0,
"name": "value",
"type": {
"name": "float"
},
"required": true
}
],
"kw_args": [],
"returns": {
"name": "float"
}
}
},
{
"location": {
"from": "L12:29",
"to": "L12:35"
},
"expr": {
"_type": "VariableExpr",
"name": "floats"
},
"type": {
"name": "list",
"args": [
{
"name": "float"
}
],
"body": {
"name": "list"
}
}
},
{
"location": {
"from": "L12:17",
@@ -538,6 +538,54 @@
}
}
},
{
"location": {
"from": "L13:19",
"to": "L13:25"
},
"expr": {
"_type": "VariableExpr",
"name": "double"
},
"type": {
"pos_args": [],
"args": [
{
"pos": 0,
"name": "value",
"type": {
"name": "float"
},
"required": true
}
],
"kw_args": [],
"returns": {
"name": "float"
}
}
},
{
"location": {
"from": "L13:27",
"to": "L13:31"
},
"expr": {
"_type": "VariableExpr",
"name": "ints"
},
"type": {
"name": "list",
"args": [
{
"name": "int"
}
],
"body": {
"name": "list"
}
}
},
{
"location": {
"from": "L13:15",
@@ -626,54 +674,6 @@
}
}
},
{
"location": {
"from": "L13:19",
"to": "L13:25"
},
"expr": {
"_type": "VariableExpr",
"name": "double"
},
"type": {
"pos_args": [],
"args": [
{
"pos": 0,
"name": "value",
"type": {
"name": "float"
},
"required": true
}
],
"kw_args": [],
"returns": {
"name": "float"
}
}
},
{
"location": {
"from": "L13:27",
"to": "L13:31"
},
"expr": {
"_type": "VariableExpr",
"name": "ints"
},
"type": {
"name": "list",
"args": [
{
"name": "int"
}
],
"body": {
"name": "list"
}
}
},
{
"location": {
"from": "L13:15",
@@ -699,6 +699,54 @@
},
"type": {}
},
{
"location": {
"from": "L14:15",
"to": "L14:21"
},
"expr": {
"_type": "VariableExpr",
"name": "is_odd"
},
"type": {
"pos_args": [],
"args": [
{
"pos": 0,
"name": "value",
"type": {
"name": "int"
},
"required": true
}
],
"kw_args": [],
"returns": {
"name": "bool"
}
}
},
{
"location": {
"from": "L14:23",
"to": "L14:27"
},
"expr": {
"_type": "VariableExpr",
"name": "ints"
},
"type": {
"name": "list",
"args": [
{
"name": "int"
}
],
"body": {
"name": "list"
}
}
},
{
"location": {
"from": "L14:11",
@@ -787,54 +835,6 @@
}
}
},
{
"location": {
"from": "L14:15",
"to": "L14:21"
},
"expr": {
"_type": "VariableExpr",
"name": "is_odd"
},
"type": {
"pos_args": [],
"args": [
{
"pos": 0,
"name": "value",
"type": {
"name": "int"
},
"required": true
}
],
"kw_args": [],
"returns": {
"name": "bool"
}
}
},
{
"location": {
"from": "L14:23",
"to": "L14:27"
},
"expr": {
"_type": "VariableExpr",
"name": "ints"
},
"type": {
"name": "list",
"args": [
{
"name": "int"
}
],
"body": {
"name": "list"
}
}
},
{
"location": {
"from": "L14:11",

View File

@@ -16,7 +16,7 @@
"type": {
"_type": "BaseType",
"base": "bool",
"param": null
"args": []
}
},
{
@@ -25,7 +25,7 @@
"type": {
"_type": "BaseType",
"base": "int",
"param": null
"args": []
}
},
{
@@ -36,7 +36,7 @@
"type": {
"_type": "BaseType",
"base": "float",
"param": null
"args": []
},
"constraint": "(_ > 0) + (_ < 250)"
}
@@ -47,7 +47,7 @@
"type": {
"_type": "BaseType",
"base": "str",
"param": null
"args": []
}
},
{
@@ -56,7 +56,7 @@
"type": {
"_type": "BaseType",
"base": "datetime",
"param": null
"args": []
}
},
{
@@ -65,7 +65,7 @@
"type": {
"_type": "BaseType",
"base": "float",
"param": null
"args": []
}
},
{
@@ -79,7 +79,7 @@
"type": {
"_type": "BaseType",
"base": "_",
"param": null
"args": []
}
}
]

View File

@@ -16,7 +16,7 @@
"type": {
"_type": "BaseType",
"base": "GeoLocation",
"param": null
"args": []
}
}
]
@@ -28,11 +28,13 @@
"type": {
"_type": "BaseType",
"base": "Column",
"param": {
"_type": "BaseType",
"base": "GeoLocation",
"param": null
}
"args": [
{
"_type": "BaseType",
"base": "GeoLocation",
"args": []
}
]
}
},
{
@@ -65,11 +67,13 @@
"type": {
"_type": "BaseType",
"base": "Column",
"param": {
"_type": "BaseType",
"base": "GeoLocation",
"param": null
}
"args": [
{
"_type": "BaseType",
"base": "GeoLocation",
"args": []
}
]
}
},
{
@@ -117,7 +121,7 @@
"type": {
"_type": "BaseType",
"base": "Latitude",
"param": null
"args": []
}
},
{
@@ -146,7 +150,7 @@
"type": {
"_type": "BaseType",
"base": "Latitude",
"param": null
"args": []
}
},
{
@@ -175,11 +179,13 @@
"type": {
"_type": "BaseType",
"base": "Difference",
"param": {
"_type": "BaseType",
"base": "Latitude",
"param": null
}
"args": [
{
"_type": "BaseType",
"base": "Latitude",
"args": []
}
]
}
},
{
@@ -217,7 +223,7 @@
"type": {
"_type": "BaseType",
"base": "int",
"param": null
"args": []
},
"constraint": "_ >= 0"
}
@@ -230,7 +236,7 @@
"type": {
"_type": "BaseType",
"base": "float",
"param": null
"args": []
},
"constraint": "_ >= 0"
}
@@ -252,7 +258,7 @@
"type": {
"_type": "BaseType",
"base": "int",
"param": null
"args": []
},
"constraint": "Positive"
}
@@ -265,7 +271,7 @@
"type": {
"_type": "BaseType",
"base": "float",
"param": null
"args": []
},
"constraint": "Positive"
}

View File

@@ -14,15 +14,17 @@
"type": {
"_type": "BaseType",
"base": "Column",
"param": {
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"param": null
},
"constraint": "0 <= _ <= 1"
}
"args": [
{
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"args": []
},
"constraint": "0 <= _ <= 1"
}
]
},
"default": null
},
@@ -31,15 +33,17 @@
"type": {
"_type": "BaseType",
"base": "Column",
"param": {
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"param": null
},
"constraint": "0 <= _ <= 1"
}
"args": [
{
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"args": []
},
"constraint": "0 <= _ <= 1"
}
]
},
"default": null
}
@@ -50,15 +54,17 @@
"returns": {
"_type": "BaseType",
"base": "Column",
"param": {
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"param": null
},
"constraint": "0 <= _ <= 2"
}
"args": [
{
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"args": []
},
"constraint": "0 <= _ <= 2"
}
]
},
"body": [
{
@@ -67,15 +73,17 @@
"type": {
"_type": "BaseType",
"base": "Column",
"param": {
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"param": null
},
"constraint": "0 <= _ <= 2"
}
"args": [
{
"_type": "ConstraintType",
"type": {
"_type": "BaseType",
"base": "float",
"args": []
},
"constraint": "0 <= _ <= 2"
}
]
}
},
{
@@ -117,7 +125,7 @@
"type": {
"_type": "BaseType",
"base": "int",
"param": null
"args": []
},
"default": null
}
@@ -128,7 +136,7 @@
"type": {
"_type": "BaseType",
"base": "float",
"param": null
"args": []
},
"default": null
}
@@ -140,7 +148,7 @@
"type": {
"_type": "BaseType",
"base": "str",
"param": null
"args": []
},
"default": null
}

View File

@@ -8,6 +8,7 @@ from midas.ast.midas import (
Expr,
ExtendStmt,
ExtensionType,
FrameType,
FunctionType,
GenericType,
GetExpr,
@@ -197,3 +198,15 @@ class MidasAstJsonSerializer(
"base": type.base.accept(self),
"extension": type.extension.accept(self),
}
def visit_frame_type(self, type: FrameType) -> dict:
    """Serialize a frame type node into a JSON-compatible dict.

    Args:
        type: The frame type node; its ``columns`` are serialized in
            iteration order through ``self._serialize_column``.

    Returns:
        A dict tagged ``"_type": "FrameType"`` with the serialized columns
        under ``"columns"``.
    """
    serialized_columns = []
    for column in type.columns:
        serialized_columns.append(self._serialize_column(column))
    return {"_type": "FrameType", "columns": serialized_columns}
def _serialize_column(self, column: FrameType.Column):
return {
"name": column.name.lexeme,
"type": column.type.accept(self),
}

View File

@@ -30,6 +30,7 @@ from midas.ast.python import (
Stmt,
SubscriptExpr,
TernaryExpr,
TupleExpr,
TypeAssign,
UnaryExpr,
VariableExpr,
@@ -98,7 +99,7 @@ class PythonAstJsonSerializer(
return {
"_type": "BaseType",
"base": node.base,
"param": self._serialize_optional(node.param),
"args": self._serialize_list(node.args),
}
def visit_constraint_type(self, node: ConstraintType) -> dict:
@@ -302,6 +303,12 @@ class PythonAstJsonSerializer(
"step": self._serialize_optional(expr.step),
}
def visit_tuple_expr(self, expr: TupleExpr) -> dict:
    """Serialize a tuple expression into a JSON-compatible dict.

    Args:
        expr: The tuple expression; each entry of ``expr.items`` is visited
            with this serializer, in order.

    Returns:
        A dict tagged ``"_type": "TupleExpr"`` with the serialized entries
        under ``"items"``.
    """
    serialized_items = []
    for element in expr.items:
        serialized_items.append(element.accept(self))
    return {"_type": "TupleExpr", "items": serialized_items}
def visit_raw_expr(self, expr: RawExpr) -> dict:
return {
"_type": "RawExpr",