From 10ee4991c3799a25edc230cc6c4ed510fd83d6c3 Mon Sep 17 00:00:00 2001
From: LordBaryhobal <lordbaryhobal@gmail.com>
Date: Wed, 13 May 2026 19:26:09 +0200
Subject: [PATCH] feat(parser): add a basic lexer for annotations

---
 lexer/annotations.py | 81 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 lexer/annotations.py

diff --git a/lexer/annotations.py b/lexer/annotations.py
new file mode 100644
index 0000000..b8c7cf7
--- /dev/null
+++ b/lexer/annotations.py
@@ -0,0 +1,81 @@
+from lexer.base import Lexer
+from lexer.token import TokenType
+
+
+class AnnotationLexer(Lexer):
+    """Lexer that splits annotation source text into tokens"""
+
+    # Characters that always form a complete token on their own
+    _SINGLE_CHAR_TOKENS = {
+        "(": TokenType.LEFT_PAREN,
+        ")": TokenType.RIGHT_PAREN,
+        "[": TokenType.LEFT_BRACKET,
+        "]": TokenType.RIGHT_BRACKET,
+        ":": TokenType.COLON,
+        ",": TokenType.COMMA,
+        "_": TokenType.UNDERSCORE,
+        "+": TokenType.PLUS,
+        "\n": TokenType.NEWLINE,
+    }
+
+    def scan_token(self) -> None:
+        """Scan one token starting at the next character and add it"""
+        char: str = self.advance()
+        if char in self._SINGLE_CHAR_TOKENS:
+            self.add_token(self._SINGLE_CHAR_TOKENS[char])
+        elif char == "#":
+            self.scan_comment()
+        elif char in (" ", "\r", "\t"):
+            # Consume all whitespace characters until EOL or EOF
+            while (
+                not self.is_at_end()
+                and self.peek().isspace()
+                and self.peek() != "\n"
+            ):
+                self.advance()
+            self.add_token(TokenType.WHITESPACE)
+        elif char.isdecimal():
+            # Not isdigit(): it accepts e.g. "²", which float() rejects
+            self.scan_number()
+        elif char.isalpha():
+            self.scan_identifier()
+        else:
+            self.error("Unexpected character")
+
+    def scan_number(self) -> None:
+        """Scan the rest of a number and add it as a token
+
+        This method handles both simple integers and floats. Scientific notation
+        and base prefixes (0x, 0b, 0o) are not supported. Digits are matched
+        with str.isdecimal() so that float() can parse every scanned digit
+        """
+        while self.peek().isdecimal():
+            self.advance()
+
+        # A "." only belongs to the number when a digit follows it
+        if self.peek() == "." and self.peek_next().isdecimal():
+            self.advance()
+            while self.peek().isdecimal():
+                self.advance()
+
+        value: float = float(self.source[self.start : self.idx])
+        self.add_token(TokenType.NUMBER, value)
+
+    def scan_identifier(self) -> None:
+        """Scan the rest of an identifier and add it as a token
+
+        An identifier starts with a letter, followed by any number of
+        alphanumerical characters or underscores
+        """
+        while self.peek().isalnum() or self.peek() == "_":
+            self.advance()
+        self.add_token(TokenType.IDENTIFIER)
+
+    def scan_comment(self) -> None:
+        """Scan the rest of a comment and add it as a token
+
+        A comment starts with a '#' character and ends at the EOL/EOF
+        """
+        while not self.is_at_end() and self.peek() != "\n":
+            self.advance()
+        self.add_token(TokenType.COMMENT)