Source code for yapcad.dsl.tokens

"""
Token types for the yapCAD DSL lexer.

yapCAD DSL v2 - Pythonic Syntax

Token type categories follow error code ranges from the roadmap:
- E0xx: Lexer errors
- E1xx: Parser errors
- E2xx: Type errors
- E3xx: Semantic errors
"""

from enum import Enum, auto
from dataclasses import dataclass
from typing import Any, Optional



[docs]
class TokenType(Enum):
    """All token types recognized by the DSL lexer."""

    # --- Literals ---
    INT_LITERAL = auto()        # 42, 0xff, 0b1010
    FLOAT_LITERAL = auto()      # 3.14, 1e-9, 2.5E+10
    STRING_LITERAL = auto()     # "hello", """multi\nline"""
    BOOL_LITERAL = auto()       # True, False

    # --- Identifiers ---
    IDENTIFIER = auto()         # user-defined names

    # --- Keywords (Pythonic) ---
    MODULE = auto()             # module
    USE = auto()                # use (import-like)
    DEF = auto()                # def (function definition)
    RETURN = auto()             # return
    EMIT = auto()               # emit (with metadata)
    IF = auto()                 # if
    ELIF = auto()               # elif
    ELSE = auto()               # else
    FOR = auto()                # for
    IN = auto()                 # in
    WHILE = auto()              # while (DEPRECATED - removed for static verifiability)
    ASSERT = auto()             # assert
    PASS = auto()               # pass
    AS = auto()                 # as
    MATCH = auto()              # match (pattern matching)
    EXPORT = auto()             # export
    NATIVE = auto()             # native (for @native decorator)

    # --- Deprecated keywords (kept for error messages) ---
    COMMAND = auto()            # command (use 'def' instead)
    LET = auto()                # let (no longer needed)
    REQUIRE = auto()            # require (use 'assert' instead)
    WITH = auto()               # with (use emit kwargs instead)
    PYTHON = auto()             # python (use @native instead)
    FN = auto()                 # fn (use def instead)
    EXPORTS = auto()            # exports (use @native instead)

    # --- Closure keywords ---
    CLOSE = auto()              # close
    CLOSE_C0 = auto()           # closeC0
    CLOSE_C1 = auto()           # closeC1

    # --- Type keywords: Tier 1 - Primitives ---
    TYPE_INT = auto()           # int
    TYPE_FLOAT = auto()         # float
    TYPE_STRING = auto()        # string / str
    TYPE_BOOL = auto()          # bool
    TYPE_POINT = auto()         # point
    TYPE_POINT2D = auto()       # point2d
    TYPE_POINT3D = auto()       # point3d
    TYPE_VECTOR = auto()        # vector
    TYPE_VECTOR2D = auto()      # vector2d
    TYPE_VECTOR3D = auto()      # vector3d
    TYPE_TRANSFORM = auto()     # transform

    # --- Type keywords: Tier 2 - Curves ---
    TYPE_LINE_SEGMENT = auto()  # line_segment
    TYPE_ARC = auto()           # arc
    TYPE_CIRCLE = auto()        # circle
    TYPE_ELLIPSE = auto()       # ellipse
    TYPE_PARABOLA = auto()      # parabola
    TYPE_HYPERBOLA = auto()     # hyperbola
    TYPE_CATMULLROM = auto()    # catmullrom
    TYPE_NURBS = auto()         # nurbs
    TYPE_BEZIER = auto()        # bezier

    # --- Type keywords: Tier 3 - Compound curves ---
    TYPE_PATH2D = auto()        # path2d
    TYPE_PATH3D = auto()        # path3d
    TYPE_PROFILE2D = auto()     # profile2d
    TYPE_REGION2D = auto()      # region2d
    TYPE_LOOP3D = auto()        # loop3d

    # --- Type keywords: Tier 4 - Surfaces ---
    TYPE_SURFACE = auto()       # surface
    TYPE_SHELL = auto()         # shell

    # --- Type keywords: Tier 5 - Solids ---
    TYPE_SOLID = auto()         # solid

    # --- Generic type keywords ---
    TYPE_LIST = auto()          # list
    TYPE_DICT = auto()          # dict

    # --- Arithmetic operators ---
    PLUS = auto()               # +
    MINUS = auto()              # -
    STAR = auto()               # *
    SLASH = auto()              # /
    DOUBLE_SLASH = auto()       # // (integer division)
    PERCENT = auto()            # %
    DOUBLE_STAR = auto()        # ** (power)

    # --- Comparison operators ---
    LT = auto()                 # <
    GT = auto()                 # >
    LE = auto()                 # <=
    GE = auto()                 # >=
    EQ = auto()                 # ==
    NE = auto()                 # !=

    # --- Logical operators (keyword-based, Python style) ---
    AND = auto()                # and
    OR = auto()                 # or
    NOT = auto()                # not

    # --- Assignment ---
    ASSIGN = auto()             # =
    PLUS_ASSIGN = auto()        # += (future)
    MINUS_ASSIGN = auto()       # -= (future)

    # --- Delimiters ---
    LBRACE = auto()             # { (for dict literals only)
    RBRACE = auto()             # } (for dict literals only)
    LPAREN = auto()             # (
    RPAREN = auto()             # )
    LBRACKET = auto()           # [
    RBRACKET = auto()           # ]
    COLON = auto()              # :
    SEMICOLON = auto()          # ; (deprecated, kept for error messages)
    COMMA = auto()              # ,
    DOT = auto()                # .
    ARROW = auto()              # ->
    DOUBLE_ARROW = auto()       # => (for lambdas)
    RANGE = auto()              # .. (range literal syntax)
    QUESTION = auto()           # ? (optional type)
    UNDERSCORE = auto()         # _ (wildcard in match)
    AT = auto()                 # @ (decorator)
    HASH = auto()               # # (comment start - usually skipped)

    # --- Indentation tokens (Python-style blocks) ---
    INDENT = auto()             # Increase in indentation level
    DEDENT = auto()             # Decrease in indentation level
    NEWLINE = auto()            # Significant newline (end of statement)

    # --- Special ---
    EOF = auto()                # end of file

    # --- Native/Python block content ---
    NATIVE_BLOCK = auto()       # Content of @native decorated function




[docs]
@dataclass(frozen=True)
class SourceLocation:
    """Represents a position in source code."""
    line: int           # 1-indexed line number
    column: int         # 1-indexed column number
    offset: int         # 0-indexed character offset from start
    filename: Optional[str] = None

    def __str__(self) -> str:
        if self.filename:
            return f"{self.filename}:{self.line}:{self.column}"
        return f"{self.line}:{self.column}"




[docs]
@dataclass(frozen=True)
class SourceSpan:
    """Represents a range in source code."""
    start: SourceLocation
    end: SourceLocation

    def __str__(self) -> str:
        if self.start.filename:
            return f"{self.start.filename}:{self.start.line}:{self.start.column}-{self.end.line}:{self.end.column}"
        return f"{self.start.line}:{self.start.column}-{self.end.line}:{self.end.column}"




[docs]
@dataclass(frozen=True)
class Token:
    """A single token from the lexer."""
    type: TokenType
    value: Any              # The actual value (int, float, str, etc.)
    lexeme: str             # The original source text
    span: SourceSpan        # Location in source

    def __str__(self) -> str:
        if self.type in (TokenType.INT_LITERAL, TokenType.FLOAT_LITERAL,
                         TokenType.STRING_LITERAL, TokenType.IDENTIFIER):
            return f"{self.type.name}({self.value!r})"
        return self.type.name



# Keyword mapping - maps string to token type
KEYWORDS: dict[str, TokenType] = {
    # Core keywords (Pythonic)
    "module": TokenType.MODULE,
    "use": TokenType.USE,
    "def": TokenType.DEF,
    "return": TokenType.RETURN,
    "emit": TokenType.EMIT,
    "if": TokenType.IF,
    "elif": TokenType.ELIF,
    "else": TokenType.ELSE,
    "for": TokenType.FOR,
    "in": TokenType.IN,
    "while": TokenType.WHILE,
    "assert": TokenType.ASSERT,
    "pass": TokenType.PASS,
    "as": TokenType.AS,
    "match": TokenType.MATCH,
    "export": TokenType.EXPORT,
    "native": TokenType.NATIVE,

    # Logical operators (Python style)
    "and": TokenType.AND,
    "or": TokenType.OR,
    "not": TokenType.NOT,

    # Deprecated keywords (kept for helpful error messages)
    "command": TokenType.COMMAND,
    "let": TokenType.LET,
    "require": TokenType.REQUIRE,
    "with": TokenType.WITH,
    "python": TokenType.PYTHON,
    "fn": TokenType.FN,
    "exports": TokenType.EXPORTS,

    # Closure
    "close": TokenType.CLOSE,
    "closeC0": TokenType.CLOSE_C0,
    "closeC1": TokenType.CLOSE_C1,

    # Boolean literals (Python style: True/False)
    "True": TokenType.BOOL_LITERAL,
    "False": TokenType.BOOL_LITERAL,
    # Also accept lowercase for compatibility
    "true": TokenType.BOOL_LITERAL,
    "false": TokenType.BOOL_LITERAL,

    # Tier 1 types
    "int": TokenType.TYPE_INT,
    "float": TokenType.TYPE_FLOAT,
    "string": TokenType.TYPE_STRING,
    "str": TokenType.TYPE_STRING,  # Python alias
    "bool": TokenType.TYPE_BOOL,
    "point": TokenType.TYPE_POINT,
    "point2d": TokenType.TYPE_POINT2D,
    "point3d": TokenType.TYPE_POINT3D,
    "vector": TokenType.TYPE_VECTOR,
    "vector2d": TokenType.TYPE_VECTOR2D,
    "vector3d": TokenType.TYPE_VECTOR3D,
    "transform": TokenType.TYPE_TRANSFORM,

    # Tier 2 types
    "line_segment": TokenType.TYPE_LINE_SEGMENT,
    "arc": TokenType.TYPE_ARC,
    "circle": TokenType.TYPE_CIRCLE,
    "ellipse": TokenType.TYPE_ELLIPSE,
    "parabola": TokenType.TYPE_PARABOLA,
    "hyperbola": TokenType.TYPE_HYPERBOLA,
    "catmullrom": TokenType.TYPE_CATMULLROM,
    "nurbs": TokenType.TYPE_NURBS,
    "bezier": TokenType.TYPE_BEZIER,

    # Tier 3 types
    "path2d": TokenType.TYPE_PATH2D,
    "path3d": TokenType.TYPE_PATH3D,
    "profile2d": TokenType.TYPE_PROFILE2D,
    "region2d": TokenType.TYPE_REGION2D,
    "loop3d": TokenType.TYPE_LOOP3D,

    # Tier 4 types
    "surface": TokenType.TYPE_SURFACE,
    "shell": TokenType.TYPE_SHELL,

    # Tier 5 types
    "solid": TokenType.TYPE_SOLID,

    # Generic types
    "list": TokenType.TYPE_LIST,
    "dict": TokenType.TYPE_DICT,
}


# Set of deprecated keywords for helpful error messages
DEPRECATED_KEYWORDS: set[str] = {
    "command",  # Use 'def' instead
    "let",      # No longer needed, just use 'name: type = value' or 'name = value'
    "require",  # Use 'assert' instead
    "fn",       # Use 'def' instead
    "exports",  # Use @native decorator instead
    "while",    # Removed for static verifiability - use 'for x in range(n)' instead
}

# Mapping of deprecated keyword to suggestion
DEPRECATED_SUGGESTIONS: dict[str, str] = {
    "command": "Use 'def' to define functions",
    "let": "Variable declarations no longer need 'let'. Use 'name: type = value' or 'name = value'",
    "require": "Use 'assert condition, \"message\"' instead",
    "fn": "Use 'def' to define functions",
    "exports": "Use '@native' decorator instead of 'native python { } exports { }'",
    "with": "Use keyword arguments with emit: 'emit value, name=\"x\", material=\"y\"'",
    "while": "'while' loops are not supported (removed for static verifiability). Use 'for i in range(max_iterations)' with early return instead",
}



[docs]
def is_type_token(token_type: TokenType) -> bool:
    """Check if a token type represents a type keyword."""
    return token_type.name.startswith("TYPE_")




[docs]
def is_deprecated_keyword(keyword: str) -> bool:
    """Check if a keyword is deprecated."""
    return keyword in DEPRECATED_KEYWORDS




[docs]
def get_deprecation_message(keyword: str) -> Optional[str]:
    """Get the deprecation message for a keyword, if any."""
    return DEPRECATED_SUGGESTIONS.get(keyword)