91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
"""Private definitions for Token class used by the Lexer"""
|
|
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from enum import IntEnum
|
|
from typing import Union
|
|
|
|
# A run of ASCII digits (group 1) or the words "true" (group 2) / "false"
# (group 3). The pattern carries no anchors of its own.
NUMERIC_CONSTANT_PATTERN = re.compile(r"""([0-9]+)|(true)|(false)""")

# One of the words return / continue / break / new; every alternative ends
# with "$", so the word has to reach the end of the text being matched.
KEYWORD_PATTERN = re.compile(r"""(return)$|(continue)$|(break)$|(new)$""")

# A quote character, any characters (group 2), then a quote character. The
# closing quote may be of either kind, not necessarily the opening one.
STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")

# A single operator character: + - * / < >
MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")
|
|
|
|
class Token_type(IntEnum):
    """Integer tags identifying the kind of a token."""

    UNKNOWN = -1  # maybe this should be renamed to IDENTIFIER
    LEFT_PAREN = 0
    # NOTE: "RIGTH" is a historical misspelling. It stays the canonical member
    # name so existing references keep working; RIGHT_PAREN below is an alias
    # for the very same member.
    RIGTH_PAREN = 1
    RIGHT_PAREN = 1
    LEFT_CURLY = 2
    RIGHT_CURLY = 3
    LEFT_BRACKET = 4
    RIGHT_BRACKET = 5
    COMMA = 6
    EQUAL_SIGN = 7
    SEMICOLON = 8
    MATH_OP = 9
    NUMERIC_CONSTANT = 10
    IF_STATEMENT = 11
    ELSE_STATEMENT = 12
    WHILE_STATEMENT = 13
    DO_WHILE_STATEMENT = 14
    FOR_STATEMENT = 15
    KEY_WORD = 16
    STRING_LITERAL = 17
    TYPE_NAME = 18
|
|
|
|
@dataclass(frozen=True)
class SourceLocation:
    """Immutable record of a position: file name, line and column."""

    file: str
    line: int
    column: int

    def __str__(self) -> str:
        """Render the location as ``File <file> <line>:<column>``."""
        position = f"{self.line}:{self.column}"
        return f"File {self.file} {position}"
|
|
|
|
@dataclass(eq=True, frozen=True)
class Token:
    """Immutable token: its kind, its source location and its text.

    Only ``type`` takes part in the generated equality comparison;
    ``location`` and ``content`` are declared with ``compare=False``.
    """

    # Kind of the token.
    type: Token_type
    # Where the token was found; defaults to an empty file name at 0:0.
    location: SourceLocation = field(
        default=SourceLocation("", 0, 0),
        compare=False,
    )
    # Text of the token; defaults to the empty string.
    content: str = field(default="", compare=False)
|
|
|
|
# Tags recognized by exact comparison. None of them starts with a math
# operator, a digit, or "true"/"false", so resolving them before the
# pattern-based checks yields the same result as testing them in sequence.
_EXACT_TAG_TYPES = {
    "(": Token_type.LEFT_PAREN,
    ")": Token_type.RIGTH_PAREN,
    "{": Token_type.LEFT_CURLY,
    "}": Token_type.RIGHT_CURLY,
    "[": Token_type.LEFT_BRACKET,
    "]": Token_type.RIGHT_BRACKET,
    ",": Token_type.COMMA,
    "=": Token_type.EQUAL_SIGN,
    ";": Token_type.SEMICOLON,
    "if": Token_type.IF_STATEMENT,
    "else": Token_type.ELSE_STATEMENT,
    "while": Token_type.WHILE_STATEMENT,
    "do": Token_type.DO_WHILE_STATEMENT,
    "for": Token_type.FOR_STATEMENT,
}


def _is_numeric_constant(tag: str) -> bool:
    """Return True if tag starts with digits or is exactly ``true``/``false``."""
    found = NUMERIC_CONSTANT_PATTERN.match(tag)
    if found is None:
        return False
    # Group 1 is the digit run: a tag that merely starts with digits still
    # counts, as before. The boolean words, however, must cover the whole
    # tag; otherwise a name such as "trueValue" would be taken for a constant.
    return found.group(1) is not None or found.end() == len(tag)


def make_token(tag: str, location: SourceLocation, type_name_pattern: re.Pattern) -> Token:
    """Construct a token object with the provided tag and source location.

    The tag is classified by the first rule that applies:

    1. exact punctuation and statement words (``(``, ``)``, ``{``, ``}``,
       ``[``, ``]``, ``,``, ``=``, ``;``, ``if``, ``else``, ``while``, ``do``,
       ``for``);
    2. ``MATH_OP_PATTERN``;
    3. numeric constants: a tag starting with digits, or exactly ``true`` or
       ``false``;
    4. ``KEYWORD_PATTERN``;
    5. ``STRING_LITERAL_PATTERN``;
    6. ``type_name_pattern``;
    7. anything else becomes ``Token_type.UNKNOWN``.

    The patterns in rules 2, 5 and 6 are applied with ``match``, so they only
    have to match at the beginning of the tag.

    Args:
        tag: Text of the token; stored unchanged as the token's content.
        location: Source position stored on the token.
        type_name_pattern: Compiled pattern; a tag it matches is classified
            as ``Token_type.TYPE_NAME``.

    Returns:
        A ``Token`` carrying the detected type, ``location`` and ``tag``.
    """
    # Compare against None explicitly: LEFT_PAREN has the integer value 0 and
    # would be falsy in a plain truth test.
    exact_type = _EXACT_TAG_TYPES.get(tag)
    if exact_type is not None:
        return Token(exact_type, location, tag)

    if MATH_OP_PATTERN.match(tag):
        kind = Token_type.MATH_OP
    elif _is_numeric_constant(tag):
        kind = Token_type.NUMERIC_CONSTANT
    elif KEYWORD_PATTERN.match(tag):
        kind = Token_type.KEY_WORD
    elif STRING_LITERAL_PATTERN.match(tag):
        kind = Token_type.STRING_LITERAL
    elif type_name_pattern.match(tag):
        kind = Token_type.TYPE_NAME
    else:
        kind = Token_type.UNKNOWN
    return Token(kind, location, tag)