diff --git a/interpreter/Tokenizer.py b/interpreter/Tokenizer.py
index 4fb208a..4a0d148 100644
--- a/interpreter/Tokenizer.py
+++ b/interpreter/Tokenizer.py
@@ -16,6 +16,7 @@ class Tokenizer:
         self.source_text = f.read()
         self.source_index = 0
         self.line_number = 1
+        self.column_number = 0
 
         self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
 
@@ -37,7 +38,7 @@ class Tokenizer:
             token = self._get_token(char)
             logging.debug(f"found token \"{token}\" on line {self.line_number}")
 
-            tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern))
+            tokens.append(make_token(token, SourceLocation(self._filename, self.line_number, self.column_number), self.type_name_pattern))
 
         return tokens
 
@@ -78,8 +79,11 @@ class Tokenizer:
 
         if char == '\n':
             self.line_number += 1
+            self.column_number = 0
+        else:
+            self.column_number += 1
 
         self.source_index += 1
         return char
 
     def _consume_multiline_comment(self):
diff --git a/interpreter/_token.py b/interpreter/_token.py
index 1fad304..ef8aab7 100644
--- a/interpreter/_token.py
+++ b/interpreter/_token.py
@@ -1,6 +1,5 @@
 """Private definitions for Token class used by the Lexer"""
 
-import logging
 import re
 
 from enum import IntEnum
@@ -12,7 +11,7 @@ STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")
 MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")
 
 class Token_type(IntEnum):
-    UNKNOWN=-1 #maybe this should be renamed to IDENTIFIERs
+    UNKNOWN=-1 #maybe this should be renamed to IDENTIFIER
     LEFT_PAREN=0,
     RIGTH_PAREN=1,
     LEFT_CURLY=2,
@@ -34,15 +33,22 @@ class Token_type(IntEnum):
     TYPE_NAME=18
 
 class SourceLocation:
-    def __init__(self, filename: str, line: int) -> None:
+
+    __slots__ = ["filename", "line", "column"]
+
+    def __init__(self, filename: str, line: int, column: int) -> None:
         self.filename = filename
         self.line = line
+        self.column = column
 
     def __str__(self) -> str:
-        return f"File {self.filename}, line {self.line}"
+        return f"File {self.filename}, {self.line}:{self.column}"
 
 class Token:
-    def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
+
+    __slots__ = ["type", "content", "location"]
+
+    def __init__(self, type: Token_type, location: SourceLocation, content: str) -> None:
         self.type = type
         self.content = content
         self.location = location
@@ -53,6 +59,7 @@ class Token:
         return f"{self.type}"
 
 def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
+    """Construct a token object with the provided tag and source location"""
     if tag == '(':
         return Token(Token_type.LEFT_PAREN, location, tag)
     elif tag == ')':