Added column numbers to Token source locations so lexing error messages can be more helpful
This commit is contained in:
@@ -16,6 +16,7 @@ class Tokenizer:
|
||||
self.source_text = f.read()
|
||||
self.source_index = 0
|
||||
self.line_number = 1
|
||||
self.column_number = 0
|
||||
|
||||
self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
|
||||
|
||||
@@ -37,7 +38,7 @@ class Tokenizer:
|
||||
|
||||
token = self._get_token(char)
|
||||
logging.debug(f"found token \"{token}\" on line {self.line_number}")
|
||||
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern))
|
||||
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number, self.column_number), self.type_name_pattern))
|
||||
|
||||
return tokens
|
||||
|
||||
@@ -78,8 +79,10 @@ class Tokenizer:
|
||||
|
||||
if char == '\n':
|
||||
self.line_number += 1
|
||||
self.column_number = 1
|
||||
|
||||
self.source_index += 1
|
||||
self.column_number += 1
|
||||
return char
|
||||
|
||||
def _consume_multiline_comment(self):
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Private definitions for Token class used by the Lexer"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from enum import IntEnum
|
||||
@@ -12,7 +11,7 @@ STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")
|
||||
MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")
|
||||
|
||||
class Token_type(IntEnum):
|
||||
UNKNOWN=-1 #maybe this should be renamed to IDENTIFIERs
|
||||
UNKNOWN=-1 #maybe this should be renamed to IDENTIFIER
|
||||
LEFT_PAREN=0,
|
||||
RIGTH_PAREN=1,
|
||||
LEFT_CURLY=2,
|
||||
@@ -34,15 +33,22 @@ class Token_type(IntEnum):
|
||||
TYPE_NAME=18
|
||||
|
||||
class SourceLocation:
|
||||
def __init__(self, filename: str, line: int) -> None:
|
||||
|
||||
__slots__ = ["filename", "line", "column"]
|
||||
|
||||
def __init__(self, filename: str, line: int, column: int) -> None:
|
||||
self.filename = filename
|
||||
self.line = line
|
||||
self.column = column
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"File {self.filename}, line {self.line}"
|
||||
return f"File {self.filename}, {self.line}:{self.column}"
|
||||
|
||||
class Token:
|
||||
def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
|
||||
|
||||
__slots__ = ["type", "content", "location"]
|
||||
|
||||
def __init__(self, type: Token_type, location: SourceLocation, content: str) -> None:
|
||||
self.type = type
|
||||
self.content = content
|
||||
self.location = location
|
||||
@@ -53,6 +59,7 @@ class Token:
|
||||
return f"{self.type}"
|
||||
|
||||
def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
|
||||
"""Construct a token object with the provided tag and source location"""
|
||||
if tag == '(':
|
||||
return Token(Token_type.LEFT_PAREN, location, tag)
|
||||
elif tag == ')':
|
||||
|
||||
Reference in New Issue
Block a user