Added more location data to Token objects so lexing error messages can be more helpful

This commit is contained in:
weckyy702
2021-04-03 23:34:24 +02:00
parent 3bc93fa3a7
commit f66c5d4321
2 changed files with 16 additions and 6 deletions

View File

@@ -16,6 +16,7 @@ class Tokenizer:
self.source_text = f.read() self.source_text = f.read()
self.source_index = 0 self.source_index = 0
self.line_number = 1 self.line_number = 1
self.column_number = 0
self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text) self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
@@ -37,7 +38,7 @@ class Tokenizer:
token = self._get_token(char) token = self._get_token(char)
logging.debug(f"found token \"{token}\" on line {self.line_number}") logging.debug(f"found token \"{token}\" on line {self.line_number}")
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern)) tokens.append(make_token(token, SourceLocation(self._filename, self.line_number, self.column_number), self.type_name_pattern))
return tokens return tokens
@@ -78,8 +79,10 @@ class Tokenizer:
if char == '\n': if char == '\n':
self.line_number += 1 self.line_number += 1
self.column_number = 1
self.source_index += 1 self.source_index += 1
self.column_number += 1
return char return char
def _consume_multiline_comment(self): def _consume_multiline_comment(self):

View File

@@ -1,6 +1,5 @@
"""Private definitions for Token class used by the Lexer""" """Private definitions for Token class used by the Lexer"""
import logging
import re import re
from enum import IntEnum from enum import IntEnum
@@ -12,7 +11,7 @@ STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")
MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""") MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")
class Token_type(IntEnum): class Token_type(IntEnum):
UNKNOWN=-1 #maybe this should be renamed to IDENTIFIERs UNKNOWN=-1 #maybe this should be renamed to IDENTIFIER
LEFT_PAREN=0, LEFT_PAREN=0,
RIGTH_PAREN=1, RIGTH_PAREN=1,
LEFT_CURLY=2, LEFT_CURLY=2,
@@ -34,15 +33,22 @@ class Token_type(IntEnum):
TYPE_NAME=18 TYPE_NAME=18
class SourceLocation: class SourceLocation:
def __init__(self, filename: str, line: int) -> None:
__slots__ = ["filename", "line", "column"]
def __init__(self, filename: str, line: int, column: int) -> None:
self.filename = filename self.filename = filename
self.line = line self.line = line
self.column = column
def __str__(self) -> str: def __str__(self) -> str:
return f"File {self.filename}, line {self.line}" return f"File {self.filename}, {self.line}:{self.column}"
class Token: class Token:
def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
__slots__ = ["type", "content", "location"]
def __init__(self, type: Token_type, location: SourceLocation, content: str) -> None:
self.type = type self.type = type
self.content = content self.content = content
self.location = location self.location = location
@@ -53,6 +59,7 @@ class Token:
return f"{self.type}" return f"{self.type}"
def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token: def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
"""Construct a token object with the provided tag and source location"""
if tag == '(': if tag == '(':
return Token(Token_type.LEFT_PAREN, location, tag) return Token(Token_type.LEFT_PAREN, location, tag)
elif tag == ')': elif tag == ')':