Added more location data to Token objects so lexing error messages can be more helpful
This commit is contained in:
@@ -16,6 +16,7 @@ class Tokenizer:
|
|||||||
self.source_text = f.read()
|
self.source_text = f.read()
|
||||||
self.source_index = 0
|
self.source_index = 0
|
||||||
self.line_number = 1
|
self.line_number = 1
|
||||||
|
self.column_number = 0
|
||||||
|
|
||||||
self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
|
self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
|
||||||
|
|
||||||
@@ -37,7 +38,7 @@ class Tokenizer:
|
|||||||
|
|
||||||
token = self._get_token(char)
|
token = self._get_token(char)
|
||||||
logging.debug(f"found token \"{token}\" on line {self.line_number}")
|
logging.debug(f"found token \"{token}\" on line {self.line_number}")
|
||||||
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern))
|
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number, self.column_number), self.type_name_pattern))
|
||||||
|
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
@@ -78,8 +79,10 @@ class Tokenizer:
|
|||||||
|
|
||||||
if char == '\n':
|
if char == '\n':
|
||||||
self.line_number += 1
|
self.line_number += 1
|
||||||
|
self.column_number = 1
|
||||||
|
|
||||||
self.source_index += 1
|
self.source_index += 1
|
||||||
|
self.column_number += 1
|
||||||
return char
|
return char
|
||||||
|
|
||||||
def _consume_multiline_comment(self):
|
def _consume_multiline_comment(self):
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
"""Private definitions for Token class used by the Lexer"""
|
"""Private definitions for Token class used by the Lexer"""
|
||||||
|
|
||||||
import logging
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from enum import IntEnum
|
from enum import IntEnum
|
||||||
@@ -12,7 +11,7 @@ STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")
|
|||||||
MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")
|
MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")
|
||||||
|
|
||||||
class Token_type(IntEnum):
|
class Token_type(IntEnum):
|
||||||
UNKNOWN=-1 #maybe this should be renamed to IDENTIFIERs
|
UNKNOWN=-1 #maybe this should be renamed to IDENTIFIER
|
||||||
LEFT_PAREN=0,
|
LEFT_PAREN=0,
|
||||||
RIGTH_PAREN=1,
|
RIGTH_PAREN=1,
|
||||||
LEFT_CURLY=2,
|
LEFT_CURLY=2,
|
||||||
@@ -34,15 +33,22 @@ class Token_type(IntEnum):
|
|||||||
TYPE_NAME=18
|
TYPE_NAME=18
|
||||||
|
|
||||||
class SourceLocation:
|
class SourceLocation:
|
||||||
def __init__(self, filename: str, line: int) -> None:
|
|
||||||
|
__slots__ = ["filename", "line", "column"]
|
||||||
|
|
||||||
|
def __init__(self, filename: str, line: int, column: int) -> None:
|
||||||
self.filename = filename
|
self.filename = filename
|
||||||
self.line = line
|
self.line = line
|
||||||
|
self.column = column
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return f"File {self.filename}, line {self.line}"
|
return f"File {self.filename}, {self.line}:{self.column}"
|
||||||
|
|
||||||
class Token:
|
class Token:
|
||||||
def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
|
|
||||||
|
__slots__ = ["type", "content", "location"]
|
||||||
|
|
||||||
|
def __init__(self, type: Token_type, location: SourceLocation, content: str) -> None:
|
||||||
self.type = type
|
self.type = type
|
||||||
self.content = content
|
self.content = content
|
||||||
self.location = location
|
self.location = location
|
||||||
@@ -53,6 +59,7 @@ class Token:
|
|||||||
return f"{self.type}"
|
return f"{self.type}"
|
||||||
|
|
||||||
def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
|
def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
|
||||||
|
"""Construct a token object with the provided tag and source location"""
|
||||||
if tag == '(':
|
if tag == '(':
|
||||||
return Token(Token_type.LEFT_PAREN, location, tag)
|
return Token(Token_type.LEFT_PAREN, location, tag)
|
||||||
elif tag == ')':
|
elif tag == ')':
|
||||||
|
|||||||
Reference in New Issue
Block a user