updated interpreter
This commit is contained in:
@@ -5,7 +5,7 @@ import re
|
|||||||
from typing import List, overload
|
from typing import List, overload
|
||||||
|
|
||||||
from interpreter.function_scope import Function_scope
|
from interpreter.function_scope import Function_scope
|
||||||
from interpreter._token import Token, make_token
|
from interpreter._token import Token, make_token, Token_type
|
||||||
class Lexer:
|
class Lexer:
|
||||||
"""This class will lex the provided Java source and generate a list of Function_scopes"""
|
"""This class will lex the provided Java source and generate a list of Function_scopes"""
|
||||||
|
|
||||||
@@ -17,9 +17,12 @@ class Lexer:
|
|||||||
self.source_index = 0
|
self.source_index = 0
|
||||||
self.line_number = 1
|
self.line_number = 1
|
||||||
|
|
||||||
def lex(self) -> List[Token]:
|
self._tokens = []
|
||||||
|
self._token_index = 0
|
||||||
|
|
||||||
tokens = []
|
self.token_type_pattern = re.compile('(char)|(int)|(void)|(double)')
|
||||||
|
|
||||||
|
def _get_tokens(self):
|
||||||
|
|
||||||
while char := self._consume():
|
while char := self._consume():
|
||||||
|
|
||||||
@@ -30,10 +33,31 @@ class Lexer:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
token = self._get_token(char)
|
token = self._get_token(char)
|
||||||
logging.debug(f"found token \"{token}\" on line {self.line_number}")
|
#logging.debug(f"found token \"{token}\" on line {self.line_number}")
|
||||||
#tokens.append(make_token(token))
|
self._tokens.append(make_token(token, self.token_type_pattern))
|
||||||
|
|
||||||
|
def get_scopes(self) -> List[Function_scope]:
    """Walk the token stream looking for function definitions.

    The source is lexed first if no tokens have been produced yet. Every
    TYPE_NAME token must be followed by an UNKNOWN token (presumably the
    identifier -- TODO confirm); when that pair is followed by a left
    parenthesis it is logged as a function definition and the tokens up to
    the closing parenthesis are consumed.

    NOTE(review): no Function_scope objects are built or returned yet, so
    this currently returns None despite the annotation.

    Raises:
        Exception: if a type name is not followed by an UNKNOWN token,
            including when the input ends right after the type name.
    """
    if not self._tokens:
        self._get_tokens()

    while token := self._consume_token():
        if token.type == Token_type.UNKNOWN:
            logging.debug(token)
            continue
        if token.type != Token_type.TYPE_NAME:
            continue

        name_token = self._peek_token()
        # _peek_token returns None at end of input; a dangling type name is
        # just as illegal as one followed by the wrong kind of token.
        if name_token is None or name_token.type != Token_type.UNKNOWN:
            logging.error("Illegal identifier after Type name!")
            raise Exception("Illegal identifier after Type name!")

        after_name = self._peek_token(1)
        if after_name is None or after_name.type != Token_type.LEFT_PAREN:
            continue

        logging.debug(f"Function definition found: {token.content} {name_token.content} ()")
        self._consume_token()  # the token after the type name
        self._consume_token()  # the left parenthesis
        while function_token := self._consume_token():
            # NOTE(review): "RIGTH_PAREN" looks like a typo of RIGHT_PAREN;
            # the enum member is not visible here -- confirm the spelling.
            if function_token.type == Token_type.RIGTH_PAREN:
                break
            logging.debug(function_token.type)
|
||||||
|
|
||||||
|
|
||||||
return tokens
|
|
||||||
|
|
||||||
def _get_token(self, char: str) -> str:
|
def _get_token(self, char: str) -> str:
|
||||||
token = char
|
token = char
|
||||||
@@ -76,6 +100,17 @@ class Lexer:
|
|||||||
self.source_index += 1
|
self.source_index += 1
|
||||||
return char
|
return char
|
||||||
|
|
||||||
|
def _peek_token(self, offset: int = 0):
    """Return the token `offset` positions from the cursor without consuming it.

    Args:
        offset: distance from the current token index; 0 is the token the
            next _consume_token call would return.

    Returns:
        The token at that position, or None when the position lies outside
        the token list.
    """
    position = self._token_index + offset
    # A negative position would otherwise wrap around to the end of the list
    # and hand back an unrelated token.
    if position < 0 or position >= len(self._tokens):
        return None
    return self._tokens[position]
|
||||||
|
|
||||||
|
def _consume_token(self):
    """Return the token under the cursor and move the cursor one step forward.

    The cursor is advanced unconditionally, even when _peek_token had
    nothing to return.
    """
    current = self._peek_token()
    self._token_index = self._token_index + 1
    return current
|
||||||
|
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def _consume_until(self, end_token: str) -> str:...
|
def _consume_until(self, end_token: str) -> str:...
|
||||||
|
|
||||||
|
|||||||
@@ -1,12 +1,15 @@
|
|||||||
"""Private definitions for Token class used by the Lexer"""
|
"""Private definitions for Token class used by the Lexer"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from enum import IntEnum
|
from enum import IntEnum
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
# Patterns used by make_token to classify a raw tag.
# NOTE(review): make_token applies these with re.match, which anchors only at
# the start of the tag, so a tag that merely begins with one of these
# alternatives also matches -- confirm that is intended.
NUMERIC_CONSTANT_PATTERN = re.compile("([0-9]+)|(true)|(false)")
KEYWORD_PATTERN = re.compile("(return)|(continue)|(break)|(new)")
STRING_LITERAL_PATTERN = re.compile("('|\")(.*)(\"|')")
# Raw string: "\+" and "\*" are invalid escape sequences in a normal literal.
MATH_OP_PATTERN = re.compile(r"\+|-|\*|/")
|
||||||
|
|
||||||
class Token_type(IntEnum):
|
class Token_type(IntEnum):
|
||||||
UNKNOWN=-1
|
UNKNOWN=-1
|
||||||
@@ -17,20 +20,30 @@ class Token_type(IntEnum):
|
|||||||
LEFT_BRACKET=4,
|
LEFT_BRACKET=4,
|
||||||
RIGHT_BRACKET=5,
|
RIGHT_BRACKET=5,
|
||||||
COMMA=6,
|
COMMA=6,
|
||||||
NUMERIC_CONSTANT=7,
|
EQUAL_SIGN=7,
|
||||||
IF_STATEMENT=8,
|
SEMICOLON=8
|
||||||
WHILE_STATEMENT=9,
|
MATH_OP=9
|
||||||
DO_WHILE_STATEMENT=10,
|
NUMERIC_CONSTANT=10,
|
||||||
FOR_STATEMENT=11,
|
IF_STATEMENT=11,
|
||||||
KEY_WORD=13,
|
ELSE_STATEMENT=12,
|
||||||
STRING_LITERAL=14
|
WHILE_STATEMENT=13,
|
||||||
|
DO_WHILE_STATEMENT=14,
|
||||||
|
FOR_STATEMENT=15,
|
||||||
|
KEY_WORD=16,
|
||||||
|
STRING_LITERAL=17
|
||||||
|
TYPE_NAME=18
|
||||||
|
|
||||||
class Token:
    """A single lexed token: its Token_type plus optional text content."""

    def __init__(self, type: Token_type, content: Union[str, None]=None) -> None:
        """Store the token type and, when given, the text attached to it."""
        self.type = type
        self.content = content

    def __str__(self) -> str:
        """Return "<type>: <content>" when content is set, otherwise just the type."""
        if self.content:
            return f"{self.type}: {self.content}"
        # Previously read the non-existent attribute "typetype", which raised
        # AttributeError for every token without content.
        return f"{self.type}"
|
||||||
|
|
||||||
|
def make_token(tag: str, type_name_pattern:re.Pattern) -> Token:
|
||||||
if tag == '(':
|
if tag == '(':
|
||||||
return Token(Token_type.LEFT_PAREN)
|
return Token(Token_type.LEFT_PAREN)
|
||||||
elif tag == ')':
|
elif tag == ')':
|
||||||
@@ -45,14 +58,30 @@ def make_token(tag: str) -> Token:
|
|||||||
return Token(Token_type.RIGHT_BRACKET)
|
return Token(Token_type.RIGHT_BRACKET)
|
||||||
elif tag == ',':
|
elif tag == ',':
|
||||||
return Token(Token_type.COMMA)
|
return Token(Token_type.COMMA)
|
||||||
|
elif tag == '=':
|
||||||
|
return Token(Token_type.EQUAL_SIGN)
|
||||||
|
elif tag == ';':
|
||||||
|
return Token(Token_type.SEMICOLON)
|
||||||
|
elif MATH_OP_PATTERN.match(tag):
|
||||||
|
return Token(Token_type.MATH_OP)
|
||||||
elif NUMERIC_CONSTANT_PATTERN.match(tag):
|
elif NUMERIC_CONSTANT_PATTERN.match(tag):
|
||||||
return Token(Token_type.NUMERIC_CONSTANT, tag)
|
return Token(Token_type.NUMERIC_CONSTANT, tag)
|
||||||
elif tag == 'if':
|
elif tag == "if":
|
||||||
return Token(Token_type.IF_STATEMENT)
|
return Token(Token_type.IF_STATEMENT)
|
||||||
elif tag == 'while':
|
elif tag == "else":
|
||||||
|
return Token(Token_type.ELSE_STATEMENT)
|
||||||
|
elif tag == "while":
|
||||||
return Token(Token_type.WHILE_STATEMENT)
|
return Token(Token_type.WHILE_STATEMENT)
|
||||||
elif tag == 'do':
|
elif tag == "do":
|
||||||
return Token(Token_type.DO_WHILE_STATEMENT)
|
return Token(Token_type.DO_WHILE_STATEMENT)
|
||||||
elif tag == 'for':
|
elif tag == "for":
|
||||||
return Token(Token_type.FOR_STATEMENT)
|
return Token(Token_type.FOR_STATEMENT)
|
||||||
#TODO: finish this
|
elif KEYWORD_PATTERN.match(tag):
|
||||||
|
return Token(Token_type.KEY_WORD, tag)
|
||||||
|
elif STRING_LITERAL_PATTERN.match(tag):
|
||||||
|
return Token(Token_type, tag[1:-1])
|
||||||
|
elif type_name_pattern.match(tag):
|
||||||
|
return Token(Token_type.TYPE_NAME, tag)
|
||||||
|
else:
|
||||||
|
logging.warn(f"unknown token {tag}")
|
||||||
|
return Token(Token_type.UNKNOWN, tag)
|
||||||
Reference in New Issue
Block a user