updated interpreter

This commit is contained in:
weckyy702
2021-03-28 23:42:54 +02:00
parent 1a04f9914c
commit 53534ee707
2 changed files with 84 additions and 20 deletions

View File

@@ -5,7 +5,7 @@ import re
from typing import List, overload from typing import List, overload
from interpreter.function_scope import Function_scope from interpreter.function_scope import Function_scope
from interpreter._token import Token, make_token from interpreter._token import Token, make_token, Token_type
class Lexer: class Lexer:
"""This class will lex the provided Java source and generate a list of Function_scopes""" """This class will lex the provided Java source and generate a list of Function_scopes"""
@@ -17,9 +17,12 @@ class Lexer:
self.source_index = 0 self.source_index = 0
self.line_number = 1 self.line_number = 1
def lex(self) -> List[Token]: self._tokens = []
self._token_index = 0
tokens = [] self.token_type_pattern = re.compile('(char)|(int)|(void)|(double)')
def _get_tokens(self):
while char := self._consume(): while char := self._consume():
@@ -30,10 +33,31 @@ class Lexer:
continue continue
token = self._get_token(char) token = self._get_token(char)
logging.debug(f"found token \"{token}\" on line {self.line_number}") #logging.debug(f"found token \"{token}\" on line {self.line_number}")
#tokens.append(make_token(token)) self._tokens.append(make_token(token, self.token_type_pattern))
def get_scopes(self) -> List[Function_scope]:
    """Scan the token stream for function definitions.

    The source is lexed on first use (when no tokens have been collected
    yet). A function definition is recognised as a type name, followed by
    an identifier, followed by an opening parenthesis.

    Work in progress: the tokens between the parentheses are only printed,
    and no Function_scope objects are built or returned yet.

    Raises:
        Exception: if a type name is not followed by an identifier token,
            including when the token stream ends right after the type name.
    """
    if not self._tokens:
        self._get_tokens()

    while token := self._consume_token():
        if token.type == Token_type.UNKNOWN:
            logging.debug(token)
            continue
        if token.type != Token_type.TYPE_NAME:
            continue

        # The name after a type must be an UNKNOWN token. A missing token
        # (end of stream) is reported the same way instead of failing with
        # an AttributeError on None.
        name_token = self._peek_token()
        if name_token is None or name_token.type != Token_type.UNKNOWN:
            logging.error("Illegal identifier after Type name!")
            raise Exception("Illegal identifier after Type name!")

        # Not a function header (or the stream ended): leave the name token
        # in place for the next loop iteration.
        paren_token = self._peek_token(1)
        if paren_token is None or paren_token.type != Token_type.LEFT_PAREN:
            continue

        logging.debug(f"Function definition found: {token.content} {name_token.content} ()")
        self._consume_token()  # function name
        self._consume_token()  # opening parenthesis

        # NOTE(review): "RIGTH_PAREN" must match the member name declared in
        # Token_type, which is not visible here -- confirm the spelling.
        while function_token := self._consume_token():
            if function_token.type == Token_type.RIGTH_PAREN:
                break
            print(function_token.type)

    # TODO: build and return the Function_scope list promised by the
    # annotation; for now the method falls through and returns None.
return tokens
def _get_token(self, char: str) -> str: def _get_token(self, char: str) -> str:
token = char token = char
@@ -76,6 +100,17 @@ class Lexer:
self.source_index += 1 self.source_index += 1
return char return char
def _peek_token(self, offset:int=0):
if (self._token_index+offset) >= len(self._tokens):
return None
return self._tokens[self._token_index+offset]
def _consume_token(self):
token = self._peek_token()
self._token_index+=1
return token
@overload @overload
def _consume_until(self, end_token: str) -> str:... def _consume_until(self, end_token: str) -> str:...

View File

@@ -1,12 +1,15 @@
"""Private definitions for Token class used by the Lexer""" """Private definitions for Token class used by the Lexer"""
import logging
import re import re
from enum import IntEnum from enum import IntEnum
from typing import Union from typing import Union
# Token-classification patterns used by make_token.
# Integer literals and the boolean constants.
NUMERIC_CONSTANT_PATTERN = re.compile("([0-9]+)|(true)|(false)")
# Keywords that are kept as KEY_WORD tokens.
KEYWORD_PATTERN = re.compile("(return)|(continue)|(break)|(new)")
# A quoted literal; group 2 is the text between the quotes.
STRING_LITERAL_PATTERN = re.compile("('|\")(.*)(\"|')")
# Raw string: "\+" and "\*" are invalid escape sequences in a normal string
# literal and trigger a warning; the compiled pattern text is unchanged.
MATH_OP_PATTERN = re.compile(r"\+|-|\*|/")
class Token_type(IntEnum): class Token_type(IntEnum):
UNKNOWN=-1 UNKNOWN=-1
@@ -17,20 +20,30 @@ class Token_type(IntEnum):
LEFT_BRACKET=4, LEFT_BRACKET=4,
RIGHT_BRACKET=5, RIGHT_BRACKET=5,
COMMA=6, COMMA=6,
NUMERIC_CONSTANT=7, EQUAL_SIGN=7,
IF_STATEMENT=8, SEMICOLON=8
WHILE_STATEMENT=9, MATH_OP=9
DO_WHILE_STATEMENT=10, NUMERIC_CONSTANT=10,
FOR_STATEMENT=11, IF_STATEMENT=11,
KEY_WORD=13, ELSE_STATEMENT=12,
STRING_LITERAL=14 WHILE_STATEMENT=13,
DO_WHILE_STATEMENT=14,
FOR_STATEMENT=15,
KEY_WORD=16,
STRING_LITERAL=17
TYPE_NAME=18
class Token:
    """A lexed token: a Token_type tag plus, for some token kinds, its text."""

    def __init__(self, type: Token_type, content: Union[str, None]=None) -> None:
        self.type = type
        self.content = content

    def __str__(self) -> str:
        """Render as "<type>: <content>", or just "<type>" when content is empty or None."""
        if self.content:
            return f"{self.type}: {self.content}"
        # Previously read `self.typetype`, which raised AttributeError for
        # every token that carries no content.
        return f"{self.type}"
def make_token(tag: str, type_name_pattern:re.Pattern) -> Token:
if tag == '(': if tag == '(':
return Token(Token_type.LEFT_PAREN) return Token(Token_type.LEFT_PAREN)
elif tag == ')': elif tag == ')':
@@ -45,14 +58,30 @@ def make_token(tag: str) -> Token:
return Token(Token_type.RIGHT_BRACKET) return Token(Token_type.RIGHT_BRACKET)
elif tag == ',': elif tag == ',':
return Token(Token_type.COMMA) return Token(Token_type.COMMA)
elif tag == '=':
return Token(Token_type.EQUAL_SIGN)
elif tag == ';':
return Token(Token_type.SEMICOLON)
elif MATH_OP_PATTERN.match(tag):
return Token(Token_type.MATH_OP)
elif NUMERIC_CONSTANT_PATTERN.match(tag): elif NUMERIC_CONSTANT_PATTERN.match(tag):
return Token(Token_type.NUMERIC_CONSTANT, tag) return Token(Token_type.NUMERIC_CONSTANT, tag)
elif tag == 'if': elif tag == "if":
return Token(Token_type.IF_STATEMENT) return Token(Token_type.IF_STATEMENT)
elif tag == 'while': elif tag == "else":
return Token(Token_type.ELSE_STATEMENT)
elif tag == "while":
return Token(Token_type.WHILE_STATEMENT) return Token(Token_type.WHILE_STATEMENT)
elif tag == 'do': elif tag == "do":
return Token(Token_type.DO_WHILE_STATEMENT) return Token(Token_type.DO_WHILE_STATEMENT)
elif tag == 'for': elif tag == "for":
return Token(Token_type.FOR_STATEMENT) return Token(Token_type.FOR_STATEMENT)
#TODO: finish this elif KEYWORD_PATTERN.match(tag):
return Token(Token_type.KEY_WORD, tag)
elif STRING_LITERAL_PATTERN.match(tag):
return Token(Token_type, tag[1:-1])
elif type_name_pattern.match(tag):
return Token(Token_type.TYPE_NAME, tag)
else:
logging.warn(f"unknown token {tag}")
return Token(Token_type.UNKNOWN, tag)