split interpretation into two steps: Tokenization and Lexing
draw/Iinstruction.py
@@ -3,7 +3,7 @@
__author__ = "Weckyy702"

from typing import Iterable, List
from typing import Iterable, List, Tuple
from abc import ABCMeta, abstractmethod
from draw import code_to_image as cti

@@ -14,7 +14,7 @@ class Iinstruction(metaclass=ABCMeta):
        self.instruction_text = instruction_text

    @abstractmethod
    def to_image(self, x:int, y:int, x_sz: int) -> Iterable[float]:
    def to_image(self, x:int, y:int, x_sz: int) -> Tuple[float]:
        pass

    @abstractmethod
@@ -25,6 +25,10 @@ class Iinstruction(metaclass=ABCMeta):
    def getblkwidth(self) -> float:
        pass

    # @abstractmethod
    # def add_child_instruction(self, instruction):
    #     pass

    @abstractmethod
    def __str__(self) -> str:
        pass
@@ -103,7 +107,7 @@ class if_instruction(Iinstruction):
        return max(self._getblkwidth(), self.get_truewidth() + self.get_falsewidth())

    def to_image(self, x:int, y:int, x_sz: int) -> Iterable[float]:
    def to_image(self, x:int, y:int, x_sz: int) -> Tuple[float]:
        true_w = self.get_truewidth()
        false_w = self.get_falsewidth()
        true_x, true_y, false_x, false_y = cti.draw_if_statement(
@@ -176,7 +180,7 @@ class while_instruction_front(Iinstruction):
    def getblkwidth(self) -> float:
        return max(self._getblkwidth(), self.get_children_width())

    def to_image(self, x:int, y:int, x_sz: int) -> Iterable[float]:
    def to_image(self, x:int, y:int, x_sz: int) -> Tuple[float]:
        children_x, children_y, children_sz_x = cti.draw_while_loop_front(self.instruction_text, x, y, x_sz, self.getblkheight())
        self.draw_children(children_x, children_y, children_sz_x)
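A note on the to_image hunks above: in typing, Tuple[float] annotates a tuple of exactly one float, while Iterable[float] and Tuple[float, ...] allow any length. Since to_image appears to yield several coordinates, the variadic spelling may be what is intended. A minimal sketch of the distinction (the function names here are illustrative, not from the commit):

from typing import Tuple

def one_coordinate() -> Tuple[float]:          # exactly one element, e.g. (1.0,)
    return (1.0,)

def many_coordinates() -> Tuple[float, ...]:   # any number of floats
    return 0.0, 1.0, 2.0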
interpreter/Lexer.py
@@ -1,133 +1,355 @@
"""Lexer.py: Definition for Lexer class"""

from os import linesep
from draw.Iinstruction import *
from typing import List, Optional, Union, Tuple
import logging
import re
from typing import List, overload

from interpreter.function_scope import Function_scope
from interpreter._token import Token, make_token, Token_type
from interpreter._token import Token, Token_type
from errors.custom import JavaSyntaxError

class Lexer:
    """This class will lex the provided Java source and generate a list of Function_scopes"""

    TOKEN_MATCH = re.compile("\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| ")

    def __init__(self, file_name: str) -> None:
        with open(file_name) as f:
            self.source_text = f.read()
        self.source_index = 0
        self.line_number = 1

        self._tokens = []
    def __init__(self, tokens: List[Token]) -> None:
        self._tokens = tokens
        self._token_index = 0

        self.token_type_pattern = re.compile('(char)|(int)|(void)|(double)')
        self._scopes: List[Function_scope] = []
        self._current_scope = None

    def _get_tokens(self):
        self._current_scoped_instruction = None

        while char := self._consume():
            #in case the tokenizer finds valid tokens in the global scope, they will be saved here
            self._global_instructions = []

            if char.isspace():
                continue

            if self._handle_comments(char):
                continue

            token = self._get_token(char)
            #logging.debug(f"found token \"{token}\" on line {self.line_number}")
            self._tokens.append(make_token(token, self.token_type_pattern))

    def get_scopes(self) -> List[Function_scope]:
        if not self._tokens:
            self._get_tokens()

        while token := self._consume_token():
            if token.type == Token_type.UNKNOWN:
                logging.debug(token)
            elif token.type == Token_type.TYPE_NAME:
                if self._peek_token().type != Token_type.UNKNOWN:
                    logging.error("Illegal identifier after Type name!")
                    raise Exception("Illegal identifier after Type name!")
                elif self._peek_token(1).type == Token_type.LEFT_PAREN:
                    logging.debug(f"Function definition found: {token.content} {self._peek_token().content} ()")
                    self._consume_token()
                    self._consume_token()
                    args = ""
                    while function_token := self._consume_token():
                        if function_token.type == Token_type.RIGTH_PAREN:
                            break
                        print(function_token.type)

    def _get_token(self, char: str) -> str:
        token = char

        if not re.match(Lexer.TOKEN_MATCH, token):
            while (token_char := self._peek()):
                if re.match(Lexer.TOKEN_MATCH, token_char):
                    break
                token += self._consume()

        return token

    def _handle_comments(self, char: str) -> bool:
        if char == '/' and self._peek() == '/':
            self._get_line() #skip the entire line
            return True
        elif char == '/' and self._peek() == '*':
            self._consume()
            self._consume_until('/') #skip until closing character. Will probably bug out at some point
            return True
        return False

    def _get_line(self) -> str:
        return self._consume_until(re.compile("(\n)|;"))

    def _peek(self, offset:int = 0) -> str:
        if (self.source_index + offset) >= len(self.source_text):
            return ''
        char = self.source_text[self.source_index]

        return char

    def _consume(self) -> str:
        char = self._peek()

        if char == '\n':
            self.line_number += 1

        self.source_index += 1
        return char

    def _peek_token(self, offset:int=0):
    def _peek(self, offset:int=0) -> Optional[Token]:
        if (self._token_index+offset) >= len(self._tokens):
            return None
        return self._tokens[self._token_index+offset]

    def _consume_token(self):
        token = self._peek_token()
    def _consume(self):
        token = self._peek()
        self._token_index+=1
        return token

    @overload
    def _consume_until(self, end_token: str) -> str:...
    def get_instructions(self):

    @overload
    def _consume_until(self, end_pattern:re.Pattern) -> str:...
        scopes = []

    def _consume_until(self, end_token) -> str:
        res = ""
        while self._peek():
            line_tokens = self.get_line_tokens()

            if isinstance(end_token, str):
                while self._peek() and (char:= self._consume()) != end_token:
                    res += char
            if self._is_function_def(line_tokens):

                func_name, func_return_type, func_args = self._construct_function_header_from_tokens(line_tokens)
                instructions = self._get_instructions_in_scope()
                scopes.append(Function_scope(func_name, func_return_type, func_args))
                scopes.append(instructions)

            else:
                line = ""
                for token in line_tokens[:-1]:
                    line += token.content + ' '

                self._construct_instruction_from_tokens(line_tokens)

                scopes.append([line])

        return scopes

        return res

        elif isinstance(end_token, re.Pattern):
            while self._peek() and not end_token.match(char:= self._consume()):
                res += char
    def _get_instructions_in_scope(self):

        lines = []

        while self._peek():

            line_tokens = self.get_line_tokens()

            if len(line_tokens) > 1:
                line = ""
                for token in line_tokens[:-1]:
                    line += token.content + ' '

                self._construct_instruction_from_tokens(line_tokens)

                lines.append(line)

            delimiter_token = line_tokens[-1]

            if delimiter_token.type == Token_type.RIGHT_CURLY:
                return lines
            if delimiter_token.type == Token_type.LEFT_CURLY:
                instructions = self._get_instructions_in_scope()
                for line in instructions:
                    lines.append('\t'+line)

    def get_line_tokens(self):
        tokens = []
        while token := self._consume():
            tokens.append(token)
            if token.type in [Token_type.SEMICOLON, Token_type.LEFT_CURLY, Token_type.RIGHT_CURLY]:
                break
        return tokens

    def _is_function_def(self, tokens: List[Token]) -> bool:
        #if token list is of shape TYPE_NAME IDENTIFIER ( ... {
        return tokens[0].type == Token_type.TYPE_NAME and tokens[1].type == Token_type.UNKNOWN and tokens[2].type == Token_type.LEFT_PAREN and tokens[-1].type == Token_type.LEFT_CURLY

    def _construct_instruction_from_tokens(self, tokens: List[Token]):
        instruction_token = tokens[0]

        if instruction_token.type == Token_type.IF_STATEMENT:
            logging.debug("Found if construct")
            for token in tokens:
                print('\t', token)

        elif instruction_token.type == Token_type.WHILE_STATEMENT:
            logging.debug("Found while construct")
            for token in tokens:
                print('\t', token)

        return res
        elif instruction_token.type == Token_type.DO_WHILE_STATEMENT:
            logging.debug("Found do-while construct")
            for token in tokens:
                print('\t', token)

        elif instruction_token.type == Token_type.FOR_STATEMENT:
            #TODO: change that
            logging.debug("Found for construct")
            tokens.extend(self.get_line_tokens())
            tokens.extend(self.get_line_tokens())
            for token in tokens:
                print('\t', token)

        elif instruction_token.type == Token_type.TYPE_NAME:
            logging.debug("Found Type name construct")
            for token in tokens:
                print('\t', token)

        elif instruction_token.type == Token_type.UNKNOWN:
            logging.debug("Found generic instruction")
            for token in tokens:
                print('\t', token)

    def _construct_function_header_from_tokens(self, tokens: List[Token]) -> Tuple[str, str, List[str]]:
        return "name", "return_type", ["int arg1", "String arg2"]

    # def get_scopes(self) -> List[Function_scope]:

    #     while token := self._consume_token():

    #         if token.type == Token_type.IF_STATEMENT:
    #             self._handle_if_construct()

    #         elif token.type == Token_type.WHILE_STATEMENT:
    #             self._handle_while_construct()

    #         elif token.type == Token_type.FOR_STATEMENT:
    #             self._handle_for_construct()
    #         elif token.type == Token_type.DO_WHILE_STATEMENT:
    #             self._handle_do_while_construct()

    #         elif token.type == Token_type.TYPE_NAME:
    #             self._handle_type_identifier(token)

    #         elif token.type == Token_type.UNKNOWN:
    #             self._handle_unknown_token(token)

    #     self._handle_globals()
    #     return self._scopes

    # def _append_scoped_instructions_to_parent(self, parent_instruction: Iinstruction):
    #     indent_depth = 1
    #     past_instructions = []
    #     current_parent_instruction = parent_instruction
    #     while (token := self._consume_token()) and indent_depth > 0:

    #         current_instruction = self.get_instruction_from_token(token)

    #         if token.type == Token_type.RIGHT_CURLY:
    #             current_parent_instruction = past_instructions.pop()
    #             indent_depth-=1

    #         if token.type == Token_type.LEFT_CURLY:
    #             past_instructions.append(current_instruction)
    #             current_parent_instruction =

    # def _handle_if_construct(self):
    #     self._check_construct("Illformed if construct!")

    #     logging.debug("found if construct")
    #     if_tokens = self._get_argument_tokens()

    #     if_text = _construct_source_line_from_tokens(if_tokens)

    #     self.add_instruction_to_active_scope(if_instruction(if_text, [], []))

    # def _handle_while_construct(self):
    #     self._check_construct("Illformed while construct!")

    #     logging.debug("Found while construct")
    #     while_tokens = self._get_argument_tokens()

    #     while_text = _construct_source_line_from_tokens(while_tokens)

    #     self.add_instruction_to_active_scope(while_instruction_front(while_text, []))

    # def _handle_for_construct(self):
    #     self._check_construct("Illformed for construct!")

    #     logging.debug("Found for construct")
    #     for_tokens = self._get_argument_tokens()

    #     variable_inst, condition_str, increment_inst = _construct_for_arguments_from_tokens(for_tokens)

    #     self.add_instruction_to_active_scope(variable_inst)
    #     self.add_instruction_to_active_scope(for_instruction(condition_str, []))

    # def _handle_do_while_construct(self):
    #     if self._consume_token().type != Token_type.LEFT_CURLY:
    #         raise JavaSyntaxError("Illformed do-while construct!")

    #     logging.debug("Found do-while contruct")

    #     #These are the instructions in the loops scope
    #     do_while_tokens = self._consume_tokens_until(Token_type.WHILE_STATEMENT) #this will break, but what is the best way to do this? Stack evaluation?

    #     while_argument_tokens = self._get_argument_tokens();
    #     while_argument_string = _construct_source_line_from_tokens(while_argument_tokens)

    #     self.add_instruction_to_active_scope(while_instruction_back(while_argument_string, []))

    # def _handle_type_identifier(self, token: Token):
    #     if self._token_is_function_def():
    #         logging.debug("Function definition found")
    #         self._handle_new_function_def(token)

    #     elif self._token_is_var_dec():
    #         logging.debug("Variable declaration found")
    #         self.add_instruction_to_active_scope(self._make_var_dec(token))

    #     elif self._token_is_var_def():
    #         logging.debug(f"Variable definition found")
    #         self.add_instruction_to_active_scope(self._make_var_def(token))

    #     else:
    #         raise JavaSyntaxError("Illegal token after type identifier!")

    # def _handle_unknown_token(self, token: Token):
    #     logging.debug("Found unknown Token. Most likely function call")
    #     self.add_instruction_to_active_scope(self._make_generic_instruction(token))

    # def _token_is_function_def(self) -> bool:
    #     return self._peek_token().type == Token_type.UNKNOWN and self._peek_token(1).type == Token_type.LEFT_PAREN

    # def _token_is_var_dec(self) -> bool:
    #     return self._peek_token().type == Token_type.UNKNOWN and self._peek_token(1).type == Token_type.SEMICOLON

    # def _token_is_var_def(self) -> bool:
    #     return self._peek_token().type == Token_type.UNKNOWN and self._peek_token(1).type == Token_type.EQUAL_SIGN

    # def _make_var_dec(self, token) -> generic_instruction:
    #     var_type = token.content
    #     var_name = self._consume_token().content
    #     return _construct_generic_instruction_from_variable_def(var_type, var_name, "")

    # def _make_var_def(self, token) -> generic_instruction:
    #     var_type = token.content
    #     var_name = self._consume_token().content
    #     line_tokens = self._get_tokens_until_semicolon()

    #     var_value_str = _construct_source_line_from_tokens(line_tokens)

    #     return _construct_generic_instruction_from_variable_def(var_type, var_name, var_value_str)

    # def _make_generic_instruction(self, token: Token) -> Iinstruction:
    #     line_tokens = self._get_tokens_until_semicolon()
    #     line_tokens.insert(0, token)
    #     line_text = _construct_source_line_from_tokens(line_tokens)

    #     return generic_instruction(line_text)

    # def _handle_new_function_def(self, token: Token):
    #     function_return_type = token.content
    #     function_name = self._consume_token().content
    #     self._consume_token() #get rid of the left parenthesis

    #     argument_tokens = self._get_argument_tokens()

    #     arg_list = _construct_arg_list_from_tokens(argument_tokens)

    #     self._add_scope(function_name, function_return_type, arg_list)

    # def add_instruction_to_active_scope(self, instruction: Union[Iinstruction, List[Iinstruction]]):
    #     if isinstance(instruction, List):
    #         if self._current_scope:
    #             self._current_scope.contents.extend(instruction)
    #         else:
    #             self._global_instructions.extend(instruction)
    #     else:
    #         if self._current_scope:
    #             self._current_scope._add_instruction(instruction)
    #         else:
    #             self._global_instructions.append(instruction)

    # def _add_scope(self, function_name: str, function_return_type: str, function_args: List[str]):
    #     if self._current_scope:
    #         self._scopes.append(self._current_scope) #do not append the global scope as it is still in use
    #     self._current_scope = Function_scope(function_name, function_return_type, function_args) #add a new empty function scope to the list of scopes

    # def _handle_globals(self):
    #     """Append all globally declared instructions, if any, to the list of all scopes"""
    #     if len(self._global_instructions) > 0:
    #         global_scope = Function_scope("<Global>", "", [])
    #         global_scope._add_instructions(self._global_instructions)

    # def _check_construct(self, msg:str):
    #     if self._consume_token().type != Token_type.LEFT_PAREN:
    #         raise JavaSyntaxError(msg)

    # def _get_tokens_until_semicolon(self) -> List[Token]:
    #     return self._consume_tokens_until(Token_type.SEMICOLON)

    # def _get_argument_tokens(self) -> List[Token]:
    #     return self._consume_tokens_until(Token_type.RIGTH_PAREN)

    # def _consume_tokens_until(self, end_type: Token_type) -> List[Token]:
    #     tokens = []
    #     while self._peek_token() and (token := self._consume_token()).type != end_type:
    #         tokens.append(token)
    #     return tokens

# def _construct_generic_instruction_from_variable_def(var_type:str, var_name: str, var_value: str) -> generic_instruction:
#     if var_value:
#         return generic_instruction(f"declare variable '{var_name}' of type '{var_type}' with value {var_value}")
#     return generic_instruction(f"declare variable '{var_name}' of type '{var_type}'")

# def _construct_source_line_from_tokens(tokens: List[Token]) -> str:
#     return "src" #TODO: implement

# def _construct_arg_list_from_tokens(token: List[Token]) -> List[str]:
#     return ["arg"] #TODO: implement

# def _construct_for_arguments_from_tokens(tokens: List[Token]) -> Tuple[str, str, str]:
#     return generic_instruction("var"), "con", "inc" #TODO: implement
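For reference, the new Lexer decides "is this line a function definition?" purely from the shape of a line's token list, as the comment in _is_function_def says. A rough illustration with hand-built tokens for `int main ( ) {` (the module path interpreter.Lexer is an assumption, this page does not show it; RIGTH_PAREN is the enum's own spelling in _token.py):

from interpreter.Lexer import Lexer          # assumed module path
from interpreter._token import Token, Token_type

line_tokens = [
    Token(Token_type.TYPE_NAME, "int"),
    Token(Token_type.UNKNOWN, "main"),       # identifiers currently come back as UNKNOWN
    Token(Token_type.LEFT_PAREN, "("),
    Token(Token_type.RIGTH_PAREN, ")"),      # sic, see _token.py
    Token(Token_type.LEFT_CURLY, "{"),
]

print(Lexer([])._is_function_def(line_tokens))  # True: TYPE_NAME IDENTIFIER ( ... {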
interpreter/Tokenizer.py (new file, 92 lines)
@@ -0,0 +1,92 @@
"""Tokenizer.py: Definition for Tokenizer class"""

import logging
import re
from typing import List, Optional

from interpreter._token import Token, make_token

class Tokenizer:
    """This class will take the provided source file and convert it to a list of tokens"""

    TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """) #TODO: make this modular

    def __init__(self, file_name: str) -> None:
        with open(file_name) as f:
            self.source_text = f.read()
        self.source_index = 0
        self.line_number = 1

        self.type_name_pattern = re.compile('(char)|(int)|(void)|(double)|(Pixel)') #TODO: make this modular

    def get_tokens(self) -> List[Token]:
        tokens = []

        while char := self._consume():
            if char.isspace():
                continue

            if self._handle_comments(char):
                continue

            token = self._get_token(char)
            logging.debug(f"found token \"{token}\" on line {self.line_number}")
            tokens.append(make_token(token, self.type_name_pattern))

        return tokens

    def _get_token(self, char: str) -> str:
        token = char

        if not re.match(Tokenizer.TOKEN_MATCH, token):
            while (token_char := self._peek()):
                if re.match(Tokenizer.TOKEN_MATCH, token_char):
                    break
                token += self._consume()

        return token

    def _handle_comments(self, char: str) -> bool:
        if char == '/' and self._peek() == '/':
            self._get_line() #skip the entire line
            return True
        elif char == '/' and self._peek() == '*':
            self._consume()
            self._consume_multiline_comment()
            return True
        return False

    def _get_line(self) -> str:
        return self._consume_until('\n')

    def _peek(self, offset:int = 0) -> str:
        if (self.source_index + offset) >= len(self.source_text):
            return ''
        char = self.source_text[self.source_index]

        return char

    def _consume(self) -> str:
        char = self._peek()

        if char == '\n':
            self.line_number += 1

        self.source_index += 1
        return char

    def _consume_multiline_comment(self):
        while self._peek():
            if self._consume() == '*' and self._peek() == '/':
                self._consume()
                break

    def _consume_until(self, end_tag: str) -> str:
        res = ""
        while self._peek() and (char:= self._consume()) != end_tag:
            res += char

        return res
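With tokenization split out, the two steps chain together cleanly. A minimal usage sketch; the module paths and the Example.java input are assumptions, not shown on this page:

from interpreter.Tokenizer import Tokenizer  # assumed module path
from interpreter.Lexer import Lexer          # assumed module path

tokens = Tokenizer("Example.java").get_tokens()  # step 1: source text -> Token list
scopes = Lexer(tokens).get_instructions()        # step 2: Token list -> scopes/instructions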
interpreter/_token.py
@@ -6,13 +6,13 @@ import re
from enum import IntEnum
from typing import Union

NUMERIC_CONSTANT_PATTERN = re.compile("([0-9]+)|(true)|(false)")
KEYWORD_PATTERN = re.compile("(return)|(continue)|(break)|(new)")
STRING_LITERAL_PATTERN = re.compile("('|\")(.*)(\"|')")
MATH_OP_PATTERN = re.compile("\+|-|\*|/")
NUMERIC_CONSTANT_PATTERN = re.compile(r"""([0-9]+)|(true)|(false)""")
KEYWORD_PATTERN = re.compile(r"""(return)|(continue)|(break)|(new)""")
STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")
MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""")

class Token_type(IntEnum):
    UNKNOWN=-1
    UNKNOWN=-1 #maybe this should be renamed to IDENTIFIERs
    LEFT_PAREN=0,
    RIGTH_PAREN=1,
    LEFT_CURLY=2,
@@ -40,48 +40,48 @@ class Token:

    def __str__(self) -> str:
        if self.content:
            return f"{self.type}: {self.content}"
        return f"{self.typetype}"
            return f"{str(self.type)}: {self.content}"
        return f"{self.type}"

def make_token(tag: str, type_name_pattern:re.Pattern) -> Token:
    if tag == '(':
        return Token(Token_type.LEFT_PAREN)
        return Token(Token_type.LEFT_PAREN, tag)
    elif tag == ')':
        return Token(Token_type.RIGTH_PAREN)
        return Token(Token_type.RIGTH_PAREN, tag)
    elif tag == '{':
        return Token(Token_type.LEFT_CURLY)
        return Token(Token_type.LEFT_CURLY, tag)
    elif tag == '}':
        return Token(Token_type.RIGHT_CURLY)
        return Token(Token_type.RIGHT_CURLY, tag)
    elif tag == '[':
        return Token(Token_type.LEFT_BRACKET)
        return Token(Token_type.LEFT_BRACKET, tag)
    elif tag == ']':
        return Token(Token_type.RIGHT_BRACKET)
        return Token(Token_type.RIGHT_BRACKET, tag)
    elif tag == ',':
        return Token(Token_type.COMMA)
        return Token(Token_type.COMMA, tag)
    elif tag == '=':
        return Token(Token_type.EQUAL_SIGN)
        return Token(Token_type.EQUAL_SIGN, tag)
    elif tag == ';':
        return Token(Token_type.SEMICOLON)
        return Token(Token_type.SEMICOLON, tag)
    elif MATH_OP_PATTERN.match(tag):
        return Token(Token_type.MATH_OP)
        return Token(Token_type.MATH_OP, tag)
    elif NUMERIC_CONSTANT_PATTERN.match(tag):
        return Token(Token_type.NUMERIC_CONSTANT, tag)
    elif tag == "if":
        return Token(Token_type.IF_STATEMENT)
        return Token(Token_type.IF_STATEMENT, tag)
    elif tag == "else":
        return Token(Token_type.ELSE_STATEMENT)
        return Token(Token_type.ELSE_STATEMENT, tag)
    elif tag == "while":
        return Token(Token_type.WHILE_STATEMENT)
        return Token(Token_type.WHILE_STATEMENT, tag)
    elif tag == "do":
        return Token(Token_type.DO_WHILE_STATEMENT)
        return Token(Token_type.DO_WHILE_STATEMENT, tag)
    elif tag == "for":
        return Token(Token_type.FOR_STATEMENT)
        return Token(Token_type.FOR_STATEMENT, tag)
    elif KEYWORD_PATTERN.match(tag):
        return Token(Token_type.KEY_WORD, tag)
    elif STRING_LITERAL_PATTERN.match(tag):
        return Token(Token_type, tag[1:-1])
        return Token(Token_type.STRING_LITERAL, tag)
    elif type_name_pattern.match(tag):
        return Token(Token_type.TYPE_NAME, tag)
    else:
        logging.warn(f"unknown token {tag}")
        logging.info(f"found unknown token {tag}... Function or variable name?")
        return Token(Token_type.UNKNOWN, tag)
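An aside on STRING_LITERAL_PATTERN, which this hunk only converts to a raw string without changing its behavior: because the opening and closing quote alternatives match independently, mismatched quotes also pass. A quick check:

import re

STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""")

print(bool(STRING_LITERAL_PATTERN.match('"hello"')))   # True
print(bool(STRING_LITERAL_PATTERN.match("'hello\"")))  # also True: opens with ' and closes with "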
interpreter/function_scope.py
@@ -8,8 +8,8 @@ from draw.Iinstruction import Iinstruction
class Function_scope(Iterable):
    """This class serves as a container for Instructions"""

    def __init__(self, child_instructions: List[Iinstruction], name: str, return_type: str, args: List[str]) -> None:
        self.contents = child_instructions
    def __init__(self, name: str, return_type: str, args: List[str]) -> None:
        self.contents = []
        self.name = name
        self.return_type = return_type
        self.args = args
@@ -26,5 +26,11 @@ class Function_scope(Iterable):
            w = max(w, inst.getblkwidth())
        return int(w)

    def _add_instruction(self, inst: Iinstruction):
        self.contents.append(inst)

    def _add_instructions(self, inst: List[Iinstruction]):
        self.contents.extend(inst)

    def __iter__(self):
        return self.contents.__iter__()
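Since the constructor no longer accepts child instructions, a scope now starts empty and is filled through the new _add_instruction/_add_instructions helpers. A rough sketch (generic_instruction from draw.Iinstruction is assumed to take a single text argument, as the commented-out Lexer code uses it):

from interpreter.function_scope import Function_scope
from draw.Iinstruction import generic_instruction

scope = Function_scope("main", "void", ["String[] args"])
scope._add_instruction(generic_instruction("call foo()"))
scope._add_instructions([generic_instruction("return")])

for inst in scope:  # Function_scope is Iterable over its contents
    print(inst)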