diff --git a/draw/Iinstruction.py b/draw/Iinstruction.py index d43aec0..79bcb54 100644 --- a/draw/Iinstruction.py +++ b/draw/Iinstruction.py @@ -3,7 +3,7 @@ __author__ = "Weckyy702" -from typing import Iterable, List +from typing import Iterable, List, Tuple from abc import ABCMeta, abstractmethod from draw import code_to_image as cti @@ -14,7 +14,7 @@ class Iinstruction(metaclass=ABCMeta): self.instruction_text = instruction_text @abstractmethod - def to_image(self, x:int, y:int, x_sz: int) -> Iterable[float]: + def to_image(self, x:int, y:int, x_sz: int) -> Tuple[float]: pass @abstractmethod @@ -25,6 +25,10 @@ class Iinstruction(metaclass=ABCMeta): def getblkwidth(self) -> float: pass + # @abstractmethod + # def add_child_instruction(self, instruction): + # pass + @abstractmethod def __str__(self) -> str: pass @@ -103,7 +107,7 @@ class if_instruction(Iinstruction): return max(self._getblkwidth(), self.get_truewidth() + self.get_falsewidth()) - def to_image(self, x:int, y:int, x_sz: int) -> Iterable[float]: + def to_image(self, x:int, y:int, x_sz: int) -> Tuple[float]: true_w = self.get_truewidth() false_w = self.get_falsewidth() true_x, true_y, false_x, false_y = cti.draw_if_statement( @@ -176,7 +180,7 @@ class while_instruction_front(Iinstruction): def getblkwidth(self) -> float: return max(self._getblkwidth(), self.get_children_width()) - def to_image(self, x:int, y:int, x_sz: int) -> Iterable[float]: + def to_image(self, x:int, y:int, x_sz: int) -> Tuple[float]: children_x, children_y, children_sz_x = cti.draw_while_loop_front(self.instruction_text, x, y, x_sz, self.getblkheight()) self.draw_children(children_x, children_y, children_sz_x) diff --git a/interpreter/Lexer.py b/interpreter/Lexer.py index fe0c6a2..e7da0ca 100644 --- a/interpreter/Lexer.py +++ b/interpreter/Lexer.py @@ -1,133 +1,355 @@ """Lexer.py: Definition for Lexer class""" - +from os import linesep +from draw.Iinstruction import * +from typing import List, Optional, Union, Tuple import logging -import re -from typing import List, overload from interpreter.function_scope import Function_scope -from interpreter._token import Token, make_token, Token_type +from interpreter._token import Token, Token_type +from errors.custom import JavaSyntaxError class Lexer: - """This class will lex the provided Java source and generate a list of Function_scopes""" - - TOKEN_MATCH = re.compile("\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| ") - - def __init__(self, file_name: str) -> None: - with open(file_name) as f: - self.source_text = f.read() - self.source_index = 0 - self.line_number = 1 - - self._tokens = [] + def __init__(self, tokens: List[Token]) -> None: + self._tokens = tokens self._token_index = 0 - self.token_type_pattern = re.compile('(char)|(int)|(void)|(double)') + self._scopes: List[Function_scope] = [] + self._current_scope = None - def _get_tokens(self): + self._current_scoped_instruction = None - while char := self._consume(): + #in case the tokenizer finds valid tokens in the global scope, they will be saved here + self._global_instructions = [] - if char.isspace(): - continue + - if self._handle_comments(char): - continue - - token = self._get_token(char) - #logging.debug(f"found token \"{token}\" on line {self.line_number}") - self._tokens.append(make_token(token, self.token_type_pattern)) - - def get_scopes(self) -> List[Function_scope]: - if not self._tokens: - self._get_tokens() - - while token := self._consume_token(): - if token.type == Token_type.UNKNOWN: - logging.debug(token) - elif token.type == Token_type.TYPE_NAME: - if self._peek_token().type != Token_type.UNKNOWN: - logging.error("Illegal identifier after Type name!") - raise Exception("Illegal identifier after Type name!") - elif self._peek_token(1).type == Token_type.LEFT_PAREN: - logging.debug(f"Function definition found: {token.content} {self._peek_token().content} ()") - self._consume_token() - self._consume_token() - args = "" - while function_token := self._consume_token(): - if function_token.type == Token_type.RIGTH_PAREN: - break - print(function_token.type) - - - - def _get_token(self, char: str) -> str: - token = char - - if not re.match(Lexer.TOKEN_MATCH, token): - - while (token_char := self._peek()): - if re.match(Lexer.TOKEN_MATCH, token_char): - break - token += self._consume() - - return token - - def _handle_comments(self, char: str) -> bool: - if char == '/' and self._peek() == '/': - self._get_line() #skip the entire line - return True - elif char == '/' and self._peek() == '*': - self._consume() - self._consume_until('/') #skip until closing character. Will probably bug out at some point - return True - return False - - def _get_line(self) -> str: - return self._consume_until(re.compile("(\n)|;")) - - def _peek(self, offset:int = 0) -> str: - if (self.source_index + offset) >= len(self.source_text): - return '' - char = self.source_text[self.source_index] - - return char - - def _consume(self) -> str: - char = self._peek() - - if char == '\n': - self.line_number += 1 - - self.source_index += 1 - return char - - def _peek_token(self, offset:int=0): + def _peek(self, offset:int=0) -> Optional[Token]: if (self._token_index+offset) >= len(self._tokens): return None return self._tokens[self._token_index+offset] - def _consume_token(self): - token = self._peek_token() + def _consume(self): + token = self._peek() self._token_index+=1 return token - @overload - def _consume_until(self, end_token: str) -> str:... + def get_instructions(self): - @overload - def _consume_until(self, end_pattern:re.Pattern) -> str:... + scopes = [] - def _consume_until(self, end_token) -> str: - res = "" + while self._peek(): + line_tokens = self.get_line_tokens() - if isinstance(end_token, str): - while self._peek() and (char:= self._consume()) != end_token: - res += char + if self._is_function_def(line_tokens): + + func_name, func_return_type, func_args = self._construct_function_header_from_tokens(line_tokens) + instructions = self._get_instructions_in_scope() + scopes.append(Function_scope(func_name, func_return_type, func_args)) + scopes.append(instructions) + + else: + line = "" + for token in line_tokens[:-1]: + line += token.content + ' ' + + self._construct_instruction_from_tokens(line_tokens) + + scopes.append([line]) + + + return scopes - return res - elif isinstance(end_token, re.Pattern): - while self._peek() and not end_token.match(char:= self._consume()): - res += char + def _get_instructions_in_scope(self): + + lines = [] + + while self._peek(): + + line_tokens = self.get_line_tokens() + + if len(line_tokens) > 1: + line = "" + for token in line_tokens[:-1]: + line += token.content + ' ' + + self._construct_instruction_from_tokens(line_tokens) + + lines.append(line) + + delimiter_token = line_tokens[-1] + + if delimiter_token.type == Token_type.RIGHT_CURLY: + return lines + if delimiter_token.type == Token_type.LEFT_CURLY: + instructions = self._get_instructions_in_scope() + for line in instructions: + lines.append('\t'+line) + + + + def get_line_tokens(self): + tokens = [] + while token := self._consume(): + tokens.append(token) + if token.type in [Token_type.SEMICOLON, Token_type.LEFT_CURLY, Token_type.RIGHT_CURLY]: + break + return tokens + + + + def _is_function_def(self, tokens: List[Token]) -> bool: + #if token list is of shape TYPE_NAME IDENTIFIER ( ... { + return tokens[0].type == Token_type.TYPE_NAME and tokens[1].type == Token_type.UNKNOWN and tokens[2].type == Token_type.LEFT_PAREN and tokens[-1].type == Token_type.LEFT_CURLY + + + def _construct_instruction_from_tokens(self, tokens: List[Token]): + instruction_token = tokens[0] + + if instruction_token.type == Token_type.IF_STATEMENT: + logging.debug("Found if construct") + for token in tokens: + print('\t', token) + + elif instruction_token.type == Token_type.WHILE_STATEMENT: + logging.debug("Found while construct") + for token in tokens: + print('\t', token) - return res \ No newline at end of file + elif instruction_token.type == Token_type.DO_WHILE_STATEMENT: + logging.debug("Found do-while construct") + for token in tokens: + print('\t', token) + + elif instruction_token.type == Token_type.FOR_STATEMENT: + #TODO: change that + logging.debug("Found for construct") + tokens.extend(self.get_line_tokens()) + tokens.extend(self.get_line_tokens()) + for token in tokens: + print('\t', token) + + elif instruction_token.type == Token_type.TYPE_NAME: + logging.debug("Found Type name construct") + for token in tokens: + print('\t', token) + + elif instruction_token.type == Token_type.UNKNOWN: + logging.debug("Found generic instruction") + for token in tokens: + print('\t', token) + + def _construct_function_header_from_tokens(self, tokens: List[Token]) -> Tuple[str, str, List[str]]: + return "name", "return_type", ["int arg1", "String arg2"] + + +# def get_scopes(self) -> List[Function_scope]: + +# while token := self._consume_token(): + +# if token.type == Token_type.IF_STATEMENT: +# self._handle_if_construct() + +# elif token.type == Token_type.WHILE_STATEMENT: +# self._handle_while_construct() + +# elif token.type == Token_type.FOR_STATEMENT: +# self._handle_for_construct() +# elif token.type == Token_type.DO_WHILE_STATEMENT: +# self._handle_do_while_construct() + +# elif token.type == Token_type.TYPE_NAME: +# self._handle_type_identifier(token) + +# elif token.type == Token_type.UNKNOWN: +# self._handle_unknown_token(token) + +# self._handle_globals() +# return self._scopes + +# def _append_scoped_instructions_to_parent(self, parent_instruction: Iinstruction): +# indent_depth = 1 +# past_instructions = [] +# current_parent_instruction = parent_instruction +# while (token := self._consume_token()) and indent_depth > 0: + +# current_instruction = self.get_instruction_from_token(token) + +# if token.type == Token_type.RIGHT_CURLY: +# current_parent_instruction = past_instructions.pop() +# indent_depth-=1 + +# if token.type == Token_type.LEFT_CURLY: +# past_instructions.append(current_instruction) +# current_parent_instruction = + + +# def _handle_if_construct(self): +# self._check_construct("Illformed if construct!") + +# logging.debug("found if construct") +# if_tokens = self._get_argument_tokens() + +# if_text = _construct_source_line_from_tokens(if_tokens) + +# self.add_instruction_to_active_scope(if_instruction(if_text, [], [])) + +# def _handle_while_construct(self): +# self._check_construct("Illformed while construct!") + +# logging.debug("Found while construct") +# while_tokens = self._get_argument_tokens() + +# while_text = _construct_source_line_from_tokens(while_tokens) + +# self.add_instruction_to_active_scope(while_instruction_front(while_text, [])) + +# def _handle_for_construct(self): +# self._check_construct("Illformed for construct!") + +# logging.debug("Found for construct") +# for_tokens = self._get_argument_tokens() + +# variable_inst, condition_str, increment_inst = _construct_for_arguments_from_tokens(for_tokens) + +# self.add_instruction_to_active_scope(variable_inst) +# self.add_instruction_to_active_scope(for_instruction(condition_str, [])) + +# def _handle_do_while_construct(self): +# if self._consume_token().type != Token_type.LEFT_CURLY: +# raise JavaSyntaxError("Illformed do-while construct!") + +# logging.debug("Found do-while contruct") + +# #These are the instructions in the loops scope +# do_while_tokens = self._consume_tokens_until(Token_type.WHILE_STATEMENT) #this will break, but what is the best way to do this? Stack evaluation? + +# while_argument_tokens = self._get_argument_tokens(); +# while_argument_string = _construct_source_line_from_tokens(while_argument_tokens) + +# self.add_instruction_to_active_scope(while_instruction_back(while_argument_string, [])) + +# def _handle_type_identifier(self, token: Token): +# if self._token_is_function_def(): +# logging.debug("Function definition found") +# self._handle_new_function_def(token) + +# elif self._token_is_var_dec(): +# logging.debug("Variable declaration found") +# self.add_instruction_to_active_scope(self._make_var_dec(token)) + +# elif self._token_is_var_def(): +# logging.debug(f"Variable definition found") +# self.add_instruction_to_active_scope(self._make_var_def(token)) + +# else: +# raise JavaSyntaxError("Illegal token after type identifier!") + +# def _handle_unknown_token(self, token: Token): +# logging.debug("Found unknown Token. Most likely function call") +# self.add_instruction_to_active_scope(self._make_generic_instruction(token)) + + + +# def _token_is_function_def(self) -> bool: +# return self._peek_token().type == Token_type.UNKNOWN and self._peek_token(1).type == Token_type.LEFT_PAREN + +# def _token_is_var_dec(self) -> bool: +# return self._peek_token().type == Token_type.UNKNOWN and self._peek_token(1).type == Token_type.SEMICOLON + +# def _token_is_var_def(self) -> bool: +# return self._peek_token().type == Token_type.UNKNOWN and self._peek_token(1).type == Token_type.EQUAL_SIGN + + + +# def _make_var_dec(self, token) -> generic_instruction: +# var_type = token.content +# var_name = self._consume_token().content +# return _construct_generic_instruction_from_variable_def(var_type, var_name, "") + +# def _make_var_def(self, token) -> generic_instruction: +# var_type = token.content +# var_name = self._consume_token().content +# line_tokens = self._get_tokens_until_semicolon() + +# var_value_str = _construct_source_line_from_tokens(line_tokens) + +# return _construct_generic_instruction_from_variable_def(var_type, var_name, var_value_str) + +# def _make_generic_instruction(self, token: Token) -> Iinstruction: +# line_tokens = self._get_tokens_until_semicolon() +# line_tokens.insert(0, token) +# line_text = _construct_source_line_from_tokens(line_tokens) + +# return generic_instruction(line_text) + +# def _handle_new_function_def(self, token: Token): +# function_return_type = token.content +# function_name = self._consume_token().content +# self._consume_token() #get rid of the left parenthesis + +# argument_tokens = self._get_argument_tokens() + +# arg_list = _construct_arg_list_from_tokens(argument_tokens) + +# self._add_scope(function_name, function_return_type, arg_list) + + +# def add_instruction_to_active_scope(self, instruction: Union[Iinstruction, List[Iinstruction]]): +# if isinstance(instruction, List): +# if self._current_scope: +# self._current_scope.contents.extend(instruction) +# else: +# self._global_instructions.extend(instruction) +# else: +# if self._current_scope: +# self._current_scope._add_instruction(instruction) +# else: +# self._global_instructions.append(instruction) + + + +# def _add_scope(self, function_name: str, function_return_type: str, function_args: List[str]): +# if self._current_scope: +# self._scopes.append(self._current_scope) #do not append the global scope as it is still in use +# self._current_scope = Function_scope(function_name, function_return_type, function_args) #add a new empty function scope to the list of scopes + + +# def _handle_globals(self): +# """Append all globally declared instructions, if any, to the list of all scopes""" +# if len(self._global_instructions) > 0: +# global_scope = Function_scope("", "", []) +# global_scope._add_instructions(self._global_instructions) + + + +# def _check_construct(self, msg:str): +# if self._consume_token().type != Token_type.LEFT_PAREN: +# raise JavaSyntaxError(msg) + + +# def _get_tokens_until_semicolon(self) -> List[Token]: +# return self._consume_tokens_until(Token_type.SEMICOLON) + +# def _get_argument_tokens(self) -> List[Token]: +# return self._consume_tokens_until(Token_type.RIGTH_PAREN) + +# def _consume_tokens_until(self, end_type: Token_type) -> List[Token]: +# tokens = [] +# while self._peek_token() and (token := self._consume_token()).type != end_type: +# tokens.append(token) +# return tokens + +# def _construct_generic_instruction_from_variable_def(var_type:str, var_name: str, var_value: str) -> generic_instruction: +# if var_value: +# return generic_instruction(f"declare variable '{var_name}' of type '{var_type}' with value {var_value}") +# return generic_instruction(f"declare variable '{var_name}' of type '{var_type}'") + +# def _construct_source_line_from_tokens(tokens: List[Token]) -> str: +# return "src" #TODO: implement + +# def _construct_arg_list_from_tokens(token: List[Token]) -> List[str]: +# return ["arg"] #TODO: implement + +# def _construct_for_arguments_from_tokens(tokens: List[Token]) -> Tuple[str, str, str]: +# return generic_instruction("var"), "con", "inc" #TODO: implement \ No newline at end of file diff --git a/interpreter/Tokenizer.py b/interpreter/Tokenizer.py new file mode 100644 index 0000000..f67f215 --- /dev/null +++ b/interpreter/Tokenizer.py @@ -0,0 +1,92 @@ +"""Tokenizer.py: Definition for Tokenizer class""" + +import logging +import re +from typing import List, Optional + +from interpreter._token import Token, make_token + +class Tokenizer: + """This class will take the provided source file and convert it to a list of tokens""" + + TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """) #TODO: make this modular + + def __init__(self, file_name: str) -> None: + with open(file_name) as f: + self.source_text = f.read() + self.source_index = 0 + self.line_number = 1 + + self.type_name_pattern = re.compile('(char)|(int)|(void)|(double)|(Pixel)') #TODO: make this modular + + def get_tokens(self) -> List[Token]: + + tokens = [] + + while char := self._consume(): + + if char.isspace(): + continue + + if self._handle_comments(char): + continue + + token = self._get_token(char) + logging.debug(f"found token \"{token}\" on line {self.line_number}") + tokens.append(make_token(token, self.type_name_pattern)) + + return tokens + + def _get_token(self, char: str) -> str: + token = char + + if not re.match(Tokenizer.TOKEN_MATCH, token): + + while (token_char := self._peek()): + if re.match(Tokenizer.TOKEN_MATCH, token_char): + break + token += self._consume() + + return token + + def _handle_comments(self, char: str) -> bool: + if char == '/' and self._peek() == '/': + self._get_line() #skip the entire line + return True + elif char == '/' and self._peek() == '*': + self._consume() + self._consume_multiline_comment() + return True + return False + + def _get_line(self) -> str: + return self._consume_until('\n') + + def _peek(self, offset:int = 0) -> str: + if (self.source_index + offset) >= len(self.source_text): + return '' + char = self.source_text[self.source_index] + + return char + + def _consume(self) -> str: + char = self._peek() + + if char == '\n': + self.line_number += 1 + + self.source_index += 1 + return char + + def _consume_multiline_comment(self): + while self._peek(): + if self._consume() == '*' and self._peek() == '/': + self._consume() + break + + def _consume_until(self, end_tag: str) -> str: + res = "" + while self._peek() and (char:= self._consume()) != end_tag: + res += char + + return res \ No newline at end of file diff --git a/interpreter/_token.py b/interpreter/_token.py index 36b6d5c..59653aa 100644 --- a/interpreter/_token.py +++ b/interpreter/_token.py @@ -6,13 +6,13 @@ import re from enum import IntEnum from typing import Union -NUMERIC_CONSTANT_PATTERN = re.compile("([0-9]+)|(true)|(false)") -KEYWORD_PATTERN = re.compile("(return)|(continue)|(break)|(new)") -STRING_LITERAL_PATTERN = re.compile("('|\")(.*)(\"|')") -MATH_OP_PATTERN = re.compile("\+|-|\*|/") +NUMERIC_CONSTANT_PATTERN = re.compile(r"""([0-9]+)|(true)|(false)""") +KEYWORD_PATTERN = re.compile(r"""(return)|(continue)|(break)|(new)""") +STRING_LITERAL_PATTERN = re.compile(r"""('|\")(.*)(\"|')""") +MATH_OP_PATTERN = re.compile(r"""\+|-|\*|/|<|>""") class Token_type(IntEnum): - UNKNOWN=-1 + UNKNOWN=-1 #maybe this should be renamed to IDENTIFIERs LEFT_PAREN=0, RIGTH_PAREN=1, LEFT_CURLY=2, @@ -40,48 +40,48 @@ class Token: def __str__(self) -> str: if self.content: - return f"{self.type}: {self.content}" - return f"{self.typetype}" + return f"{str(self.type)}: {self.content}" + return f"{self.type}" def make_token(tag: str, type_name_pattern:re.Pattern) -> Token: if tag == '(': - return Token(Token_type.LEFT_PAREN) + return Token(Token_type.LEFT_PAREN, tag) elif tag == ')': - return Token(Token_type.RIGTH_PAREN) + return Token(Token_type.RIGTH_PAREN, tag) elif tag == '{': - return Token(Token_type.LEFT_CURLY) + return Token(Token_type.LEFT_CURLY, tag) elif tag == '}': - return Token(Token_type.RIGHT_CURLY) + return Token(Token_type.RIGHT_CURLY, tag) elif tag == '[': - return Token(Token_type.LEFT_BRACKET) + return Token(Token_type.LEFT_BRACKET, tag) elif tag == ']': - return Token(Token_type.RIGHT_BRACKET) + return Token(Token_type.RIGHT_BRACKET, tag) elif tag == ',': - return Token(Token_type.COMMA) + return Token(Token_type.COMMA, tag) elif tag == '=': - return Token(Token_type.EQUAL_SIGN) + return Token(Token_type.EQUAL_SIGN, tag) elif tag == ';': - return Token(Token_type.SEMICOLON) + return Token(Token_type.SEMICOLON, tag) elif MATH_OP_PATTERN.match(tag): - return Token(Token_type.MATH_OP) + return Token(Token_type.MATH_OP, tag) elif NUMERIC_CONSTANT_PATTERN.match(tag): return Token(Token_type.NUMERIC_CONSTANT, tag) elif tag == "if": - return Token(Token_type.IF_STATEMENT) + return Token(Token_type.IF_STATEMENT, tag) elif tag == "else": - return Token(Token_type.ELSE_STATEMENT) + return Token(Token_type.ELSE_STATEMENT, tag) elif tag == "while": - return Token(Token_type.WHILE_STATEMENT) + return Token(Token_type.WHILE_STATEMENT, tag) elif tag == "do": - return Token(Token_type.DO_WHILE_STATEMENT) + return Token(Token_type.DO_WHILE_STATEMENT, tag) elif tag == "for": - return Token(Token_type.FOR_STATEMENT) + return Token(Token_type.FOR_STATEMENT, tag) elif KEYWORD_PATTERN.match(tag): return Token(Token_type.KEY_WORD, tag) elif STRING_LITERAL_PATTERN.match(tag): - return Token(Token_type, tag[1:-1]) + return Token(Token_type.STRING_LITERAL, tag) elif type_name_pattern.match(tag): return Token(Token_type.TYPE_NAME, tag) else: - logging.warn(f"unknown token {tag}") + logging.info(f"found unknown token {tag}... Function or variable name?") return Token(Token_type.UNKNOWN, tag) \ No newline at end of file diff --git a/interpreter/function_scope.py b/interpreter/function_scope.py index 02acd12..ac8e1e7 100644 --- a/interpreter/function_scope.py +++ b/interpreter/function_scope.py @@ -8,8 +8,8 @@ from draw.Iinstruction import Iinstruction class Function_scope(Iterable): """This class serves as a container for Instructions""" - def __init__(self, child_instructions: List[Iinstruction], name: str, return_type: str, args: List[str]) -> None: - self.contents = child_instructions + def __init__(self, name: str, return_type: str, args: List[str]) -> None: + self.contents = [] self.name = name self.return_type = return_type self.args = args @@ -26,5 +26,11 @@ class Function_scope(Iterable): w = max(w, inst.getblkwidth()) return int(w) + def _add_instruction(self, inst: Iinstruction): + self.contents.append(inst) + + def _add_instructions(self, inst: List[Iinstruction]): + self.contents.extend(inst) + def __iter__(self): return self.contents.__iter__() \ No newline at end of file