Added instruction generation code to Lexer

This commit is contained in:
weckyy702
2021-04-01 17:00:19 +02:00
parent 49f3c84e60
commit 3bc93fa3a7
4 changed files with 148 additions and 74 deletions

View File

@@ -133,7 +133,7 @@ class if_instruction(Iinstruction):
res += '\t'+str(inst)+";\n"
res += "}"
if self.false_case:
res += " else {"
res += " else {\n"
for inst in self.true_case:
res += '\t'+str(inst)+";\n"
res += "}"

View File

@@ -1,4 +1,5 @@
"""Lexer.py: Definition for Lexer class"""
from interpreter.Tokenizer import Tokenizer
from os import linesep
from draw.Iinstruction import *
from typing import List, Optional, Union, Tuple
@@ -43,48 +44,37 @@ class Lexer:
if self._is_function_def(line_tokens):
func_name, func_return_type, func_args = self._construct_function_header_from_tokens(line_tokens)
fs = Function_scope(func_name, func_return_type, func_args)
instructions = self._get_instructions_in_scope()
scopes.append(Function_scope(func_name, func_return_type, func_args))
scopes.append(instructions)
fs._add_instructions(instructions)
scopes.append(fs)
else:
line = ""
for token in line_tokens[:-1]:
line += token.content + ' '
self._construct_instruction_from_tokens(line_tokens)
scopes.append([line])
#something was declared in global scope
self._global_instructions.append(self._construct_instruction_from_tokens(line_tokens))
self.add_globals_to_scope_list(scopes)
return scopes
def _get_instructions_in_scope(self):
lines = []
instructions = []
while self._peek():
line_tokens = self.get_line_tokens()
if len(line_tokens) > 1:
line = ""
for token in line_tokens[:-1]:
line += token.content + ' '
self._construct_instruction_from_tokens(line_tokens)
lines.append(line)
instruction = self._construct_instruction_from_tokens(line_tokens)
if instruction:
instructions.append(instruction)
delimiter_token = line_tokens[-1]
if delimiter_token.type == Token_type.RIGHT_CURLY:
return lines
if delimiter_token.type == Token_type.LEFT_CURLY:
instructions = self._get_instructions_in_scope()
for line in instructions:
lines.append('\t'+line)
return instructions
raise JavaSyntaxError(f"Missing right curly!")
@@ -96,6 +86,13 @@ class Lexer:
break
return tokens
def add_globals_to_scope_list(self, scope_list: List[Function_scope]):
    """Append a synthetic '<Global scope>' function scope carrying every
    instruction that was declared outside any function."""
    globals_scope = Function_scope("<Global scope>", "void", [])
    globals_scope._add_instructions(self._global_instructions)
    scope_list.append(globals_scope)
def _is_function_def(self, tokens: List[Token]) -> bool:
@@ -103,45 +100,110 @@ class Lexer:
return tokens[0].type == Token_type.TYPE_NAME and tokens[1].type == Token_type.UNKNOWN and tokens[2].type == Token_type.LEFT_PAREN and tokens[-1].type == Token_type.LEFT_CURLY
def _construct_instruction_from_tokens(self, tokens: List[Token]):
instruction_token = tokens[0]
if instruction_token.type == Token_type.IF_STATEMENT:
logging.debug("Found if construct")
for token in tokens:
print('\t', token)
return self._handle_if_construct(tokens)
elif instruction_token.type == Token_type.WHILE_STATEMENT:
logging.debug("Found while construct")
for token in tokens:
print('\t', token)
return self._handle_while_construct(tokens)
elif instruction_token.type == Token_type.DO_WHILE_STATEMENT:
logging.debug("Found do-while construct")
for token in tokens:
print('\t', token)
return self._handle_do_while_construct(tokens)
elif instruction_token.type == Token_type.FOR_STATEMENT:
#TODO: change that
logging.debug("Found for construct")
tokens.extend(self.get_line_tokens())
tokens.extend(self.get_line_tokens())
for token in tokens:
print('\t', token)
return self._handle_for_construct(tokens)
elif instruction_token.type == Token_type.TYPE_NAME:
logging.debug("Found Type name construct")
for token in tokens:
print('\t', token)
return self._handle_type_name_construct(tokens)
elif instruction_token.type == Token_type.UNKNOWN:
logging.debug("Found generic instruction")
for token in tokens:
print('\t', token)
return self._handle_generic_construct(tokens)
def _construct_function_header_from_tokens(self, tokens: List[Token]) -> Tuple[str, str, List[str]]:
    """Extract (name, return type, argument list) from a function-header token line.

    Placeholder implementation: header parsing is not written yet, so fixed
    dummy values are returned regardless of *tokens*.
    """
    return "name", "return_type", ["int arg1", "String arg2"]
def _construct_variable_def_from_tokens(self, tokens: List[Token]) -> str:
    """Render a variable definition as a human-readable description.

    Expected token layout: TYPE_NAME IDENTIFIER ;|( = EXPRESSION)

    Returns:
        A sentence describing the declared variable, including its initial
        value when an assignment is present.

    Raises:
        JavaSyntaxError: when *tokens* do not match the layout above
            (delegated to _ensure_correct_variable_structure).
    """
    _ensure_correct_variable_structure(tokens)
    var_type = tokens[0].content
    var_name = tokens[1].content
    if tokens[2].type == Token_type.EQUAL_SIGN:
        # Everything between '=' and the trailing ';' is the initializer.
        # Fixes the original, which referenced an undefined `var_value`,
        # interpolated literal placeholder strings, and misspelled "declare".
        var_value = ' '.join(token.content for token in tokens[3:-1])
        return f"declare variable '{var_name}' of type {var_type} with value {var_value}"
    return f"declare variable '{var_name}' of type {var_type}"
"""Handler functions for different types of language structures"""
def _handle_if_construct(self, tokens: List[Token]):
    """Assemble an if instruction: the true-branch scope followed by an
    optional else / else-if chain."""
    logging.debug("Found if construct")
    then_branch = self._get_instructions_in_scope()
    else_branch = self._handle_else_construct()
    return if_instruction("if_instruction", then_branch, else_branch)
def _handle_else_construct(self):
    """Parse an optional else / else-if clause following an if body.

    Returns:
        A list of instructions for the else branch, or None when the next
        token does not start an else clause (or the input is exhausted).
    """
    next_token = self._peek()
    # Guard against end of input: _peek() can return a falsy value (see the
    # `while self._peek():` loop in _get_instructions_in_scope); the original
    # dereferenced .type unconditionally and crashed on a trailing 'if'.
    if next_token and next_token.type == Token_type.ELSE_STATEMENT:
        if self._peek(1).type == Token_type.IF_STATEMENT:
            logging.debug("Found if-else construct")
            # Drop the leading 'else' token and parse the rest as a fresh if.
            else_if_tokens = self.get_line_tokens()[1:]
            return [self._handle_if_construct(else_if_tokens)]
        else:
            logging.debug("Found else construct")
            self.get_line_tokens()  # consume the bare 'else {' line
            return self._get_instructions_in_scope()
    return None
def _handle_while_construct(self, tokens: List[Token]):
    """Build a front-checked while instruction from the scope that follows
    the loop header."""
    logging.debug("Found while construct")
    body = self._get_instructions_in_scope()
    return while_instruction_front("while_instruction", body)
def _handle_do_while_construct(self, tokens: List[Token]):
    """Build a back-checked while instruction: collect the loop body, then
    consume the trailing 'while (...)' line."""
    logging.debug("Found do-while construct")
    body = self._get_instructions_in_scope()
    self.get_line_tokens()  # discard the closing 'while (condition);' line
    return while_instruction_back("while_instruction_back", body)
def _handle_for_construct(self, tokens: List[Token]):
    """Build a for instruction.

    The for header spans three token lines (init; condition; increment), so
    two additional lines are pulled from the tokenizer before reading the
    loop body.  # TODO: change that
    """
    logging.debug("Found for construct")
    for _ in range(2):
        tokens.extend(self.get_line_tokens())
    body = self._get_instructions_in_scope()
    # The increment step runs at the end of every iteration.
    body.append(generic_instruction("increment"))
    return for_instruction("for_instruction", body)
def _handle_type_name_construct(self, tokens: List[Token]):
    """Validate a variable-declaration token line and wrap it in a generic
    instruction (raises JavaSyntaxError on malformed declarations)."""
    logging.debug("Found Type name construct")
    _ensure_correct_variable_structure(tokens)
    return generic_instruction("type_name_construct")
def _handle_generic_construct(self, tokens: List[Token]):
    """Fallback handler: wrap any unrecognized statement line in a plain
    generic instruction."""
    logging.debug("Found generic instruction")
    return generic_instruction("generic_instruction")
def _ensure_correct_variable_structure(tokens: List[Token]):
    """Validate that *tokens* form a variable declaration.

    Expected layout: TYPE_NAME IDENTIFIER ;|( = EXPRESSION)

    Raises:
        JavaSyntaxError: on any deviation, pointing at the offending token's
            source location.
    """
    count = len(tokens)
    if count < 3:
        raise JavaSyntaxError(f"{tokens[0].location}: Ill-formed type construct! Expected at least 3 tokens, got {count}")
    name_token, delimiter_token = tokens[1], tokens[2]
    if name_token.type != Token_type.UNKNOWN:
        raise JavaSyntaxError(f"{name_token.location}: Illegal token after type name! Expected UNKNOWN, got {str(name_token.type)}")
    if delimiter_token.type not in (Token_type.SEMICOLON, Token_type.EQUAL_SIGN):
        raise JavaSyntaxError(f"{delimiter_token.location}: Illegal token after variable name! Expected SEMICOLON or EQUAL_SIGN, got {str(delimiter_token.type)}")
    # An assignment needs at least: TYPE_NAME IDENTIFIER = EXPRESSION ;
    if delimiter_token.type == Token_type.EQUAL_SIGN and count < 5:
        raise JavaSyntaxError(f"{delimiter_token.location}: Ill-formed assignment expression! Expected at least 5 tokens, got {count}")
# def get_scopes(self) -> List[Function_scope]:

View File

@@ -4,12 +4,12 @@ import logging
import re
from typing import List, Optional
from interpreter._token import Token, make_token
from interpreter._token import Token, make_token, SourceLocation
class Tokenizer:
"""This class will take the provided source file and convert it to a list of tokens"""
TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """) #TODO: make this modular
TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """)
def __init__(self, file_name: str) -> None:
with open(file_name) as f:
@@ -17,8 +17,12 @@ class Tokenizer:
self.source_index = 0
self.line_number = 1
self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
self.type_name_pattern = re.compile('(char)|(int)|(void)|(double)|(Pixel)') #TODO: make this modular
self._filename = file_name
def get_tokens(self) -> List[Token]:
tokens = []
@@ -33,7 +37,7 @@ class Tokenizer:
token = self._get_token(char)
logging.debug(f"found token \"{token}\" on line {self.line_number}")
tokens.append(make_token(token, self.type_name_pattern))
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern))
return tokens

View File

@@ -33,55 +33,63 @@ class Token_type(IntEnum):
STRING_LITERAL=17
TYPE_NAME=18
class SourceLocation:
    """A position in the interpreted source: file name plus line number,
    used to point error messages at the offending token."""

    def __init__(self, filename: str, line: int) -> None:
        self.filename = filename  # path of the source file
        self.line = line          # line number within that file

    def __str__(self) -> str:
        return f"File {self.filename}, line {self.line}"
class Token:
def __init__(self, type: Token_type, content: Union[str, None]=None) -> None:
def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
self.type = type
self.content = content
self.location = location
def __str__(self) -> str:
if self.content:
return f"{str(self.type)}: {self.content}"
return f"{self.type}"
def make_token(tag: str, type_name_pattern:re.Pattern) -> Token:
def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
if tag == '(':
return Token(Token_type.LEFT_PAREN, tag)
return Token(Token_type.LEFT_PAREN, location, tag)
elif tag == ')':
return Token(Token_type.RIGTH_PAREN, tag)
return Token(Token_type.RIGTH_PAREN, location, tag)
elif tag == '{':
return Token(Token_type.LEFT_CURLY, tag)
return Token(Token_type.LEFT_CURLY, location, tag)
elif tag == '}':
return Token(Token_type.RIGHT_CURLY, tag)
return Token(Token_type.RIGHT_CURLY, location, tag)
elif tag == '[':
return Token(Token_type.LEFT_BRACKET, tag)
return Token(Token_type.LEFT_BRACKET, location, tag)
elif tag == ']':
return Token(Token_type.RIGHT_BRACKET, tag)
return Token(Token_type.RIGHT_BRACKET, location, tag)
elif tag == ',':
return Token(Token_type.COMMA, tag)
return Token(Token_type.COMMA, location, tag)
elif tag == '=':
return Token(Token_type.EQUAL_SIGN, tag)
return Token(Token_type.EQUAL_SIGN, location, tag)
elif tag == ';':
return Token(Token_type.SEMICOLON, tag)
return Token(Token_type.SEMICOLON, location, tag)
elif MATH_OP_PATTERN.match(tag):
return Token(Token_type.MATH_OP, tag)
return Token(Token_type.MATH_OP, location, tag)
elif NUMERIC_CONSTANT_PATTERN.match(tag):
return Token(Token_type.NUMERIC_CONSTANT, tag)
return Token(Token_type.NUMERIC_CONSTANT, location, tag)
elif tag == "if":
return Token(Token_type.IF_STATEMENT, tag)
return Token(Token_type.IF_STATEMENT, location, tag)
elif tag == "else":
return Token(Token_type.ELSE_STATEMENT, tag)
return Token(Token_type.ELSE_STATEMENT, location, tag)
elif tag == "while":
return Token(Token_type.WHILE_STATEMENT, tag)
return Token(Token_type.WHILE_STATEMENT, location, tag)
elif tag == "do":
return Token(Token_type.DO_WHILE_STATEMENT, tag)
return Token(Token_type.DO_WHILE_STATEMENT, location, tag)
elif tag == "for":
return Token(Token_type.FOR_STATEMENT, tag)
return Token(Token_type.FOR_STATEMENT, location, tag)
elif KEYWORD_PATTERN.match(tag):
return Token(Token_type.KEY_WORD, tag)
return Token(Token_type.KEY_WORD, location, tag)
elif STRING_LITERAL_PATTERN.match(tag):
return Token(Token_type.STRING_LITERAL, tag)
return Token(Token_type.STRING_LITERAL, location, tag)
elif type_name_pattern.match(tag):
return Token(Token_type.TYPE_NAME, tag)
return Token(Token_type.TYPE_NAME, location, tag)
else:
logging.info(f"found unknown token {tag}... Function or variable name?")
return Token(Token_type.UNKNOWN, tag)
return Token(Token_type.UNKNOWN, location, tag)