From 3bc93fa3a74363cbd8a7dce026b889fc179d0087 Mon Sep 17 00:00:00 2001
From: weckyy702 <weckyy702@gmail.com>
Date: Thu, 1 Apr 2021 17:00:19 +0200
Subject: [PATCH] Added instruction generation code to Lexer

---
 draw/Iinstruction.py     |   2 +-
 interpreter/Lexer.py     | 156 +++++++++++++++++++++++++++------------
 interpreter/Tokenizer.py |  10 ++-
 interpreter/_token.py    |  54 ++++++++------
 4 files changed, 148 insertions(+), 74 deletions(-)
diff --git a/draw/Iinstruction.py b/draw/Iinstruction.py
index 79bcb54..0d2fd79 100644
--- a/draw/Iinstruction.py
+++ b/draw/Iinstruction.py
@@ -133,7 +133,7 @@ class if_instruction(Iinstruction):
             res += '\t'+str(inst)+";\n"
         res += "}"
         if self.false_case:
-            res += " else {"
+            res += " else {\n"
             for inst in self.true_case:
                 res += '\t'+str(inst)+";\n"
             res += "}"
diff --git a/interpreter/Lexer.py b/interpreter/Lexer.py
index e7da0ca..3ca5ce5 100644
--- a/interpreter/Lexer.py
+++ b/interpreter/Lexer.py
@@ -1,4 +1,5 @@
 """Lexer.py: Definition for Lexer class"""
+from interpreter.Tokenizer import Tokenizer
 from os import linesep
 from draw.Iinstruction import *
 from typing import List, Optional, Union, Tuple
@@ -43,48 +44,37 @@ class Lexer:
             if self._is_function_def(line_tokens):
 
                 func_name, func_return_type, func_args = self._construct_function_header_from_tokens(line_tokens)
+                fs = Function_scope(func_name, func_return_type, func_args)
+
                 instructions = self._get_instructions_in_scope()
-                scopes.append(Function_scope(func_name, func_return_type, func_args))
-                scopes.append(instructions)
+                fs._add_instructions(instructions)
+
+                scopes.append(fs)
 
             else:
-                line = ""
-                for token in line_tokens[:-1]:
-                    line += token.content + ' '
-
-                self._construct_instruction_from_tokens(line_tokens)
-
-                scopes.append([line])
-
+                #something was declared in global scope
+                self._global_instructions.append(self._construct_instruction_from_tokens(line_tokens))
 
+        self.add_globals_to_scope_list(scopes)
         return scopes
             
 
     def _get_instructions_in_scope(self):
 
-        lines = []
+        instructions = []
 
         while self._peek():
 
             line_tokens = self.get_line_tokens()
-
-            if len(line_tokens) > 1:
-                line = ""
-                for token in line_tokens[:-1]:
-                    line += token.content + ' '
-
-                self._construct_instruction_from_tokens(line_tokens)
-
-                lines.append(line)
+            instruction = self._construct_instruction_from_tokens(line_tokens)
+            if instruction:
+                instructions.append(instruction)
 
             delimiter_token = line_tokens[-1]
-
             if delimiter_token.type == Token_type.RIGHT_CURLY:
-                return lines
-            if delimiter_token.type == Token_type.LEFT_CURLY:
-                instructions = self._get_instructions_in_scope()
-                for line in instructions:
-                    lines.append('\t'+line)
+                return instructions
+
+        raise JavaSyntaxError(f"Missing right curly!")
 
 
 
@@ -96,6 +86,13 @@ class Lexer:
                 break
         return tokens
 
+    
+
+    def add_globals_to_scope_list(self, scope_list: List[Function_scope]):
+        """Wrap the collected global instructions in a '<Global scope>' Function_scope and append it to scope_list."""
+        globals_scope = Function_scope("<Global scope>", "void", [])
+        globals_scope._add_instructions(self._global_instructions)
+        scope_list.append(globals_scope)
 
 
     def _is_function_def(self, tokens: List[Token]) -> bool:
@@ -103,45 +100,110 @@ class Lexer:
         return tokens[0].type == Token_type.TYPE_NAME and tokens[1].type == Token_type.UNKNOWN and tokens[2].type == Token_type.LEFT_PAREN and tokens[-1].type == Token_type.LEFT_CURLY
 
 
+
     def _construct_instruction_from_tokens(self, tokens: List[Token]):
         instruction_token = tokens[0]
 
         if instruction_token.type == Token_type.IF_STATEMENT:
-            logging.debug("Found if construct")
-            for token in tokens:
-                print('\t', token)
+            return self._handle_if_construct(tokens)
         
         elif instruction_token.type == Token_type.WHILE_STATEMENT:
-            logging.debug("Found while construct")
-            for token in tokens:
-                print('\t', token)
+            return self._handle_while_construct(tokens)
             
         elif instruction_token.type == Token_type.DO_WHILE_STATEMENT:
-            logging.debug("Found do-while construct")
-            for token in tokens:
-                print('\t', token)
+            return self._handle_do_while_construct(tokens)
         
         elif instruction_token.type == Token_type.FOR_STATEMENT:
-            #TODO: change that
-            logging.debug("Found for construct")
-            tokens.extend(self.get_line_tokens())
-            tokens.extend(self.get_line_tokens())
-            for token in tokens:
-                print('\t', token)
+            return self._handle_for_construct(tokens)
         
         elif instruction_token.type == Token_type.TYPE_NAME:
-            logging.debug("Found Type name construct")
-            for token in tokens:
-                print('\t', token)
+            return self._handle_type_name_construct(tokens)
 
         elif instruction_token.type == Token_type.UNKNOWN:
-            logging.debug("Found generic instruction")
-            for token in tokens:
-                print('\t', token)
+            return self._handle_generic_construct(tokens)
 
     def _construct_function_header_from_tokens(self, tokens: List[Token]) -> Tuple[str, str, List[str]]:
         return "name", "return_type", ["int arg1", "String arg2"]
 
+    def _construct_variable_def_from_tokens(self, tokens: List[Token]) -> str:
+        """Describe a variable definition (TYPE_NAME IDENTIFIER ;|( = EXPRESSION)); raises JavaSyntaxError if ill-formed."""
+        _ensure_correct_variable_structure(tokens)
+        if tokens[2].type == Token_type.EQUAL_SIGN:  # an EQUAL_SIGN after the name means an initializer follows
+            return f"declare variable '{'name'}' of type {'type'} with value {'value'}"
+        return f"declare variable '{'name'}' of type {'type'}"
+
+    """Handler functions for different types of language structures"""
+
+    def _handle_if_construct(self, tokens: List[Token]):
+        """Build an if_instruction from the scope that follows plus an optional else branch."""
+        logging.debug("Found if construct")
+        # Order matters: the true branch has to be read before looking for an else.
+        then_branch = self._get_instructions_in_scope()
+        else_branch = self._handle_else_construct()
+        return if_instruction("if_instruction", then_branch, else_branch)
+
+    def _handle_else_construct(self):
+        """Parse an optional else / else-if branch; return its instruction list, or None if there is none."""
+        if getattr(self._peek(), "type", None) != Token_type.ELSE_STATEMENT:  # _peek() is falsy at end of input
+            return None
+        if getattr(self._peek(1), "type", None) == Token_type.IF_STATEMENT:
+            logging.debug("Found if-else construct")
+            else_if_tokens = self.get_line_tokens()[1:]  # skip the first token of the line (presumably 'else')
+            return [self._handle_if_construct(else_if_tokens)]
+        logging.debug("Found else construct")
+        self.get_line_tokens()  # read and discard the else header line
+        return self._get_instructions_in_scope()
+
+    def _handle_while_construct(self, tokens: List[Token]):
+        """Build a while_instruction_front from the instructions of the scope that follows."""
+        logging.debug("Found while construct")
+        # `tokens` is not inspected; a fixed placeholder string is passed instead.
+        body = self._get_instructions_in_scope()
+        return while_instruction_front("while_instruction", body)
+
+    def _handle_do_while_construct(self, tokens: List[Token]):
+        """Build a while_instruction_back from the instructions of the scope that follows."""
+        logging.debug("Found do-while construct")
+        body = self._get_instructions_in_scope()
+        # One more line is read and discarded after the body (presumably the trailing `while (...);`).
+        self.get_line_tokens()
+
+        return while_instruction_back("while_instruction_back", body)
+
+
+
+    def _handle_for_construct(self, tokens: List[Token]):
+        """Build a for_instruction whose body ends with a placeholder 'increment' instruction."""
+        #TODO: change that
+        logging.debug("Found for construct")
+        # Two further token lines are appended to `tokens` in place (presumably the rest of the for-header).
+        for _ in range(2):
+            tokens.extend(self.get_line_tokens())
+
+        body = self._get_instructions_in_scope()
+        body.append(generic_instruction("increment"))
+        return for_instruction("for_instruction", body)
+
+    def _handle_type_name_construct(self, tokens: List[Token]):
+        logging.debug("Found Type name construct")
+        _ensure_correct_variable_structure(tokens)  # raises JavaSyntaxError on an ill-formed declaration
+        return generic_instruction("type_name_construct")  # fixed placeholder text; token contents are not used
+
+    def _handle_generic_construct(self, tokens: List[Token]):
+        """Return a placeholder generic_instruction; `tokens` is currently ignored."""
+        logging.debug("Found generic instruction")
+        return generic_instruction("generic_instruction")
+
+def _ensure_correct_variable_structure(tokens: List[Token]):
+    """Raise JavaSyntaxError unless tokens form TYPE_NAME IDENTIFIER ;|( = EXPRESSION). Assumes tokens is non-empty."""
+    if len(tokens) < 3:
+        raise JavaSyntaxError(f"{tokens[0].location}: Ill-formed type construct! Expected at least 3 tokens, got {len(tokens)}")
+    if tokens[1].type != Token_type.UNKNOWN:
+        raise JavaSyntaxError(f"{tokens[1].location}: Illegal token after type name! Expected UNKNOWN, got {str(tokens[1].type)}")
+    if not tokens[2].type in [Token_type.SEMICOLON, Token_type.EQUAL_SIGN]:
+        raise JavaSyntaxError(f"{tokens[2].location}: Illegal token after variable name! Expected SEMICOLON or EQUAL_SIGN, got {str(tokens[2].type)}")
+    if tokens[2].type == Token_type.EQUAL_SIGN and len(tokens) < 5:
+        raise JavaSyntaxError(f"{tokens[2].location}: Ill-formed assignment expression! Expected at least 5 tokens, got {len(tokens)}")
 
 #     def get_scopes(self) -> List[Function_scope]:
 
diff --git a/interpreter/Tokenizer.py b/interpreter/Tokenizer.py
index f67f215..4fb208a 100644
--- a/interpreter/Tokenizer.py
+++ b/interpreter/Tokenizer.py
@@ -4,12 +4,12 @@ import logging
 import re
 from typing import List, Optional
 
-from interpreter._token import Token, make_token
+from interpreter._token import Token, make_token, SourceLocation
 
 class Tokenizer:
     """This class will take the provided source file and convert it to a list of tokens"""
 
-    TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """) #TODO: make this modular
+    TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """)
 
     def __init__(self, file_name: str) -> None:
         with open(file_name) as f:
@@ -17,8 +17,12 @@ class Tokenizer:
         self.source_index = 0
         self.line_number = 1
 
+        self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
+
         self.type_name_pattern = re.compile('(char)|(int)|(void)|(double)|(Pixel)') #TODO: make this modular
 
+        self._filename = file_name
+
     def get_tokens(self) -> List[Token]:
 
         tokens = []
@@ -33,7 +37,7 @@ class Tokenizer:
 
             token = self._get_token(char)
             logging.debug(f"found token \"{token}\" on line {self.line_number}")
-            tokens.append(make_token(token, self.type_name_pattern))
+            tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern))
 
         return tokens
 
diff --git a/interpreter/_token.py b/interpreter/_token.py
index 59653aa..1fad304 100644
--- a/interpreter/_token.py
+++ b/interpreter/_token.py
@@ -33,55 +33,63 @@ class Token_type(IntEnum):
     STRING_LITERAL=17
     TYPE_NAME=18
 
+class SourceLocation:
+    def __init__(self, filename: str, line: int) -> None:
+        self.filename = filename  # name of the source file this location refers to
+        self.line = line  # line number within that file
+
+    def __str__(self) -> str:
+        return f"File {self.filename}, line {self.line}"  # rendered at the start of JavaSyntaxError messages
+
 class Token:
-    def __init__(self, type: Token_type, content: Union[str, None]=None) -> None:
+    def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
         self.type = type
         self.content = content
+        self.location = location
 
     def __str__(self) -> str:
         if self.content:
             return f"{str(self.type)}: {self.content}"
         return f"{self.type}"
 
-def make_token(tag: str, type_name_pattern:re.Pattern) -> Token:
+def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
     if tag == '(':
-        return Token(Token_type.LEFT_PAREN, tag)
+        return Token(Token_type.LEFT_PAREN, location, tag)
     elif tag == ')':
-        return Token(Token_type.RIGTH_PAREN, tag)
+        return Token(Token_type.RIGTH_PAREN, location, tag)
     elif tag == '{':
-        return Token(Token_type.LEFT_CURLY, tag)
+        return Token(Token_type.LEFT_CURLY, location, tag)
     elif tag == '}':
-        return Token(Token_type.RIGHT_CURLY, tag)
+        return Token(Token_type.RIGHT_CURLY, location, tag)
     elif tag == '[':
-        return Token(Token_type.LEFT_BRACKET, tag)
+        return Token(Token_type.LEFT_BRACKET, location, tag)
     elif tag == ']':
-        return Token(Token_type.RIGHT_BRACKET, tag)
+        return Token(Token_type.RIGHT_BRACKET, location, tag)
     elif tag == ',':
-        return Token(Token_type.COMMA, tag)
+        return Token(Token_type.COMMA, location, tag)
     elif tag == '=':
-        return Token(Token_type.EQUAL_SIGN, tag)
+        return Token(Token_type.EQUAL_SIGN, location, tag)
     elif tag == ';':
-        return Token(Token_type.SEMICOLON, tag)
+        return Token(Token_type.SEMICOLON, location, tag)
     elif MATH_OP_PATTERN.match(tag):
-        return Token(Token_type.MATH_OP, tag)
+        return Token(Token_type.MATH_OP, location, tag)
     elif NUMERIC_CONSTANT_PATTERN.match(tag):
-        return Token(Token_type.NUMERIC_CONSTANT, tag)
+        return Token(Token_type.NUMERIC_CONSTANT, location, tag)
     elif tag == "if":
-        return Token(Token_type.IF_STATEMENT, tag)
+        return Token(Token_type.IF_STATEMENT, location, tag)
     elif tag == "else":
-        return Token(Token_type.ELSE_STATEMENT, tag)
+        return Token(Token_type.ELSE_STATEMENT, location, tag)
     elif tag == "while":
-        return Token(Token_type.WHILE_STATEMENT, tag)
+        return Token(Token_type.WHILE_STATEMENT, location, tag)
     elif tag == "do":
-        return Token(Token_type.DO_WHILE_STATEMENT, tag)
+        return Token(Token_type.DO_WHILE_STATEMENT, location, tag)
     elif tag == "for":
-        return Token(Token_type.FOR_STATEMENT, tag)
+        return Token(Token_type.FOR_STATEMENT, location, tag)
     elif KEYWORD_PATTERN.match(tag):
-        return Token(Token_type.KEY_WORD, tag)
+        return Token(Token_type.KEY_WORD, location, tag)
     elif STRING_LITERAL_PATTERN.match(tag):
-        return Token(Token_type.STRING_LITERAL, tag)
+        return Token(Token_type.STRING_LITERAL, location, tag)
     elif type_name_pattern.match(tag):
-        return Token(Token_type.TYPE_NAME, tag)
+        return Token(Token_type.TYPE_NAME, location, tag)
     else:
-        logging.info(f"found unknown token {tag}... Function or variable name?")
-        return Token(Token_type.UNKNOWN, tag)
\ No newline at end of file
+        return Token(Token_type.UNKNOWN, location, tag)
\ No newline at end of file