Added instruction generation code to Lexer

This commit is contained in:
weckyy702
2021-04-01 17:00:19 +02:00
parent 49f3c84e60
commit 3bc93fa3a7
4 changed files with 148 additions and 74 deletions

View File

@@ -133,7 +133,7 @@ class if_instruction(Iinstruction):
res += '\t'+str(inst)+";\n"
res += "}"
if self.false_case:
res += " else {"
res += " else {\n"
for inst in self.true_case:
res += '\t'+str(inst)+";\n"
res += "}"

View File

@@ -1,4 +1,5 @@
"""Lexer.py: Definition for Lexer class"""
from interpreter.Tokenizer import Tokenizer
from os import linesep
from draw.Iinstruction import *
from typing import List, Optional, Union, Tuple
@@ -43,48 +44,37 @@ class Lexer:
if self._is_function_def(line_tokens):
func_name, func_return_type, func_args = self._construct_function_header_from_tokens(line_tokens)
fs = Function_scope(func_name, func_return_type, func_args)
instructions = self._get_instructions_in_scope()
scopes.append(Function_scope(func_name, func_return_type, func_args))
scopes.append(instructions)
fs._add_instructions(instructions)
scopes.append(fs)
else:
line = ""
for token in line_tokens[:-1]:
line += token.content + ' '
self._construct_instruction_from_tokens(line_tokens)
scopes.append([line])
#something was declared in global scope
self._global_instructions.append(self._construct_instruction_from_tokens(line_tokens))
self.add_globals_to_scope_list(scopes)
return scopes
def _get_instructions_in_scope(self):
lines = []
instructions = []
while self._peek():
line_tokens = self.get_line_tokens()
if len(line_tokens) > 1:
line = ""
for token in line_tokens[:-1]:
line += token.content + ' '
self._construct_instruction_from_tokens(line_tokens)
lines.append(line)
instruction = self._construct_instruction_from_tokens(line_tokens)
if instruction:
instructions.append(instruction)
delimiter_token = line_tokens[-1]
if delimiter_token.type == Token_type.RIGHT_CURLY:
return lines
if delimiter_token.type == Token_type.LEFT_CURLY:
instructions = self._get_instructions_in_scope()
for line in instructions:
lines.append('\t'+line)
return instructions
raise JavaSyntaxError(f"Missing right curly!")
@@ -96,6 +86,13 @@ class Lexer:
break
return tokens
def add_globals_to_scope_list(self, scope_list: List[Function_scope]):
    """Append a synthetic '<Global scope>' function scope carrying every
    instruction that was declared outside any function."""
    globals_scope = Function_scope("<Global scope>", "void", [])
    globals_scope._add_instructions(self._global_instructions)
    scope_list.append(globals_scope)
def _is_function_def(self, tokens: List[Token]) -> bool:
@@ -103,45 +100,110 @@ class Lexer:
return tokens[0].type == Token_type.TYPE_NAME and tokens[1].type == Token_type.UNKNOWN and tokens[2].type == Token_type.LEFT_PAREN and tokens[-1].type == Token_type.LEFT_CURLY
def _construct_instruction_from_tokens(self, tokens: List[Token]):
instruction_token = tokens[0]
if instruction_token.type == Token_type.IF_STATEMENT:
logging.debug("Found if construct")
for token in tokens:
print('\t', token)
return self._handle_if_construct(tokens)
elif instruction_token.type == Token_type.WHILE_STATEMENT:
logging.debug("Found while construct")
for token in tokens:
print('\t', token)
return self._handle_while_construct(tokens)
elif instruction_token.type == Token_type.DO_WHILE_STATEMENT:
logging.debug("Found do-while construct")
for token in tokens:
print('\t', token)
return self._handle_do_while_construct(tokens)
elif instruction_token.type == Token_type.FOR_STATEMENT:
#TODO: change that
logging.debug("Found for construct")
tokens.extend(self.get_line_tokens())
tokens.extend(self.get_line_tokens())
for token in tokens:
print('\t', token)
return self._handle_for_construct(tokens)
elif instruction_token.type == Token_type.TYPE_NAME:
logging.debug("Found Type name construct")
for token in tokens:
print('\t', token)
return self._handle_type_name_construct(tokens)
elif instruction_token.type == Token_type.UNKNOWN:
logging.debug("Found generic instruction")
for token in tokens:
print('\t', token)
return self._handle_generic_construct(tokens)
def _construct_function_header_from_tokens(self, tokens: List[Token]) -> Tuple[str, str, List[str]]:
    """Extract (name, return type, argument list) from a function-header token line.

    Placeholder implementation: header parsing is not written yet, so fixed
    dummy values are returned regardless of *tokens*.
    """
    return "name", "return_type", ["int arg1", "String arg2"]
def _construct_variable_def_from_tokens(self, tokens: List[Token]) -> str:
    """Render a variable definition as a human-readable description.

    Expected token layout: TYPE_NAME IDENTIFIER ;|( = EXPRESSION)

    Returns:
        A sentence describing the declared variable, including its initial
        value when an assignment is present.

    Raises:
        JavaSyntaxError: when *tokens* do not match the layout above
            (delegated to _ensure_correct_variable_structure).
    """
    _ensure_correct_variable_structure(tokens)
    var_type = tokens[0].content
    var_name = tokens[1].content
    if tokens[2].type == Token_type.EQUAL_SIGN:
        # Everything between '=' and the trailing ';' is the initializer.
        # Fixes the original, which referenced an undefined `var_value`,
        # interpolated literal placeholder strings, and misspelled "declare".
        var_value = ' '.join(token.content for token in tokens[3:-1])
        return f"declare variable '{var_name}' of type {var_type} with value {var_value}"
    return f"declare variable '{var_name}' of type {var_type}"
"""Handler functions for different types of language structures"""
def _handle_if_construct(self, tokens: List[Token]):
    """Assemble an if instruction: the true-branch scope followed by an
    optional else / else-if chain."""
    logging.debug("Found if construct")
    then_branch = self._get_instructions_in_scope()
    else_branch = self._handle_else_construct()
    return if_instruction("if_instruction", then_branch, else_branch)
def _handle_else_construct(self):
    """Parse an optional else / else-if clause following an if body.

    Returns:
        A list of instructions for the else branch, or None when the next
        token does not start an else clause (or the input is exhausted).
    """
    next_token = self._peek()
    # Guard against end of input: _peek() can return a falsy value (see the
    # `while self._peek():` loop in _get_instructions_in_scope); the original
    # dereferenced .type unconditionally and crashed on a trailing 'if'.
    if next_token and next_token.type == Token_type.ELSE_STATEMENT:
        if self._peek(1).type == Token_type.IF_STATEMENT:
            logging.debug("Found if-else construct")
            # Drop the leading 'else' token and parse the rest as a fresh if.
            else_if_tokens = self.get_line_tokens()[1:]
            return [self._handle_if_construct(else_if_tokens)]
        else:
            logging.debug("Found else construct")
            self.get_line_tokens()  # consume the bare 'else {' line
            return self._get_instructions_in_scope()
    return None
def _handle_while_construct(self, tokens: List[Token]):
    """Build a front-checked while instruction from the scope that follows
    the loop header."""
    logging.debug("Found while construct")
    body = self._get_instructions_in_scope()
    return while_instruction_front("while_instruction", body)
def _handle_do_while_construct(self, tokens: List[Token]):
    """Build a back-checked while instruction: collect the loop body, then
    consume the trailing 'while (...)' line."""
    logging.debug("Found do-while construct")
    body = self._get_instructions_in_scope()
    self.get_line_tokens()  # discard the closing 'while (condition);' line
    return while_instruction_back("while_instruction_back", body)
def _handle_for_construct(self, tokens: List[Token]):
    """Build a for instruction.

    The for header spans three token lines (init; condition; increment), so
    two additional lines are pulled from the tokenizer before reading the
    loop body.  # TODO: change that
    """
    logging.debug("Found for construct")
    for _ in range(2):
        tokens.extend(self.get_line_tokens())
    body = self._get_instructions_in_scope()
    # The increment step runs at the end of every iteration.
    body.append(generic_instruction("increment"))
    return for_instruction("for_instruction", body)
def _handle_type_name_construct(self, tokens: List[Token]):
    """Validate a variable-declaration token line and wrap it in a generic
    instruction (raises JavaSyntaxError on malformed declarations)."""
    logging.debug("Found Type name construct")
    _ensure_correct_variable_structure(tokens)
    return generic_instruction("type_name_construct")
def _handle_generic_construct(self, tokens: List[Token]):
    """Fallback handler: wrap any unrecognized statement line in a plain
    generic instruction."""
    logging.debug("Found generic instruction")
    return generic_instruction("generic_instruction")
def _ensure_correct_variable_structure(tokens: List[Token]):
    """Validate that *tokens* form a variable declaration.

    Expected layout: TYPE_NAME IDENTIFIER ;|( = EXPRESSION)

    Raises:
        JavaSyntaxError: on any deviation, pointing at the offending token's
            source location.
    """
    count = len(tokens)
    if count < 3:
        raise JavaSyntaxError(f"{tokens[0].location}: Ill-formed type construct! Expected at least 3 tokens, got {count}")
    name_token, delimiter_token = tokens[1], tokens[2]
    if name_token.type != Token_type.UNKNOWN:
        raise JavaSyntaxError(f"{name_token.location}: Illegal token after type name! Expected UNKNOWN, got {str(name_token.type)}")
    if delimiter_token.type not in (Token_type.SEMICOLON, Token_type.EQUAL_SIGN):
        raise JavaSyntaxError(f"{delimiter_token.location}: Illegal token after variable name! Expected SEMICOLON or EQUAL_SIGN, got {str(delimiter_token.type)}")
    # An assignment needs at least: TYPE_NAME IDENTIFIER = EXPRESSION ;
    if delimiter_token.type == Token_type.EQUAL_SIGN and count < 5:
        raise JavaSyntaxError(f"{delimiter_token.location}: Ill-formed assignment expression! Expected at least 5 tokens, got {count}")
# def get_scopes(self) -> List[Function_scope]:

View File

@@ -4,12 +4,12 @@ import logging
import re
from typing import List, Optional
from interpreter._token import Token, make_token
from interpreter._token import Token, make_token, SourceLocation
class Tokenizer:
"""This class will take the provided source file and convert it to a list of tokens"""
TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """) #TODO: make this modular
TOKEN_MATCH = re.compile(r"""\(|\)|\{|\}|;|(\n)|\+|-|\*|/|<|>|,| """)
def __init__(self, file_name: str) -> None:
with open(file_name) as f:
@@ -17,8 +17,12 @@ class Tokenizer:
self.source_index = 0
self.line_number = 1
self.source_text = re.sub("(private)|(public)|(protected)", "", self.source_text)
self.type_name_pattern = re.compile('(char)|(int)|(void)|(double)|(Pixel)') #TODO: make this modular
self._filename = file_name
def get_tokens(self) -> List[Token]:
tokens = []
@@ -33,7 +37,7 @@ class Tokenizer:
token = self._get_token(char)
logging.debug(f"found token \"{token}\" on line {self.line_number}")
tokens.append(make_token(token, self.type_name_pattern))
tokens.append(make_token(token, SourceLocation(self._filename, self.line_number), self.type_name_pattern))
return tokens

View File

@@ -33,55 +33,63 @@ class Token_type(IntEnum):
STRING_LITERAL=17
TYPE_NAME=18
class SourceLocation:
    """A position in the interpreted source: file name plus line number,
    used to point error messages at the offending token."""

    def __init__(self, filename: str, line: int) -> None:
        self.filename = filename  # path of the source file
        self.line = line          # line number within that file

    def __str__(self) -> str:
        return f"File {self.filename}, line {self.line}"
class Token:
def __init__(self, type: Token_type, content: Union[str, None]=None) -> None:
def __init__(self, type: Token_type, location: SourceLocation, content: Union[str, None]=None) -> None:
self.type = type
self.content = content
self.location = location
def __str__(self) -> str:
if self.content:
return f"{str(self.type)}: {self.content}"
return f"{self.type}"
def make_token(tag: str, type_name_pattern:re.Pattern) -> Token:
def make_token(tag: str, location: SourceLocation, type_name_pattern:re.Pattern) -> Token:
if tag == '(':
return Token(Token_type.LEFT_PAREN, tag)
return Token(Token_type.LEFT_PAREN, location, tag)
elif tag == ')':
return Token(Token_type.RIGTH_PAREN, tag)
return Token(Token_type.RIGTH_PAREN, location, tag)
elif tag == '{':
return Token(Token_type.LEFT_CURLY, tag)
return Token(Token_type.LEFT_CURLY, location, tag)
elif tag == '}':
return Token(Token_type.RIGHT_CURLY, tag)
return Token(Token_type.RIGHT_CURLY, location, tag)
elif tag == '[':
return Token(Token_type.LEFT_BRACKET, tag)
return Token(Token_type.LEFT_BRACKET, location, tag)
elif tag == ']':
return Token(Token_type.RIGHT_BRACKET, tag)
return Token(Token_type.RIGHT_BRACKET, location, tag)
elif tag == ',':
return Token(Token_type.COMMA, tag)
return Token(Token_type.COMMA, location, tag)
elif tag == '=':
return Token(Token_type.EQUAL_SIGN, tag)
return Token(Token_type.EQUAL_SIGN, location, tag)
elif tag == ';':
return Token(Token_type.SEMICOLON, tag)
return Token(Token_type.SEMICOLON, location, tag)
elif MATH_OP_PATTERN.match(tag):
return Token(Token_type.MATH_OP, tag)
return Token(Token_type.MATH_OP, location, tag)
elif NUMERIC_CONSTANT_PATTERN.match(tag):
return Token(Token_type.NUMERIC_CONSTANT, tag)
return Token(Token_type.NUMERIC_CONSTANT, location, tag)
elif tag == "if":
return Token(Token_type.IF_STATEMENT, tag)
return Token(Token_type.IF_STATEMENT, location, tag)
elif tag == "else":
return Token(Token_type.ELSE_STATEMENT, tag)
return Token(Token_type.ELSE_STATEMENT, location, tag)
elif tag == "while":
return Token(Token_type.WHILE_STATEMENT, tag)
return Token(Token_type.WHILE_STATEMENT, location, tag)
elif tag == "do":
return Token(Token_type.DO_WHILE_STATEMENT, tag)
return Token(Token_type.DO_WHILE_STATEMENT, location, tag)
elif tag == "for":
return Token(Token_type.FOR_STATEMENT, tag)
return Token(Token_type.FOR_STATEMENT, location, tag)
elif KEYWORD_PATTERN.match(tag):
return Token(Token_type.KEY_WORD, tag)
return Token(Token_type.KEY_WORD, location, tag)
elif STRING_LITERAL_PATTERN.match(tag):
return Token(Token_type.STRING_LITERAL, tag)
return Token(Token_type.STRING_LITERAL, location, tag)
elif type_name_pattern.match(tag):
return Token(Token_type.TYPE_NAME, tag)
return Token(Token_type.TYPE_NAME, location, tag)
else:
logging.info(f"found unknown token {tag}... Function or variable name?")
return Token(Token_type.UNKNOWN, tag)
return Token(Token_type.UNKNOWN, location, tag)