From 53534ee7071d2d543fc532f65d3941b0a0d4d512 Mon Sep 17 00:00:00 2001 From: weckyy702 Date: Sun, 28 Mar 2021 23:42:54 +0200 Subject: [PATCH] updated interpreter --- interpreter/Lexer.py | 47 ++++++++++++++++++++++++++++++----- interpreter/_token.py | 57 ++++++++++++++++++++++++++++++++----------- 2 files changed, 84 insertions(+), 20 deletions(-) diff --git a/interpreter/Lexer.py b/interpreter/Lexer.py index c15a946..fe0c6a2 100644 --- a/interpreter/Lexer.py +++ b/interpreter/Lexer.py @@ -5,7 +5,7 @@ import re from typing import List, overload from interpreter.function_scope import Function_scope -from interpreter._token import Token, make_token +from interpreter._token import Token, make_token, Token_type class Lexer: """This class will lex the provided Java source and generate a list of Function_scopes""" @@ -17,9 +17,12 @@ class Lexer: self.source_index = 0 self.line_number = 1 - def lex(self) -> List[Token]: + self._tokens = [] + self._token_index = 0 - tokens = [] + self.token_type_pattern = re.compile('(char)|(int)|(void)|(double)') + + def _get_tokens(self): while char := self._consume(): @@ -30,10 +33,31 @@ class Lexer: continue token = self._get_token(char) - logging.debug(f"found token \"{token}\" on line {self.line_number}") - #tokens.append(make_token(token)) + #logging.debug(f"found token \"{token}\" on line {self.line_number}") + self._tokens.append(make_token(token, self.token_type_pattern)) + + def get_scopes(self) -> List[Function_scope]: + if not self._tokens: + self._get_tokens() + + while token := self._consume_token(): + if token.type == Token_type.UNKNOWN: + logging.debug(token) + elif token.type == Token_type.TYPE_NAME: + if self._peek_token().type != Token_type.UNKNOWN: + logging.error("Illegal identifier after Type name!") + raise Exception("Illegal identifier after Type name!") + elif self._peek_token(1).type == Token_type.LEFT_PAREN: + logging.debug(f"Function definition found: {token.content} {self._peek_token().content} ()") + 
self._consume_token() + self._consume_token() + args = "" + while function_token := self._consume_token(): + if function_token.type == Token_type.RIGHT_PAREN: + break + print(function_token.type) + - return tokens def _get_token(self, char: str) -> str: token = char @@ -76,6 +100,17 @@ class Lexer: self.source_index += 1 return char + def _peek_token(self, offset:int=0): + if (self._token_index+offset) >= len(self._tokens): + return None + return self._tokens[self._token_index+offset] + + def _consume_token(self): + token = self._peek_token() + self._token_index+=1 + return token + + @overload def _consume_until(self, end_token: str) -> str:... diff --git a/interpreter/_token.py b/interpreter/_token.py index 27af5d0..36b6d5c 100644 --- a/interpreter/_token.py +++ b/interpreter/_token.py @@ -1,12 +1,15 @@ """Private definitions for Token class used by the Lexer""" +import logging import re from enum import IntEnum from typing import Union NUMERIC_CONSTANT_PATTERN = re.compile("([0-9]+)|(true)|(false)") -KEYWORD_PATTERN = re.compile("(return)|(continue)|(break)") +KEYWORD_PATTERN = re.compile("(return)|(continue)|(break)|(new)") +STRING_LITERAL_PATTERN = re.compile(r"('|\")(.*)\1") +MATH_OP_PATTERN = re.compile(r"\+|-|\*|/") class Token_type(IntEnum): UNKNOWN=-1 @@ -17,20 +20,30 @@ class Token_type(IntEnum): LEFT_BRACKET=4, RIGHT_BRACKET=5, COMMA=6, - NUMERIC_CONSTANT=7, - IF_STATEMENT=8, - WHILE_STATEMENT=9, - DO_WHILE_STATEMENT=10, - FOR_STATEMENT=11, - KEY_WORD=13, - STRING_LITERAL=14 + EQUAL_SIGN=7, + SEMICOLON=8 + MATH_OP=9 + NUMERIC_CONSTANT=10, + IF_STATEMENT=11, + ELSE_STATEMENT=12, + WHILE_STATEMENT=13, + DO_WHILE_STATEMENT=14, + FOR_STATEMENT=15, + KEY_WORD=16, + STRING_LITERAL=17 + TYPE_NAME=18 class Token: def __init__(self, type: Token_type, content: Union[str, None]=None) -> None: self.type = type self.content = content -def make_token(tag: str) -> Token: + def __str__(self) -> str: + if self.content: + return f"{self.type}: {self.content}" + return 
f"{self.type}" + +def make_token(tag: str, type_name_pattern:re.Pattern) -> Token: if tag == '(': return Token(Token_type.LEFT_PAREN) elif tag == ')': return Token(Token_type.RIGHT_PAREN) @@ -45,14 +58,30 @@ def make_token(tag: str) -> Token: return Token(Token_type.RIGHT_BRACKET) elif tag == ',': return Token(Token_type.COMMA) + elif tag == '=': + return Token(Token_type.EQUAL_SIGN) + elif tag == ';': + return Token(Token_type.SEMICOLON) + elif MATH_OP_PATTERN.match(tag): + return Token(Token_type.MATH_OP) elif NUMERIC_CONSTANT_PATTERN.match(tag): return Token(Token_type.NUMERIC_CONSTANT, tag) - elif tag == 'if': + elif tag == "if": return Token(Token_type.IF_STATEMENT) - elif tag == 'while': + elif tag == "else": + return Token(Token_type.ELSE_STATEMENT) + elif tag == "while": return Token(Token_type.WHILE_STATEMENT) - elif tag == 'do': + elif tag == "do": return Token(Token_type.DO_WHILE_STATEMENT) - elif tag == 'for': + elif tag == "for": return Token(Token_type.FOR_STATEMENT) - #TODO: finish this \ No newline at end of file + elif KEYWORD_PATTERN.match(tag): + return Token(Token_type.KEY_WORD, tag) + elif STRING_LITERAL_PATTERN.match(tag): + return Token(Token_type.STRING_LITERAL, tag[1:-1]) + elif type_name_pattern.match(tag): + return Token(Token_type.TYPE_NAME, tag) + else: + logging.warning(f"unknown token {tag}") + return Token(Token_type.UNKNOWN, tag) \ No newline at end of file