diff options
| author | Andrew <saintruler@gmail.com> | 2021-07-12 12:32:15 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2021-07-12 12:32:15 +0400 |
| commit | 49df60f32aca6428706bc895b7e48ab2d68444b5 (patch) | |
| tree | d78ba857afd5940c63ba4da11a9324b94f8c448f | |
| parent | e3ae2c6c9b3a9d766b76f790ed18621765ed890d (diff) | |
Added ability to set variables.
| -rw-r--r-- | consts.py | 33 | ||||
| -rw-r--r-- | main.py | 38 | ||||
| -rw-r--r-- | parser.py | 137 | ||||
| -rw-r--r-- | tokenizer.py | 52 |
4 files changed, 203 insertions, 57 deletions
@@ -4,18 +4,43 @@ class State: class TokenType: ( - LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_ + LEFT_PARENTHESIS, RIGHT_PARENTHESIS, + LEFT_BRACE, RIGHT_BRACE, + NUMBER, OPERATOR, SYMBOL, KEYWORD, UNARY, + SEMICOLON, COMMA, EQUALS, *_ ) = range(100) -UNARY = "unary" -FUNCALL = "funcall" +class NodeType: + ( + NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_ + ) = range(100) + + +class Keyword: + LET, STRUCT, FN, RETURN, IF, ELSE, FOR, WHILE, CONTINUE, BREAK, *_ = range(100) + + +keywords = { + "let": Keyword.LET, "struct": Keyword.STRUCT, + "fn": Keyword.FN, "return": Keyword.RETURN, + "if": Keyword.IF, "else": Keyword.ELSE, + "for": Keyword.FOR, "while": Keyword.WHILE, "continue": Keyword.CONTINUE, "break": Keyword.BREAK, +} + +keywords_repr = {keywords[i]: i for i in keywords} + +types = [ + "i8", "i16", "i32", "i64", + "f32", "f64", +] + PRECEDENCE = { "+": 10, "-": 10, "*": 20, "^": 30, - UNARY: 40, + NodeType.UNARY: 40, } OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@" @@ -2,7 +2,7 @@ import sys from consts import * from tokenizer import tokenize -from parser import build_tree +from parser import parse, parse_expr # RUNTIME @@ -37,33 +37,45 @@ runtime_functions = { # END OF RUNTIME -def expr_eval(node): - if node.type == NUMBER: +def _eval(node): + if node.type == NodeType.NUMBER: return node.value - elif node.type == SYMBOL: + + elif node.type == NodeType.SYMBOL: return runtime.get(node.value) - elif node.type == FUNCALL: + + elif node.type == NodeType.FUNCALL: fun_name = node.value[0] - args = list(map(expr_eval, node.value[1])) + args = list(map(_eval, node.value[1])) return runtime_functions.get(fun_name)(*args) - elif node.type == OPERATOR: - if node.subtype == UNARY: - return unary_operators.get(node.left.subtype)(expr_eval(node.right)) + + elif node.type == NodeType.OPERATOR: + if node.subtype == NodeType.UNARY: + return unary_operators.get(node.left.subtype)(_eval(node.right)) else: - return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right)) + return operators.get(node.subtype)(_eval(node.left), _eval(node.right)) + + elif node.type == NodeType.ASSIGNMENT: + runtime[node.left.value] = _eval(node.right) def main(*argv): if len(argv) != 0: data = argv[0] else: - data = "2 + 3 * 4" + # data = "2 + 3 * 4" + data = "let a = 2 + 3; let e = 4;" # data = "(3 + 3 * 2) * -4" # data = "(2 + factorial(4, )) * 9" tokens = tokenize(data) - node1 = build_tree(tokens) - print(expr_eval(node1)) + # print(tokens) + statements = parse(tokens) + for statement in statements: + _eval(statement) + # node, _ = parse_expr(tokens, 0) + # print(expr_eval(node)) + print(1) if __name__ == "__main__": @@ -1,4 +1,9 @@ from consts import * +from tokenizer import Token + + +class ParserError(Exception): + pass class Node: @@ -14,20 +19,22 @@ class Node: return self.type == other.type and self.value == other.value def __repr__(self): - if self.type == TokenType.LEFT_PARENTHESIS: - return "Node(LEFT_PARENTHESIS)" - if self.type == TokenType.RIGHT_PARENTHESIS: - return "Node(RIGHT_PARENTHESIS)" - if self.type == TokenType.NUMBER: + # NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_ + if self.type == NodeType.NUMBER: return f"Node({self.value})" - if self.type == TokenType.OPERATOR: + if self.type == NodeType.OPERATOR: return f"Node({self.subtype})" - if self.type == TokenType.COMMA: - return f"Node(COMMA)" - if self.type == TokenType.SYMBOL: + if self.type == NodeType.SYMBOL: return f"Node(SYMBOL {self.value})" - if self.type == FUNCALL: + if self.type == NodeType.FUNCALL: return f"Node(FUNCALL {self.value})" + if self.type == NodeType.UNARY: + return f"Node(UNARY {self.subtype})" + if self.type == NodeType.ASSIGNMENT: + return f"Node(ASSIGNMENT)" + + return "Node(repr not defined)" + __str__ = __repr__ @@ -43,7 +50,7 @@ def parse_parenthesis(tokens, start): if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS: break end += 1 - node = build_tree(tokens[start + 1: end]) + node = parse_expr(tokens[start + 1: end]) return node, end @@ -71,39 +78,45 @@ def parse_args(tokens, start): if len(arg) != 0: args.append(arg) - return list(map(build_tree, args)) + return list(map(parse_expr, args)) -def build_tree(tokens): +def parse_expr(tokens, start): state = State.NAME - i = 0 + i = start current_node = None while i < len(tokens): token = tokens[i] + if state == State.OPERATOR and token.type == TokenType.LEFT_PARENTHESIS: args = parse_args(tokens, i) - current_node.type = FUNCALL + current_node.type = NodeType.FUNCALL current_node.value = (current_node.value, args) - elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == UNARY: + + elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == TokenType.UNARY: + if token.type == TokenType.LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) state = State.NAME + elif token.type == TokenType.NUMBER: - node = Node(TokenType.NUMBER, value=token.value) + node = Node(NodeType.NUMBER, value=token.value) state = State.OPERATOR + elif token.type == TokenType.SYMBOL: - node = Node(TokenType.SYMBOL, value=token.value) + node = Node(NodeType.SYMBOL, value=token.value) state = State.OPERATOR - elif token.subtype == UNARY: - node = Node(TokenType.OPERATOR, subtype=UNARY) - node.left = Node(TokenType.OPERATOR, subtype=token.value) + + elif token.subtype == TokenType.UNARY: + node = Node(NodeType.OPERATOR, subtype=NodeType.UNARY) + node.left = Node(NodeType.OPERATOR, subtype=token.value) state = State.NAME if current_node is None: current_node = node - elif current_node.type == TokenType.NUMBER: + elif current_node.type == NodeType.NUMBER: raise ValueError("Not a valid expression") - elif current_node.type == TokenType.OPERATOR: + elif current_node.type == NodeType.OPERATOR: if current_node.left is None: current_node.left = node node.parent = current_node @@ -113,11 +126,12 @@ def build_tree(tokens): current_node = node else: raise ValueError("Not a valid expression") + elif token.type == TokenType.OPERATOR: - node = Node(token.type, subtype=token.subtype, value=token.value) + node = Node(NodeType.OPERATOR, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") - elif current_node.type in [TokenType.NUMBER, TokenType.OPERATOR, FUNCALL]: + elif current_node.type in [NodeType.NUMBER, NodeType.OPERATOR, NodeType.FUNCALL]: while current_node.parent is not None: if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent @@ -129,11 +143,80 @@ def build_tree(tokens): node.left = current_node current_node = node else: - print(current_node, token) raise ValueError("Not a valid expression") state = State.NAME + + elif token.type == TokenType.SEMICOLON: + break + i += 1 while current_node.parent is not None: current_node = current_node.parent - return current_node + return current_node, i - 1 + + +def parse_let_statement(tokens: list[Token], start: int): + class LetStates: + (WAIT_LET, WAIT_SYMBOL, WAIT_EQUALS, WAIT_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100) + + root_node = Node(NodeType.ASSIGNMENT) + state = LetStates.WAIT_LET + i = start + while i < len(tokens): + token = tokens[i] + if state == LetStates.WAIT_LET: + if token.type == TokenType.KEYWORD and token.subtype == Keyword.LET: + state = LetStates.WAIT_SYMBOL + else: + raise ParserError(f"Waited for let, got {token}") + + elif state == LetStates.WAIT_SYMBOL: + if token.type == TokenType.SYMBOL: + node = Node(NodeType.SYMBOL, value=token.value) + root_node.left = node + state = LetStates.WAIT_EQUALS + else: + raise ParserError(f"Waited for symbol, got {token}") + + elif state == LetStates.WAIT_EQUALS: + if token.type == TokenType.EQUALS: + state = LetStates.WAIT_EXPR + else: + raise ParserError(f"Waited for equals, got {token}") + + elif state == LetStates.WAIT_EXPR: + node, i = parse_expr(tokens, i) + root_node.right = node + state = LetStates.WAIT_SEMICOLON + + elif state == LetStates.WAIT_SEMICOLON: + if token.type == TokenType.SEMICOLON: + state = LetStates.PARSE_END + break + else: + raise ParserError(f"Waited for semicolon, got {token}") + + i += 1 + + if state != LetStates.PARSE_END: + raise ParserError("Statement ended unexpectedly") + + return root_node, i + + +def parse(tokens: list[Token]): + _statements = [ + [Keyword.LET, TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON], + [Keyword.FN, TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.LEFT_BRACE, ["STATEMENTS"], TokenType.RIGHT_BRACE], + ] + + statements = [] + + i = 0 + while i < len(tokens): + if tokens[i].type == TokenType.KEYWORD and tokens[i].subtype == Keyword.LET: + node, i = parse_let_statement(tokens, i) + statements.append(node) + i += 1 + return statements diff --git a/tokenizer.py b/tokenizer.py index 13b824a..4fd534c 100644 --- a/tokenizer.py +++ b/tokenizer.py @@ -6,7 +6,7 @@ class TokenizerError(Exception): class Token: - def __init__(self, node_type, value=None, subtype=None): + def __init__(self, node_type, *, value=None, subtype=None): self.type = node_type self.value = value self.subtype = subtype @@ -27,6 +27,19 @@ class Token: return f"Token(COMMA)" if self.type == TokenType.SYMBOL: return f"Token(SYMBOL {self.value})" + if self.type == TokenType.KEYWORD: + return f"Token(KEYWORD {keywords_repr[self.subtype]})" + if self.type == TokenType.EQUALS: + return f"Token(=)" + if self.type == TokenType.SEMICOLON: + return f"Token(;)" + if self.type == TokenType.LEFT_BRACE: + return "Token(LEFT_BRACE)" + if self.type == TokenType.LEFT_BRACE: + return "Token(RIGHT_BRACE)" + + + return f"Token(repr not defined)" __str__ = __repr__ @@ -60,35 +73,48 @@ def tokenize(line): while i < len(line): char = line[i] if char == "(": - tokens.append(Token(TokenType.LEFT_PARENTHESIS, None)) + tokens.append(Token(TokenType.LEFT_PARENTHESIS)) state = State.NAME elif char == ")": - tokens.append(Token(TokenType.RIGHT_PARENTHESIS, None)) + tokens.append(Token(TokenType.RIGHT_PARENTHESIS)) state = State.OPERATOR elif char == ",": - tokens.append(Token(TokenType.COMMA, None)) + tokens.append(Token(TokenType.COMMA)) + state = State.NAME + elif char == ";": + tokens.append(Token(TokenType.SEMICOLON)) + state = State.NAME + elif char == "{": + tokens.append(Token(TokenType.LEFT_BRACE)) + state = State.NAME + elif char == "}": + tokens.append(Token(TokenType.RIGHT_BRACE)) state = State.NAME elif char in OPERATOR_CHARS: - if state == State.OPERATOR: - val, i = parse_operator(line, i) + val, i = parse_operator(line, i) + if val == "=": + tokens.append(Token(TokenType.EQUALS)) + state = State.NAME + elif state == State.OPERATOR: tokens.append(Token(TokenType.OPERATOR, subtype=val)) state = State.NAME elif state == State.NAME: - val, i = parse_operator(line, i) - tokens.append(Token(TokenType.OPERATOR, subtype=UNARY, value=val)) + tokens.append(Token(TokenType.OPERATOR, subtype=TokenType.UNARY, value=val)) state = State.NAME elif char in NUMBER_CHARS: val, i = parse_number(line, i) - tokens.append(Token(TokenType.NUMBER, val)) + tokens.append(Token(TokenType.NUMBER, value=val)) state = State.OPERATOR elif char in SYMBOL_CHARS: val, i = parse_symbol(line, i) - tokens.append(Token(TokenType.SYMBOL, val)) - state = State.OPERATOR + if val in keywords: + tokens.append(Token(TokenType.KEYWORD, subtype=keywords[val])) + state = State.NAME + else: + tokens.append(Token(TokenType.SYMBOL, value=val)) + state = State.OPERATOR elif char != " ": raise ValueError("Line is not a valid expression") i += 1 return tokens - - |