diff options
| author | Andrew <saintruler@gmail.com> | 2021-07-12 12:32:15 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2021-07-12 12:32:15 +0400 |
| commit | 49df60f32aca6428706bc895b7e48ab2d68444b5 (patch) | |
| tree | d78ba857afd5940c63ba4da11a9324b94f8c448f /parser.py | |
| parent | e3ae2c6c9b3a9d766b76f790ed18621765ed890d (diff) | |
Added ability to set variables.
Diffstat (limited to 'parser.py')
| -rw-r--r-- | parser.py | 137 |
1 files changed, 110 insertions, 27 deletions
@@ -1,4 +1,9 @@ from consts import * +from tokenizer import Token + + +class ParserError(Exception): + pass class Node: @@ -14,20 +19,22 @@ class Node: return self.type == other.type and self.value == other.value def __repr__(self): - if self.type == TokenType.LEFT_PARENTHESIS: - return "Node(LEFT_PARENTHESIS)" - if self.type == TokenType.RIGHT_PARENTHESIS: - return "Node(RIGHT_PARENTHESIS)" - if self.type == TokenType.NUMBER: + # NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_ + if self.type == NodeType.NUMBER: return f"Node({self.value})" - if self.type == TokenType.OPERATOR: + if self.type == NodeType.OPERATOR: return f"Node({self.subtype})" - if self.type == TokenType.COMMA: - return f"Node(COMMA)" - if self.type == TokenType.SYMBOL: + if self.type == NodeType.SYMBOL: return f"Node(SYMBOL {self.value})" - if self.type == FUNCALL: + if self.type == NodeType.FUNCALL: return f"Node(FUNCALL {self.value})" + if self.type == NodeType.UNARY: + return f"Node(UNARY {self.subtype})" + if self.type == NodeType.ASSIGNMENT: + return f"Node(ASSIGNMENT)" + + return "Node(repr not defined)" + __str__ = __repr__ @@ -43,7 +50,7 @@ def parse_parenthesis(tokens, start): if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS: break end += 1 - node = build_tree(tokens[start + 1: end]) + node = parse_expr(tokens[start + 1: end]) return node, end @@ -71,39 +78,45 @@ def parse_args(tokens, start): if len(arg) != 0: args.append(arg) - return list(map(build_tree, args)) + return list(map(parse_expr, args)) -def build_tree(tokens): +def parse_expr(tokens, start): state = State.NAME - i = 0 + i = start current_node = None while i < len(tokens): token = tokens[i] + if state == State.OPERATOR and token.type == TokenType.LEFT_PARENTHESIS: args = parse_args(tokens, i) - current_node.type = FUNCALL + current_node.type = NodeType.FUNCALL current_node.value = (current_node.value, args) - elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == UNARY: + + elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == TokenType.UNARY: + if token.type == TokenType.LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) state = State.NAME + elif token.type == TokenType.NUMBER: - node = Node(TokenType.NUMBER, value=token.value) + node = Node(NodeType.NUMBER, value=token.value) state = State.OPERATOR + elif token.type == TokenType.SYMBOL: - node = Node(TokenType.SYMBOL, value=token.value) + node = Node(NodeType.SYMBOL, value=token.value) state = State.OPERATOR - elif token.subtype == UNARY: - node = Node(TokenType.OPERATOR, subtype=UNARY) - node.left = Node(TokenType.OPERATOR, subtype=token.value) + + elif token.subtype == TokenType.UNARY: + node = Node(NodeType.OPERATOR, subtype=NodeType.UNARY) + node.left = Node(NodeType.OPERATOR, subtype=token.value) state = State.NAME if current_node is None: current_node = node - elif current_node.type == TokenType.NUMBER: + elif current_node.type == NodeType.NUMBER: raise ValueError("Not a valid expression") - elif current_node.type == TokenType.OPERATOR: + elif current_node.type == NodeType.OPERATOR: if current_node.left is None: current_node.left = node node.parent = current_node @@ -113,11 +126,12 @@ def build_tree(tokens): current_node = node else: raise ValueError("Not a valid expression") + elif token.type == TokenType.OPERATOR: - node = Node(token.type, subtype=token.subtype, value=token.value) + node = Node(NodeType.OPERATOR, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") - elif current_node.type in [TokenType.NUMBER, TokenType.OPERATOR, FUNCALL]: + elif current_node.type in [NodeType.NUMBER, NodeType.OPERATOR, NodeType.FUNCALL]: while current_node.parent is not None: if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent @@ -129,11 +143,80 @@ def build_tree(tokens): node.left = current_node current_node = node else: - print(current_node, token) raise ValueError("Not a valid expression") state = State.NAME + + elif token.type == TokenType.SEMICOLON: + break + i += 1 while current_node.parent is not None: current_node = current_node.parent - return current_node + return current_node, i - 1 + + +def parse_let_statement(tokens: list[Token], start: int): + class LetStates: + (WAIT_LET, WAIT_SYMBOL, WAIT_EQUALS, WAIT_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100) + + root_node = Node(NodeType.ASSIGNMENT) + state = LetStates.WAIT_LET + i = start + while i < len(tokens): + token = tokens[i] + if state == LetStates.WAIT_LET: + if token.type == TokenType.KEYWORD and token.subtype == Keyword.LET: + state = LetStates.WAIT_SYMBOL + else: + raise ParserError(f"Waited for let, got {token}") + + elif state == LetStates.WAIT_SYMBOL: + if token.type == TokenType.SYMBOL: + node = Node(NodeType.SYMBOL, value=token.value) + root_node.left = node + state = LetStates.WAIT_EQUALS + else: + raise ParserError(f"Waited for symbol, got {token}") + + elif state == LetStates.WAIT_EQUALS: + if token.type == TokenType.EQUALS: + state = LetStates.WAIT_EXPR + else: + raise ParserError(f"Waited for equals, got {token}") + + elif state == LetStates.WAIT_EXPR: + node, i = parse_expr(tokens, i) + root_node.right = node + state = LetStates.WAIT_SEMICOLON + + elif state == LetStates.WAIT_SEMICOLON: + if token.type == TokenType.SEMICOLON: + state = LetStates.PARSE_END + break + else: + raise ParserError(f"Waited for semicolon, got {token}") + + i += 1 + + if state != LetStates.PARSE_END: + raise ParserError("Statement ended unexpectedly") + + return root_node, i + + +def parse(tokens: list[Token]): + _statements = [ + [Keyword.LET, TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON], + [Keyword.FN, TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.LEFT_BRACE, ["STATEMENTS"], TokenType.RIGHT_BRACE], + ] + + statements = [] + + i = 0 + while i < len(tokens): + if tokens[i].type == TokenType.KEYWORD and tokens[i].subtype == Keyword.LET: + node, i = parse_let_statement(tokens, i) + statements.append(node) + i += 1 + return statements |