from consts import * from tokenizer import Token class ParserError(Exception): pass class Node: def __init__(self, node_type, value=None, subtype=None): self.type = node_type self.value = value self.subtype = subtype self.parent = None self.right = None self.left = None def __eq__(self, other): return self.type == other.type and self.value == other.value def __repr__(self): # NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_ if self.type == NodeType.NUMBER: return f"Node({self.value})" if self.type == NodeType.OPERATOR: return f"Node({self.subtype})" if self.type == NodeType.SYMBOL: return f"Node(SYMBOL {self.value})" if self.type == NodeType.FUNCALL: return f"Node(FUNCALL {self.value})" if self.type == NodeType.UNARY: return f"Node(UNARY {self.subtype})" if self.type == NodeType.ASSIGNMENT: return f"Node(ASSIGNMENT)" return "Node(repr not defined)" __str__ = __repr__ def parse_parenthesis(tokens, start): end = start depth = 0 while True: token = tokens[end] if token.type == TokenType.RIGHT_PARENTHESIS: depth -= 1 if token.type == TokenType.LEFT_PARENTHESIS: depth += 1 if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS: break end += 1 node = parse_expr(tokens[start + 1: end]) return node, end def parse_args(tokens, start): end = start depth = 0 while True: token = tokens[end] if token.type == TokenType.RIGHT_PARENTHESIS: depth -= 1 if token.type == TokenType.LEFT_PARENTHESIS: depth += 1 if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS: break end += 1 args = [] arg = [] for token in tokens[start + 1: end]: if token.type != TokenType.COMMA: arg.append(token) else: args.append(arg.copy()) arg.clear() if len(arg) != 0: args.append(arg) return list(map(parse_expr, args)) def parse_expr(tokens, start): state = State.NAME i = start current_node = None while i < len(tokens): token = tokens[i] if state == State.OPERATOR and token.type == TokenType.LEFT_PARENTHESIS: args = parse_args(tokens, i) current_node.type = NodeType.FUNCALL current_node.value = (current_node.value, args) elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == TokenType.UNARY: if token.type == TokenType.LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) state = State.NAME elif token.type == TokenType.NUMBER: node = Node(NodeType.NUMBER, value=token.value) state = State.OPERATOR elif token.type == TokenType.SYMBOL: node = Node(NodeType.SYMBOL, value=token.value) state = State.OPERATOR elif token.subtype == TokenType.UNARY: node = Node(NodeType.OPERATOR, subtype=NodeType.UNARY) node.left = Node(NodeType.OPERATOR, subtype=token.value) state = State.NAME if current_node is None: current_node = node elif current_node.type == NodeType.NUMBER: raise ValueError("Not a valid expression") elif current_node.type == NodeType.OPERATOR: if current_node.left is None: current_node.left = node node.parent = current_node elif current_node.right is None: current_node.right = node node.parent = current_node current_node = node else: raise ValueError("Not a valid expression") elif token.type == TokenType.OPERATOR: node = Node(NodeType.OPERATOR, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") elif current_node.type in [NodeType.NUMBER, NodeType.OPERATOR, NodeType.FUNCALL]: while current_node.parent is not None: if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent if node.parent is not None: node.parent.right = node break current_node = current_node.parent current_node.parent = node node.left = current_node current_node = node else: raise ValueError("Not a valid expression") state = State.NAME elif token.type == TokenType.SEMICOLON: break i += 1 while current_node.parent is not None: current_node = current_node.parent return current_node, i - 1 def parse_let_statement(tokens: list[Token], start: int): class LetStates: (WAIT_LET, WAIT_SYMBOL, WAIT_EQUALS, WAIT_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100) root_node = Node(NodeType.ASSIGNMENT) state = LetStates.WAIT_LET i = start while i < len(tokens): token = tokens[i] if state == LetStates.WAIT_LET: if token.type == TokenType.KEYWORD and token.subtype == Keyword.LET: state = LetStates.WAIT_SYMBOL else: raise ParserError(f"Waited for let, got {token}") elif state == LetStates.WAIT_SYMBOL: if token.type == TokenType.SYMBOL: node = Node(NodeType.SYMBOL, value=token.value) root_node.left = node state = LetStates.WAIT_EQUALS else: raise ParserError(f"Waited for symbol, got {token}") elif state == LetStates.WAIT_EQUALS: if token.type == TokenType.EQUALS: state = LetStates.WAIT_EXPR else: raise ParserError(f"Waited for equals, got {token}") elif state == LetStates.WAIT_EXPR: node, i = parse_expr(tokens, i) root_node.right = node state = LetStates.WAIT_SEMICOLON elif state == LetStates.WAIT_SEMICOLON: if token.type == TokenType.SEMICOLON: state = LetStates.PARSE_END break else: raise ParserError(f"Waited for semicolon, got {token}") i += 1 if state != LetStates.PARSE_END: raise ParserError("Statement ended unexpectedly") return root_node, i def parse(tokens: list[Token]): _statements = [ [Keyword.LET, TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON], [Keyword.FN, TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.LEFT_BRACE, ["STATEMENTS"], TokenType.RIGHT_BRACE], ] statements = [] i = 0 while i < len(tokens): if tokens[i].type == TokenType.KEYWORD and tokens[i].subtype == Keyword.LET: node, i = parse_let_statement(tokens, i) statements.append(node) i += 1 return statements