from consts import * from tokenizer import Token class ParserError(Exception): pass class Node: def __init__(self, node_type, *, value=None, subtype=None): self.type = node_type self.value = value self.subtype = subtype self.parent = None self.right = None self.left = None def __eq__(self, other): return self.type == other.type and self.value == other.value def __repr__(self): # NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_ if self.type == NodeType.NUMBER: return f"Node({self.value})" if self.type == NodeType.OPERATOR: return f"Node({self.subtype})" if self.type == NodeType.SYMBOL: return f"Node(SYMBOL {self.value})" if self.type == NodeType.FUNCALL: return f"Node(FUNCALL {self.value})" if self.type == NodeType.UNARY: return f"Node(UNARY {self.subtype})" if self.type == NodeType.ASSIGNMENT: return f"Node(ASSIGNMENT)" return "Node(repr not defined)" __str__ = __repr__ def parse_parenthesis(tokens, start): end = start depth = 0 while True: token = tokens[end] if token.type == TokenType.RIGHT_PARENTHESIS: depth -= 1 if token.type == TokenType.LEFT_PARENTHESIS: depth += 1 if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS: break end += 1 node, _ = parse_expr(tokens[start + 1: end], 0) return node, end def parse_args(tokens, start): end = start depth = 0 while True: token = tokens[end] if token.type == TokenType.RIGHT_PARENTHESIS: depth -= 1 if token.type == TokenType.LEFT_PARENTHESIS: depth += 1 if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS: break end += 1 args = [] arg = [] for token in tokens[start + 1: end]: if token.type != TokenType.COMMA: arg.append(token) else: args.append(arg.copy()) arg.clear() if len(arg) != 0: args.append(arg) tmp = [] for arg in args: node, _ = parse_expr(arg, 0) tmp.append(node) return tmp def parse_expr(tokens, start): state = State.NAME i = start current_node = None while i < len(tokens): token = tokens[i] if state == State.OPERATOR and token.type == TokenType.LEFT_PARENTHESIS: args = parse_args(tokens, i) current_node.type = NodeType.FUNCALL current_node.left = Node(NodeType.SYMBOL, value=current_node.value) current_node.right = Node(NodeType.ARGUMENTS, value=args) elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == TokenType.UNARY: if token.type == TokenType.LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) state = State.NAME elif token.type == TokenType.NUMBER: node = Node(NodeType.NUMBER, value=token.value) state = State.OPERATOR elif token.type == TokenType.SYMBOL: node = Node(NodeType.SYMBOL, value=token.value) state = State.OPERATOR elif token.subtype == TokenType.UNARY: node = Node(NodeType.OPERATOR, subtype=NodeType.UNARY) node.left = Node(NodeType.OPERATOR, subtype=token.value) state = State.NAME if current_node is None: current_node = node elif current_node.type == NodeType.NUMBER: raise ValueError("Not a valid expression") elif current_node.type == NodeType.OPERATOR: if current_node.left is None: current_node.left = node node.parent = current_node elif current_node.right is None: current_node.right = node node.parent = current_node current_node = node else: raise ValueError("Not a valid expression") elif token.type == TokenType.OPERATOR: node = Node(NodeType.OPERATOR, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") elif current_node.type in [NodeType.NUMBER, NodeType.OPERATOR, NodeType.FUNCALL]: while current_node.parent is not None: if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent if node.parent is not None: node.parent.right = node break current_node = current_node.parent current_node.parent = node node.left = current_node current_node = node else: raise ValueError("Not a valid expression") state = State.NAME elif token.type in [TokenType.SEMICOLON, TokenType.COMMA]: break i += 1 while current_node.parent is not None: current_node = current_node.parent return current_node, i - 1 def parse_let_statement(tokens: list[Token], start: int): class LetStates: (WAIT_LET, WAIT_SYMBOL, WAIT_EQUALS, WAIT_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100) root_node = Node(NodeType.ASSIGNMENT) state = LetStates.WAIT_LET i = start while i < len(tokens): token = tokens[i] if state == LetStates.WAIT_LET: if token.type == TokenType.KEYWORD and token.subtype == Keyword.LET: state = LetStates.WAIT_SYMBOL else: raise ParserError(f"Waited for let, got {token}") elif state == LetStates.WAIT_SYMBOL: if token.type == TokenType.SYMBOL: node = Node(NodeType.SYMBOL, value=token.value) root_node.left = node state = LetStates.WAIT_EQUALS else: raise ParserError(f"Waited for symbol, got {token}") elif state == LetStates.WAIT_EQUALS: if token.type == TokenType.EQUALS: state = LetStates.WAIT_EXPR else: raise ParserError(f"Waited for equals, got {token}") elif state == LetStates.WAIT_EXPR: node, i = parse_expr(tokens, i) root_node.right = node state = LetStates.WAIT_SEMICOLON elif state == LetStates.WAIT_SEMICOLON: if token.type == TokenType.SEMICOLON: state = LetStates.PARSE_END break else: raise ParserError(f"Waited for semicolon, got {token}") i += 1 if state != LetStates.PARSE_END: raise ParserError("Statement ended unexpectedly") return root_node, i def parse_function_call(tokens: list[Token], start: int): class FuncStates: (WAIT_SYMBOL, WAIT_LEFT_PAR, PARSING_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100) root_node = Node(NodeType.FUNCALL) state = FuncStates.WAIT_SYMBOL i = start args = [] arg = [] while i < len(tokens): token = tokens[i] if state == FuncStates.WAIT_SYMBOL: if token.type == TokenType.SYMBOL: root_node.left = Node(NodeType.SYMBOL, value=token.value) state = FuncStates.WAIT_LEFT_PAR else: raise ParserError(f"Waited for symbol, got {token}") elif state == FuncStates.WAIT_LEFT_PAR: if token.type == TokenType.LEFT_PARENTHESIS: state = FuncStates.PARSING_EXPR else: raise ParserError(f"Waited for left parenthesis, got {token}") elif state == FuncStates.PARSING_EXPR: if token.type == TokenType.COMMA: node, _ = parse_expr(arg, 0) args.append(node) arg.clear() elif token.type == TokenType.RIGHT_PARENTHESIS: node, _ = parse_expr(arg, 0) args.append(node) root_node.right = Node(NodeType.ARGUMENTS, value=args) state = FuncStates.WAIT_SEMICOLON else: arg.append(token) elif state == FuncStates.WAIT_SEMICOLON: if token.type == TokenType.SEMICOLON: state = FuncStates.PARSE_END break else: raise ParserError(f"Waited for semicolon, got {token}") i += 1 if state != FuncStates.PARSE_END: raise ParserError("Statement ended unexpectedly") return root_node, i def parse(tokens: list[Token]): class ParseState: (ANY, GOT_SYMBOL, *_) = range(100) _statements = [ [Keyword.LET, TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON], [TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON], [TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.SEMICOLON], [Keyword.FN, TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.LEFT_BRACE, ["STATEMENT"], TokenType.RIGHT_BRACE], ] state = ParseState.ANY statements = [] symbol = None i = 0 while i < len(tokens): token = tokens[i] if state == ParseState.ANY: if token.type == TokenType.KEYWORD: if token.subtype == Keyword.LET: node, i = parse_let_statement(tokens, i) statements.append(node) elif token.type == TokenType.SYMBOL: state = ParseState.GOT_SYMBOL elif state == ParseState.GOT_SYMBOL: if token.type == TokenType.LEFT_PARENTHESIS: node, i = parse_function_call(tokens, i - 1) statements.append(node) state = ParseState.ANY else: raise ParserError("oops...") i += 1 return statements