import sys STATE_OPERATOR, STATE_NAME, *_ = range(100) ( LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_ ) = range(100) UNARY = "unary" FUNCALL = "funcall" PRECEDENCE = { "+": 10, "-": 10, "*": 20, "^": 30, UNARY: 40, } OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@" NUMBER_CHARS = "0123456789" SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?" # RUNTIME unary_operators = { "-": lambda a: -a, "+": lambda a: a } operators = { "+": lambda a, b: a + b, "-": lambda a, b: a - b, "*": lambda a, b: a * b, "^": lambda a, b: a ** b, } runtime = { "x": 10 } def factorial(n): if n <= 1: return 1 return n * factorial(n - 1) runtime_functions = { "factorial": factorial } # END OF RUNTIME class ParserError(Exception): pass class Node: def __init__(self, node_type, value=None, subtype=None): self.type = node_type self.value = value self.subtype = subtype self.parent = None self.right = None self.left = None def __eq__(self, other): return self.type == other.type and self.value == other.value def __repr__(self): if self.type == LEFT_PARENTHESIS: return "Node(LEFT_PARENTHESIS)" if self.type == RIGHT_PARENTHESIS: return "Node(RIGHT_PARENTHESIS)" if self.type == NUMBER: return f"Node({self.value})" if self.type == OPERATOR: return f"Node({self.subtype})" if self.type == COMMA: return f"Node(COMMA)" if self.type == SYMBOL: return f"Node(SYMBOL {self.value})" if self.type == FUNCALL: return f"Node(FUNCALL {self.value})" __str__ = __repr__ class Token: def __init__(self, node_type, value=None, subtype=None): self.type = node_type self.value = value self.subtype = subtype def __eq__(self, other): return self.type == other.type and self.value == other.value def __repr__(self): if self.type == LEFT_PARENTHESIS: return "Token(LEFT_PARENTHESIS)" if self.type == RIGHT_PARENTHESIS: return "Token(RIGHT_PARENTHESIS)" if self.type == NUMBER: return f"Token({self.value})" if self.type == OPERATOR: return f"Token({self.subtype})" if self.type == COMMA: return f"Token(COMMA)" if self.type == SYMBOL: return f"Token(SYMBOL {self.value})" __str__ = __repr__ def parse_token(line, start, charset): token = "" i = start while i < len(line) and line[i] in charset: token += line[i] i += 1 return token, i - 1 def parse_operator(line, start): return parse_token(line, start, OPERATOR_CHARS) def parse_number(line, start): val, i = parse_token(line, start, NUMBER_CHARS) return float(val), i def parse_symbol(line, start): return parse_token(line, start, SYMBOL_CHARS) def tokenize(line): state = STATE_NAME tokens = [] i = 0 while i < len(line): char = line[i] if char == "(": tokens.append(Token(LEFT_PARENTHESIS, None)) state = STATE_NAME elif char == ")": tokens.append(Token(RIGHT_PARENTHESIS, None)) state = STATE_OPERATOR elif char == ",": tokens.append(Token(COMMA, None)) state = STATE_NAME elif char in OPERATOR_CHARS: if state == STATE_OPERATOR: val, i = parse_operator(line, i) tokens.append(Token(OPERATOR, subtype=val)) state = STATE_NAME elif state == STATE_NAME: val, i = parse_operator(line, i) tokens.append(Token(OPERATOR, subtype=UNARY, value=val)) state = STATE_NAME elif char in NUMBER_CHARS: val, i = parse_number(line, i) tokens.append(Token(NUMBER, val)) state = STATE_OPERATOR elif char in SYMBOL_CHARS: val, i = parse_symbol(line, i) tokens.append(Token(SYMBOL, val)) state = STATE_OPERATOR elif char != " ": raise ValueError("Line is not a valid expression") i += 1 return tokens def parse_parenthesis(tokens, start): end = start depth = 0 while True: token = tokens[end] if token.type == RIGHT_PARENTHESIS: depth -= 1 if token.type == LEFT_PARENTHESIS: depth += 1 if depth == 0 and token.type == RIGHT_PARENTHESIS: break end += 1 node = build_tree(tokens[start + 1: end]) return node, end def parse_args(tokens, start): end = start depth = 0 while True: token = tokens[end] if token.type == RIGHT_PARENTHESIS: depth -= 1 if token.type == LEFT_PARENTHESIS: depth += 1 if depth == 0 and token.type == RIGHT_PARENTHESIS: break end += 1 args = [] arg = [] for token in tokens[start + 1: end]: if token.type != COMMA: arg.append(token) else: args.append(arg.copy()) arg.clear() if len(arg) != 0: args.append(arg) return list(map(build_tree, args)) def build_tree(tokens): state = STATE_NAME i = 0 current_node = None while i < len(tokens): token = tokens[i] if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS: args = parse_args(tokens, i) current_node.type = FUNCALL current_node.value = (current_node.value, args) elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY: if token.type == LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) state = STATE_NAME elif token.type == NUMBER: node = Node(NUMBER, value=token.value) state = STATE_OPERATOR elif token.type == SYMBOL: node = Node(SYMBOL, value=token.value) state = STATE_OPERATOR elif token.subtype == UNARY: node = Node(OPERATOR, subtype=UNARY) node.left = Node(OPERATOR, subtype=token.value) state = STATE_NAME if current_node is None: current_node = node elif current_node.type == NUMBER: raise ValueError("Not a valid expression") elif current_node.type == OPERATOR: if current_node.left is None: current_node.left = node node.parent = current_node elif current_node.right is None: current_node.right = node node.parent = current_node current_node = node else: raise ValueError("Not a valid expression") elif token.type == OPERATOR: node = Node(token.type, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") elif current_node.type in [NUMBER, OPERATOR, FUNCALL]: while current_node.parent is not None: if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent if node.parent is not None: node.parent.right = node break current_node = current_node.parent current_node.parent = node node.left = current_node current_node = node else: print(current_node, token) raise ValueError("Not a valid expression") state = STATE_NAME i += 1 while current_node.parent is not None: current_node = current_node.parent return current_node def expr_eval(node): if node.type == NUMBER: return node.value elif node.type == SYMBOL: return runtime.get(node.value) elif node.type == FUNCALL: fun_name = node.value[0] args = list(map(expr_eval, node.value[1])) return runtime_functions.get(fun_name)(*args) elif node.type == OPERATOR: if node.subtype == UNARY: return unary_operators.get(node.left.subtype)(expr_eval(node.right)) else: return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right)) def main(*argv): data = argv[0] # data = "2 + 3 * -4" # data = "(3 + 3 * 2) * -4" # data = "(2 + factorial(4, )) * 9" tokens = tokenize(data) # print(tokens) node1 = build_tree(tokens) print(expr_eval(node1)) if __name__ == "__main__": main(*sys.argv[1:])