From 9ce329611211d12d4ca6b98b18650194d1a1d0f7 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 11 Jul 2021 21:59:17 +0400 Subject: Initial commit --- tokenizer.py | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 tokenizer.py (limited to 'tokenizer.py') diff --git a/tokenizer.py b/tokenizer.py new file mode 100644 index 0000000..f8c2460 --- /dev/null +++ b/tokenizer.py @@ -0,0 +1,243 @@ +import sys + + +STATE_ANY, STATE_OPERATOR, STATE_NUMBER, *_ = range(100) +LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, MULTIPLY, PLUS, MINUS, *_ = range(100) +OPERATORS = [MULTIPLY, PLUS, MINUS] + + +class Node: + def __init__(self, node_type, value): + self.type = node_type + self.value = value + self.parent = None + self.right = None + self.left = None + + def __eq__(self, other): + return self.type == other.type and self.value == other.value + + def __repr__(self): + if self.type == LEFT_PARENTHESIS: + return "Node(LEFT_PARENTHESIS)" + if self.type == RIGHT_PARENTHESIS: + return "Node(RIGHT_PARENTHESIS)" + if self.type == NUMBER: + return f"Node({self.value})" + if self.type == MULTIPLY: + return "Node(MULTIPLY)" + if self.type == PLUS: + return "Node(PLUS)" + if self.type == MINUS: + return "Node(MINUS)" + __str__ = __repr__ + + +class Token: + def __init__(self, node_type, value): + self.type = node_type + self.value = value + + def __eq__(self, other): + return self.type == other.type and self.value == other.value + + def __repr__(self): + if self.type == LEFT_PARENTHESIS: + return "Token(LEFT_PARENTHESIS)" + if self.type == RIGHT_PARENTHESIS: + return "Token(RIGHT_PARENTHESIS)" + if self.type == NUMBER: + return f"Token({self.value})" + if self.type == MULTIPLY: + return "Token(MULTIPLY)" + if self.type == PLUS: + return "Token(PLUS)" + if self.type == MINUS: + return "Token(MINUS)" + __str__ = __repr__ + + +def parse_number(line, start): + number = "" + if line[start] in "+-": + number += line[start] + start += 1 + + i = start + while i < len(line) and line[i] in "0123456789": + number += line[i] + i += 1 + + return int(number), i - 1 + + +def tokenize(line): + state = STATE_NUMBER + + tokens = [] + i = 0 + while i < len(line): + char = line[i] + if char == "(": + tokens.append(Token(LEFT_PARENTHESIS, None)) + state = STATE_NUMBER + elif char == ")": + tokens.append(Token(RIGHT_PARENTHESIS, None)) + state = STATE_OPERATOR + elif char == "+" and state == STATE_OPERATOR: + tokens.append(Token(PLUS, None)) + state = STATE_NUMBER + elif char == "-" and state == STATE_OPERATOR: + tokens.append(Token(MINUS, None)) + state = STATE_NUMBER + elif char == "*": + if state == STATE_OPERATOR: + tokens.append(Token(MULTIPLY, None)) + state = STATE_NUMBER + else: + print(1, tokens) + raise ValueError("Line is not a valid expression") + elif char in "-+0123456789": + val, i = parse_number(line, i) + tokens.append(Token(NUMBER, val)) + state = STATE_OPERATOR + elif char != " ": + print(char) + print(2, tokens) + raise ValueError("Line is not a valid expression") + i += 1 + + return tokens + + +def parse_parenthesis(tokens, start): + print(tokens) + i = start + end = start + depth = 0 + print(tokens[i]) + while True: + token = tokens[i] + if token.type == RIGHT_PARENTHESIS: + depth -= 1 + if token.type == LEFT_PARENTHESIS: + depth += 1 + if depth == 0 and token.type == RIGHT_PARENTHESIS: + end = i + 1 + break + i += 1 + print("=== BUILDING PARENTHESIS ===") + print(tokens[start + 1 : end - 1]) + node = build_tree(tokens[start + 1 : end - 1]) + print("=== EXITED PARENTHESIS ===") + return node, end - 1 + + +def build_tree(tokens): + i = 0 + current_node = None + while i < len(tokens): + token = tokens[i] + if token.type == LEFT_PARENTHESIS: + node, i = parse_parenthesis(tokens, i) + if current_node is None: + current_node = node + elif current_node.type == NUMBER: + raise ValueError("Not a valid expression") + elif current_node.type in OPERATORS: + if current_node.left is None: + print(f"Placed number {node} on the left of {current_node}") + current_node.left = node + node.parent = current_node + elif current_node.right is None: + print(f"Placed number {node} on the right of {current_node}") + current_node.right = node + node.parent = current_node + current_node = node + else: + raise ValueError("Not a valid expression") + elif token.type == NUMBER: + node = Node(NUMBER, token.value) + if current_node is None: + print(f"Number {node} is new root") + current_node = node + elif current_node.type in OPERATORS: + if current_node.left is None: + print(f"Placed number {node} on the left of {current_node}") + current_node.left = node + node.parent = current_node + elif current_node.right is None: + print(f"Placed number {node} on the right of {current_node}") + current_node.right = node + node.parent = current_node + current_node = node + else: + raise ValueError("Not a valid expression") + elif token.type in [PLUS, MINUS]: + node = Node(token.type, None) + if current_node is None: + raise ValueError("Not a valid expression") + elif current_node.type == NUMBER: + while current_node.parent is not None: + current_node = current_node.parent + print(f"Operator {node} is new root with {current_node} on the left") + current_node.parent = node + node.left = current_node + current_node = node + elif current_node.type == MULTIPLY: + print(f"Placed {node} over {node.left}") + current_node.parent = node + node.left = current_node + current_node = node + else: + raise ValueError("Not a valid expression") + elif token.type == MULTIPLY: + node = Node(MULTIPLY, None) + if current_node is None: + raise ValueError("Not a valid expression") + elif current_node.type in [NUMBER, MULTIPLY]: + print(f"Replaced with {node} node {current_node}") + node.left = current_node + node.parent = current_node.parent + if node.parent is not None: + node.parent.right = node + current_node.parent = node + current_node = node + elif current_node.type in [PLUS, MINUS]: + if current_node.parent is not None: + raise ValueError("oops...") + current_node.parent = node + node.left = current_node + current_node = node + else: + raise ValueError("Not a valid expression") + i += 1 + + while current_node.parent is not None: + current_node = current_node.parent + return current_node + + +def expr_eval(node): + if node.type == NUMBER: + return node.value + elif node.type == MULTIPLY: + return expr_eval(node.left) * expr_eval(node.right) + elif node.type == PLUS: + return expr_eval(node.left) + expr_eval(node.right) + elif node.type == MINUS: + return expr_eval(node.left) - expr_eval(node.right) + +def main(*argv): + data = argv[0] + tokens = tokenize(data) + node = build_tree(tokens) + # print(tokens) + # print(node) + # print(node.left) + # print(node.right) + print(expr_eval(node)) + + +if __name__ == "__main__": + main(*sys.argv[1:]) -- cgit v1.2.3