summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tokenizer.py243
1 files changed, 243 insertions, 0 deletions
diff --git a/tokenizer.py b/tokenizer.py
new file mode 100644
index 0000000..f8c2460
--- /dev/null
+++ b/tokenizer.py
@@ -0,0 +1,243 @@
+import sys
+
+
+STATE_ANY, STATE_OPERATOR, STATE_NUMBER, *_ = range(100)
+LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, MULTIPLY, PLUS, MINUS, *_ = range(100)
+OPERATORS = [MULTIPLY, PLUS, MINUS]
+
+
+class Node:
+ def __init__(self, node_type, value):
+ self.type = node_type
+ self.value = value
+ self.parent = None
+ self.right = None
+ self.left = None
+
+ def __eq__(self, other):
+ return self.type == other.type and self.value == other.value
+
+ def __repr__(self):
+ if self.type == LEFT_PARENTHESIS:
+ return "Node(LEFT_PARENTHESIS)"
+ if self.type == RIGHT_PARENTHESIS:
+ return "Node(RIGHT_PARENTHESIS)"
+ if self.type == NUMBER:
+ return f"Node({self.value})"
+ if self.type == MULTIPLY:
+ return "Node(MULTIPLY)"
+ if self.type == PLUS:
+ return "Node(PLUS)"
+ if self.type == MINUS:
+ return "Node(MINUS)"
+ __str__ = __repr__
+
+
+class Token:
+ def __init__(self, node_type, value):
+ self.type = node_type
+ self.value = value
+
+ def __eq__(self, other):
+ return self.type == other.type and self.value == other.value
+
+ def __repr__(self):
+ if self.type == LEFT_PARENTHESIS:
+ return "Token(LEFT_PARENTHESIS)"
+ if self.type == RIGHT_PARENTHESIS:
+ return "Token(RIGHT_PARENTHESIS)"
+ if self.type == NUMBER:
+ return f"Token({self.value})"
+ if self.type == MULTIPLY:
+ return "Token(MULTIPLY)"
+ if self.type == PLUS:
+ return "Token(PLUS)"
+ if self.type == MINUS:
+ return "Token(MINUS)"
+ __str__ = __repr__
+
+
+def parse_number(line, start):
+ number = ""
+ if line[start] in "+-":
+ number += line[start]
+ start += 1
+
+ i = start
+ while i < len(line) and line[i] in "0123456789":
+ number += line[i]
+ i += 1
+
+ return int(number), i - 1
+
+
+def tokenize(line):
+ state = STATE_NUMBER
+
+ tokens = []
+ i = 0
+ while i < len(line):
+ char = line[i]
+ if char == "(":
+ tokens.append(Token(LEFT_PARENTHESIS, None))
+ state = STATE_NUMBER
+ elif char == ")":
+ tokens.append(Token(RIGHT_PARENTHESIS, None))
+ state = STATE_OPERATOR
+ elif char == "+" and state == STATE_OPERATOR:
+ tokens.append(Token(PLUS, None))
+ state = STATE_NUMBER
+ elif char == "-" and state == STATE_OPERATOR:
+ tokens.append(Token(MINUS, None))
+ state = STATE_NUMBER
+ elif char == "*":
+ if state == STATE_OPERATOR:
+ tokens.append(Token(MULTIPLY, None))
+ state = STATE_NUMBER
+ else:
+ print(1, tokens)
+ raise ValueError("Line is not a valid expression")
+ elif char in "-+0123456789":
+ val, i = parse_number(line, i)
+ tokens.append(Token(NUMBER, val))
+ state = STATE_OPERATOR
+ elif char != " ":
+ print(char)
+ print(2, tokens)
+ raise ValueError("Line is not a valid expression")
+ i += 1
+
+ return tokens
+
+
+def parse_parenthesis(tokens, start):
+ print(tokens)
+ i = start
+ end = start
+ depth = 0
+ print(tokens[i])
+ while True:
+ token = tokens[i]
+ if token.type == RIGHT_PARENTHESIS:
+ depth -= 1
+ if token.type == LEFT_PARENTHESIS:
+ depth += 1
+ if depth == 0 and token.type == RIGHT_PARENTHESIS:
+ end = i + 1
+ break
+ i += 1
+ print("=== BUILDING PARENTHESIS ===")
+ print(tokens[start + 1 : end - 1])
+ node = build_tree(tokens[start + 1 : end - 1])
+ print("=== EXITED PARENTHESIS ===")
+ return node, end - 1
+
+
+def build_tree(tokens):
+ i = 0
+ current_node = None
+ while i < len(tokens):
+ token = tokens[i]
+ if token.type == LEFT_PARENTHESIS:
+ node, i = parse_parenthesis(tokens, i)
+ if current_node is None:
+ current_node = node
+ elif current_node.type == NUMBER:
+ raise ValueError("Not a valid expression")
+ elif current_node.type in OPERATORS:
+ if current_node.left is None:
+ print(f"Placed number {node} on the left of {current_node}")
+ current_node.left = node
+ node.parent = current_node
+ elif current_node.right is None:
+ print(f"Placed number {node} on the right of {current_node}")
+ current_node.right = node
+ node.parent = current_node
+ current_node = node
+ else:
+ raise ValueError("Not a valid expression")
+ elif token.type == NUMBER:
+ node = Node(NUMBER, token.value)
+ if current_node is None:
+ print(f"Number {node} is new root")
+ current_node = node
+ elif current_node.type in OPERATORS:
+ if current_node.left is None:
+ print(f"Placed number {node} on the left of {current_node}")
+ current_node.left = node
+ node.parent = current_node
+ elif current_node.right is None:
+ print(f"Placed number {node} on the right of {current_node}")
+ current_node.right = node
+ node.parent = current_node
+ current_node = node
+ else:
+ raise ValueError("Not a valid expression")
+ elif token.type in [PLUS, MINUS]:
+ node = Node(token.type, None)
+ if current_node is None:
+ raise ValueError("Not a valid expression")
+ elif current_node.type == NUMBER:
+ while current_node.parent is not None:
+ current_node = current_node.parent
+ print(f"Operator {node} is new root with {current_node} on the left")
+ current_node.parent = node
+ node.left = current_node
+ current_node = node
+ elif current_node.type == MULTIPLY:
+ print(f"Placed {node} over {node.left}")
+ current_node.parent = node
+ node.left = current_node
+ current_node = node
+ else:
+ raise ValueError("Not a valid expression")
+ elif token.type == MULTIPLY:
+ node = Node(MULTIPLY, None)
+ if current_node is None:
+ raise ValueError("Not a valid expression")
+ elif current_node.type in [NUMBER, MULTIPLY]:
+ print(f"Replaced with {node} node {current_node}")
+ node.left = current_node
+ node.parent = current_node.parent
+ if node.parent is not None:
+ node.parent.right = node
+ current_node.parent = node
+ current_node = node
+ elif current_node.type in [PLUS, MINUS]:
+ if current_node.parent is not None:
+ raise ValueError("oops...")
+ current_node.parent = node
+ node.left = current_node
+ current_node = node
+ else:
+ raise ValueError("Not a valid expression")
+ i += 1
+
+ while current_node.parent is not None:
+ current_node = current_node.parent
+ return current_node
+
+
+def expr_eval(node):
+ if node.type == NUMBER:
+ return node.value
+ elif node.type == MULTIPLY:
+ return expr_eval(node.left) * expr_eval(node.right)
+ elif node.type == PLUS:
+ return expr_eval(node.left) + expr_eval(node.right)
+ elif node.type == MINUS:
+ return expr_eval(node.left) - expr_eval(node.right)
+
+def main(*argv):
+ data = argv[0]
+ tokens = tokenize(data)
+ node = build_tree(tokens)
+ # print(tokens)
+ # print(node)
+ # print(node.left)
+ # print(node.right)
+ print(expr_eval(node))
+
+
+if __name__ == "__main__":
+ main(*sys.argv[1:])