summaryrefslogtreecommitdiff
path: root/tokenizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'tokenizer.py')
-rw-r--r--tokenizer.py222
1 files changed, 2 insertions, 220 deletions
diff --git a/tokenizer.py b/tokenizer.py
index c5bac82..cad2752 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -1,91 +1,10 @@
-import sys
+from consts import *
-STATE_OPERATOR, STATE_NAME, *_ = range(100)
-(
- LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_
-) = range(100)
-
-UNARY = "unary"
-FUNCALL = "funcall"
-PRECEDENCE = {
- "+": 10,
- "-": 10,
- "*": 20,
- "^": 30,
- UNARY: 40,
-}
-
-OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@"
-NUMBER_CHARS = "0123456789"
-SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?"
-
-# RUNTIME
-
-unary_operators = {
- "-": lambda a: -a,
- "+": lambda a: a
-}
-
-operators = {
- "+": lambda a, b: a + b,
- "-": lambda a, b: a - b,
- "*": lambda a, b: a * b,
- "^": lambda a, b: a ** b,
-}
-
-runtime = {
- "x": 10
-}
-
-
-def factorial(n):
- if n <= 1:
- return 1
- return n * factorial(n - 1)
-
-
-runtime_functions = {
- "factorial": factorial
-}
-
-# END OF RUNTIME
-
-
-class ParserError(Exception):
+class TokenizerError(Exception):
pass
-class Node:
- def __init__(self, node_type, value=None, subtype=None):
- self.type = node_type
- self.value = value
- self.subtype = subtype
- self.parent = None
- self.right = None
- self.left = None
-
- def __eq__(self, other):
- return self.type == other.type and self.value == other.value
-
- def __repr__(self):
- if self.type == LEFT_PARENTHESIS:
- return "Node(LEFT_PARENTHESIS)"
- if self.type == RIGHT_PARENTHESIS:
- return "Node(RIGHT_PARENTHESIS)"
- if self.type == NUMBER:
- return f"Node({self.value})"
- if self.type == OPERATOR:
- return f"Node({self.subtype})"
- if self.type == COMMA:
- return f"Node(COMMA)"
- if self.type == SYMBOL:
- return f"Node(SYMBOL {self.value})"
- if self.type == FUNCALL:
- return f"Node(FUNCALL {self.value})"
- __str__ = __repr__
-
-
class Token:
def __init__(self, node_type, value=None, subtype=None):
self.type = node_type
@@ -173,140 +92,3 @@ def tokenize(line):
return tokens
-def parse_parenthesis(tokens, start):
- end = start
- depth = 0
- while True:
- token = tokens[end]
- if token.type == RIGHT_PARENTHESIS:
- depth -= 1
- if token.type == LEFT_PARENTHESIS:
- depth += 1
- if depth == 0 and token.type == RIGHT_PARENTHESIS:
- break
- end += 1
- node = build_tree(tokens[start + 1: end])
- return node, end
-
-
-def parse_args(tokens, start):
- end = start
- depth = 0
- while True:
- token = tokens[end]
- if token.type == RIGHT_PARENTHESIS:
- depth -= 1
- if token.type == LEFT_PARENTHESIS:
- depth += 1
- if depth == 0 and token.type == RIGHT_PARENTHESIS:
- break
- end += 1
-
- args = []
- arg = []
- for token in tokens[start + 1: end]:
- if token.type != COMMA:
- arg.append(token)
- else:
- args.append(arg.copy())
- arg.clear()
- if len(arg) != 0:
- args.append(arg)
-
- return list(map(build_tree, args))
-
-
-def build_tree(tokens):
- state = STATE_NAME
- i = 0
- current_node = None
- while i < len(tokens):
- token = tokens[i]
- if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS:
- args = parse_args(tokens, i)
- current_node.type = FUNCALL
- current_node.value = (current_node.value, args)
- elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY:
- if token.type == LEFT_PARENTHESIS:
- node, i = parse_parenthesis(tokens, i)
- state = STATE_NAME
- elif token.type == NUMBER:
- node = Node(NUMBER, value=token.value)
- state = STATE_OPERATOR
- elif token.type == SYMBOL:
- node = Node(SYMBOL, value=token.value)
- state = STATE_OPERATOR
- elif token.subtype == UNARY:
- node = Node(OPERATOR, subtype=UNARY)
- node.left = Node(OPERATOR, subtype=token.value)
- state = STATE_NAME
-
- if current_node is None:
- current_node = node
- elif current_node.type == NUMBER:
- raise ValueError("Not a valid expression")
- elif current_node.type == OPERATOR:
- if current_node.left is None:
- current_node.left = node
- node.parent = current_node
- elif current_node.right is None:
- current_node.right = node
- node.parent = current_node
- current_node = node
- else:
- raise ValueError("Not a valid expression")
- elif token.type == OPERATOR:
- node = Node(token.type, subtype=token.subtype, value=token.value)
- if current_node is None:
- raise ValueError("Not a valid expression")
- elif current_node.type in [NUMBER, OPERATOR, FUNCALL]:
- while current_node.parent is not None:
- if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
- node.parent = current_node.parent
- if node.parent is not None:
- node.parent.right = node
- break
- current_node = current_node.parent
- current_node.parent = node
- node.left = current_node
- current_node = node
- else:
- print(current_node, token)
- raise ValueError("Not a valid expression")
- state = STATE_NAME
- i += 1
-
- while current_node.parent is not None:
- current_node = current_node.parent
- return current_node
-
-
-def expr_eval(node):
- if node.type == NUMBER:
- return node.value
- elif node.type == SYMBOL:
- return runtime.get(node.value)
- elif node.type == FUNCALL:
- fun_name = node.value[0]
- args = list(map(expr_eval, node.value[1]))
- return runtime_functions.get(fun_name)(*args)
- elif node.type == OPERATOR:
- if node.subtype == UNARY:
- return unary_operators.get(node.left.subtype)(expr_eval(node.right))
- else:
- return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right))
-
-
-def main(*argv):
- data = argv[0]
- # data = "2 + 3 * -4"
- # data = "(3 + 3 * 2) * -4"
- # data = "(2 + factorial(4, )) * 9"
- tokens = tokenize(data)
- # print(tokens)
- node1 = build_tree(tokens)
- print(expr_eval(node1))
-
-
-if __name__ == "__main__":
- main(*sys.argv[1:])