summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew <saintruler@gmail.com>2021-07-12 11:12:14 +0400
committerAndrew <saintruler@gmail.com>2021-07-12 11:12:14 +0400
commita175d139e74f52a83adb647925c3842f8ab026fb (patch)
treeff859d00b59950c76d0271537d6838afb4ff8a89
parentabb8ad61e3605a5e17e05b2f300d2b7277bd424a (diff)
Divided code in modules.
-rw-r--r--consts.py18
-rw-r--r--main.py70
-rw-r--r--parser.py139
-rw-r--r--tokenizer.py222
4 files changed, 229 insertions, 220 deletions
diff --git a/consts.py b/consts.py
new file mode 100644
index 0000000..5bf3fdd
--- /dev/null
+++ b/consts.py
@@ -0,0 +1,18 @@
+STATE_OPERATOR, STATE_NAME, *_ = range(100)
+(
+ LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_
+) = range(100)
+
+UNARY = "unary"
+FUNCALL = "funcall"
+PRECEDENCE = {
+ "+": 10,
+ "-": 10,
+ "*": 20,
+ "^": 30,
+ UNARY: 40,
+}
+
+OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@"
+NUMBER_CHARS = "0123456789"
+SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?"
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..a86d293
--- /dev/null
+++ b/main.py
@@ -0,0 +1,70 @@
+import sys
+
+from consts import *
+from tokenizer import tokenize
+from parser import build_tree
+
+
+# RUNTIME
+
+unary_operators = {
+ "-": lambda a: -a,
+ "+": lambda a: a
+}
+
+operators = {
+ "+": lambda a, b: a + b,
+ "-": lambda a, b: a - b,
+ "*": lambda a, b: a * b,
+ "^": lambda a, b: a ** b,
+}
+
+runtime = {
+ "x": 10
+}
+
+
+def factorial(n):
+ if n <= 1:
+ return 1
+ return n * factorial(n - 1)
+
+
+runtime_functions = {
+ "factorial": factorial
+}
+
+# END OF RUNTIME
+
+
+def expr_eval(node):
+ if node.type == NUMBER:
+ return node.value
+ elif node.type == SYMBOL:
+ return runtime.get(node.value)
+ elif node.type == FUNCALL:
+ fun_name = node.value[0]
+ args = list(map(expr_eval, node.value[1]))
+ return runtime_functions.get(fun_name)(*args)
+ elif node.type == OPERATOR:
+ if node.subtype == UNARY:
+ return unary_operators.get(node.left.subtype)(expr_eval(node.right))
+ else:
+ return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right))
+
+
+def main(*argv):
+ if len(argv) != 0:
+ data = argv[0]
+ else:
+ data = "2 + 3 * 4"
+ # data = "(3 + 3 * 2) * -4"
+ # data = "(2 + factorial(4, )) * 9"
+
+ tokens = tokenize(data)
+ node1 = build_tree(tokens)
+ print(expr_eval(node1))
+
+
+if __name__ == "__main__":
+ main(*sys.argv[1:])
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..1cad137
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,139 @@
+from consts import *
+
+
+class Node:
+ def __init__(self, node_type, value=None, subtype=None):
+ self.type = node_type
+ self.value = value
+ self.subtype = subtype
+ self.parent = None
+ self.right = None
+ self.left = None
+
+ def __eq__(self, other):
+ return self.type == other.type and self.value == other.value
+
+ def __repr__(self):
+ if self.type == LEFT_PARENTHESIS:
+ return "Node(LEFT_PARENTHESIS)"
+ if self.type == RIGHT_PARENTHESIS:
+ return "Node(RIGHT_PARENTHESIS)"
+ if self.type == NUMBER:
+ return f"Node({self.value})"
+ if self.type == OPERATOR:
+ return f"Node({self.subtype})"
+ if self.type == COMMA:
+ return f"Node(COMMA)"
+ if self.type == SYMBOL:
+ return f"Node(SYMBOL {self.value})"
+ if self.type == FUNCALL:
+ return f"Node(FUNCALL {self.value})"
+ __str__ = __repr__
+
+
+def parse_parenthesis(tokens, start):
+ end = start
+ depth = 0
+ while True:
+ token = tokens[end]
+ if token.type == RIGHT_PARENTHESIS:
+ depth -= 1
+ if token.type == LEFT_PARENTHESIS:
+ depth += 1
+ if depth == 0 and token.type == RIGHT_PARENTHESIS:
+ break
+ end += 1
+ node = build_tree(tokens[start + 1: end])
+ return node, end
+
+
+def parse_args(tokens, start):
+ end = start
+ depth = 0
+ while True:
+ token = tokens[end]
+ if token.type == RIGHT_PARENTHESIS:
+ depth -= 1
+ if token.type == LEFT_PARENTHESIS:
+ depth += 1
+ if depth == 0 and token.type == RIGHT_PARENTHESIS:
+ break
+ end += 1
+
+ args = []
+ arg = []
+ for token in tokens[start + 1: end]:
+ if token.type != COMMA:
+ arg.append(token)
+ else:
+ args.append(arg.copy())
+ arg.clear()
+ if len(arg) != 0:
+ args.append(arg)
+
+ return list(map(build_tree, args))
+
+
+def build_tree(tokens):
+ state = STATE_NAME
+ i = 0
+ current_node = None
+ while i < len(tokens):
+ token = tokens[i]
+ if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS:
+ args = parse_args(tokens, i)
+ current_node.type = FUNCALL
+ current_node.value = (current_node.value, args)
+ elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY:
+ if token.type == LEFT_PARENTHESIS:
+ node, i = parse_parenthesis(tokens, i)
+ state = STATE_NAME
+ elif token.type == NUMBER:
+ node = Node(NUMBER, value=token.value)
+ state = STATE_OPERATOR
+ elif token.type == SYMBOL:
+ node = Node(SYMBOL, value=token.value)
+ state = STATE_OPERATOR
+ elif token.subtype == UNARY:
+ node = Node(OPERATOR, subtype=UNARY)
+ node.left = Node(OPERATOR, subtype=token.value)
+ state = STATE_NAME
+
+ if current_node is None:
+ current_node = node
+ elif current_node.type == NUMBER:
+ raise ValueError("Not a valid expression")
+ elif current_node.type == OPERATOR:
+ if current_node.left is None:
+ current_node.left = node
+ node.parent = current_node
+ elif current_node.right is None:
+ current_node.right = node
+ node.parent = current_node
+ current_node = node
+ else:
+ raise ValueError("Not a valid expression")
+ elif token.type == OPERATOR:
+ node = Node(token.type, subtype=token.subtype, value=token.value)
+ if current_node is None:
+ raise ValueError("Not a valid expression")
+ elif current_node.type in [NUMBER, OPERATOR, FUNCALL]:
+ while current_node.parent is not None:
+ if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
+ node.parent = current_node.parent
+ if node.parent is not None:
+ node.parent.right = node
+ break
+ current_node = current_node.parent
+ current_node.parent = node
+ node.left = current_node
+ current_node = node
+ else:
+ print(current_node, token)
+ raise ValueError("Not a valid expression")
+ state = STATE_NAME
+ i += 1
+
+ while current_node.parent is not None:
+ current_node = current_node.parent
+ return current_node
diff --git a/tokenizer.py b/tokenizer.py
index c5bac82..cad2752 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -1,91 +1,10 @@
-import sys
+from consts import *
-STATE_OPERATOR, STATE_NAME, *_ = range(100)
-(
- LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_
-) = range(100)
-
-UNARY = "unary"
-FUNCALL = "funcall"
-PRECEDENCE = {
- "+": 10,
- "-": 10,
- "*": 20,
- "^": 30,
- UNARY: 40,
-}
-
-OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@"
-NUMBER_CHARS = "0123456789"
-SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?"
-
-# RUNTIME
-
-unary_operators = {
- "-": lambda a: -a,
- "+": lambda a: a
-}
-
-operators = {
- "+": lambda a, b: a + b,
- "-": lambda a, b: a - b,
- "*": lambda a, b: a * b,
- "^": lambda a, b: a ** b,
-}
-
-runtime = {
- "x": 10
-}
-
-
-def factorial(n):
- if n <= 1:
- return 1
- return n * factorial(n - 1)
-
-
-runtime_functions = {
- "factorial": factorial
-}
-
-# END OF RUNTIME
-
-
-class ParserError(Exception):
+class TokenizerError(Exception):
pass
-class Node:
- def __init__(self, node_type, value=None, subtype=None):
- self.type = node_type
- self.value = value
- self.subtype = subtype
- self.parent = None
- self.right = None
- self.left = None
-
- def __eq__(self, other):
- return self.type == other.type and self.value == other.value
-
- def __repr__(self):
- if self.type == LEFT_PARENTHESIS:
- return "Node(LEFT_PARENTHESIS)"
- if self.type == RIGHT_PARENTHESIS:
- return "Node(RIGHT_PARENTHESIS)"
- if self.type == NUMBER:
- return f"Node({self.value})"
- if self.type == OPERATOR:
- return f"Node({self.subtype})"
- if self.type == COMMA:
- return f"Node(COMMA)"
- if self.type == SYMBOL:
- return f"Node(SYMBOL {self.value})"
- if self.type == FUNCALL:
- return f"Node(FUNCALL {self.value})"
- __str__ = __repr__
-
-
class Token:
def __init__(self, node_type, value=None, subtype=None):
self.type = node_type
@@ -173,140 +92,3 @@ def tokenize(line):
return tokens
-def parse_parenthesis(tokens, start):
- end = start
- depth = 0
- while True:
- token = tokens[end]
- if token.type == RIGHT_PARENTHESIS:
- depth -= 1
- if token.type == LEFT_PARENTHESIS:
- depth += 1
- if depth == 0 and token.type == RIGHT_PARENTHESIS:
- break
- end += 1
- node = build_tree(tokens[start + 1: end])
- return node, end
-
-
-def parse_args(tokens, start):
- end = start
- depth = 0
- while True:
- token = tokens[end]
- if token.type == RIGHT_PARENTHESIS:
- depth -= 1
- if token.type == LEFT_PARENTHESIS:
- depth += 1
- if depth == 0 and token.type == RIGHT_PARENTHESIS:
- break
- end += 1
-
- args = []
- arg = []
- for token in tokens[start + 1: end]:
- if token.type != COMMA:
- arg.append(token)
- else:
- args.append(arg.copy())
- arg.clear()
- if len(arg) != 0:
- args.append(arg)
-
- return list(map(build_tree, args))
-
-
-def build_tree(tokens):
- state = STATE_NAME
- i = 0
- current_node = None
- while i < len(tokens):
- token = tokens[i]
- if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS:
- args = parse_args(tokens, i)
- current_node.type = FUNCALL
- current_node.value = (current_node.value, args)
- elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY:
- if token.type == LEFT_PARENTHESIS:
- node, i = parse_parenthesis(tokens, i)
- state = STATE_NAME
- elif token.type == NUMBER:
- node = Node(NUMBER, value=token.value)
- state = STATE_OPERATOR
- elif token.type == SYMBOL:
- node = Node(SYMBOL, value=token.value)
- state = STATE_OPERATOR
- elif token.subtype == UNARY:
- node = Node(OPERATOR, subtype=UNARY)
- node.left = Node(OPERATOR, subtype=token.value)
- state = STATE_NAME
-
- if current_node is None:
- current_node = node
- elif current_node.type == NUMBER:
- raise ValueError("Not a valid expression")
- elif current_node.type == OPERATOR:
- if current_node.left is None:
- current_node.left = node
- node.parent = current_node
- elif current_node.right is None:
- current_node.right = node
- node.parent = current_node
- current_node = node
- else:
- raise ValueError("Not a valid expression")
- elif token.type == OPERATOR:
- node = Node(token.type, subtype=token.subtype, value=token.value)
- if current_node is None:
- raise ValueError("Not a valid expression")
- elif current_node.type in [NUMBER, OPERATOR, FUNCALL]:
- while current_node.parent is not None:
- if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
- node.parent = current_node.parent
- if node.parent is not None:
- node.parent.right = node
- break
- current_node = current_node.parent
- current_node.parent = node
- node.left = current_node
- current_node = node
- else:
- print(current_node, token)
- raise ValueError("Not a valid expression")
- state = STATE_NAME
- i += 1
-
- while current_node.parent is not None:
- current_node = current_node.parent
- return current_node
-
-
-def expr_eval(node):
- if node.type == NUMBER:
- return node.value
- elif node.type == SYMBOL:
- return runtime.get(node.value)
- elif node.type == FUNCALL:
- fun_name = node.value[0]
- args = list(map(expr_eval, node.value[1]))
- return runtime_functions.get(fun_name)(*args)
- elif node.type == OPERATOR:
- if node.subtype == UNARY:
- return unary_operators.get(node.left.subtype)(expr_eval(node.right))
- else:
- return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right))
-
-
-def main(*argv):
- data = argv[0]
- # data = "2 + 3 * -4"
- # data = "(3 + 3 * 2) * -4"
- # data = "(2 + factorial(4, )) * 9"
- tokens = tokenize(data)
- # print(tokens)
- node1 = build_tree(tokens)
- print(expr_eval(node1))
-
-
-if __name__ == "__main__":
- main(*sys.argv[1:])