diff options
| author | Andrew <saintruler@gmail.com> | 2021-07-12 11:12:14 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2021-07-12 11:12:14 +0400 |
| commit | a175d139e74f52a83adb647925c3842f8ab026fb (patch) | |
| tree | ff859d00b59950c76d0271537d6838afb4ff8a89 | |
| parent | abb8ad61e3605a5e17e05b2f300d2b7277bd424a (diff) | |
Divided code in modules.
| -rw-r--r-- | consts.py | 18 | ||||
| -rw-r--r-- | main.py | 70 | ||||
| -rw-r--r-- | parser.py | 139 | ||||
| -rw-r--r-- | tokenizer.py | 222 |
4 files changed, 229 insertions, 220 deletions
diff --git a/consts.py b/consts.py new file mode 100644 index 0000000..5bf3fdd --- /dev/null +++ b/consts.py @@ -0,0 +1,18 @@ +STATE_OPERATOR, STATE_NAME, *_ = range(100) +( + LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_ +) = range(100) + +UNARY = "unary" +FUNCALL = "funcall" +PRECEDENCE = { + "+": 10, + "-": 10, + "*": 20, + "^": 30, + UNARY: 40, +} + +OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@" +NUMBER_CHARS = "0123456789" +SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?" @@ -0,0 +1,70 @@ +import sys + +from consts import * +from tokenizer import tokenize +from parser import build_tree + + +# RUNTIME + +unary_operators = { + "-": lambda a: -a, + "+": lambda a: a +} + +operators = { + "+": lambda a, b: a + b, + "-": lambda a, b: a - b, + "*": lambda a, b: a * b, + "^": lambda a, b: a ** b, +} + +runtime = { + "x": 10 +} + + +def factorial(n): + if n <= 1: + return 1 + return n * factorial(n - 1) + + +runtime_functions = { + "factorial": factorial +} + +# END OF RUNTIME + + +def expr_eval(node): + if node.type == NUMBER: + return node.value + elif node.type == SYMBOL: + return runtime.get(node.value) + elif node.type == FUNCALL: + fun_name = node.value[0] + args = list(map(expr_eval, node.value[1])) + return runtime_functions.get(fun_name)(*args) + elif node.type == OPERATOR: + if node.subtype == UNARY: + return unary_operators.get(node.left.subtype)(expr_eval(node.right)) + else: + return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right)) + + +def main(*argv): + if len(argv) != 0: + data = argv[0] + else: + data = "2 + 3 * 4" + # data = "(3 + 3 * 2) * -4" + # data = "(2 + factorial(4, )) * 9" + + tokens = tokenize(data) + node1 = build_tree(tokens) + print(expr_eval(node1)) + + +if __name__ == "__main__": + main(*sys.argv[1:]) diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..1cad137 --- /dev/null +++ b/parser.py @@ -0,0 +1,139 @@ +from consts import * + + +class Node: + def __init__(self, node_type, value=None, subtype=None): + self.type = node_type + self.value = value + self.subtype = subtype + self.parent = None + self.right = None + self.left = None + + def __eq__(self, other): + return self.type == other.type and self.value == other.value + + def __repr__(self): + if self.type == LEFT_PARENTHESIS: + return "Node(LEFT_PARENTHESIS)" + if self.type == RIGHT_PARENTHESIS: + return "Node(RIGHT_PARENTHESIS)" + if self.type == NUMBER: + return f"Node({self.value})" + if self.type == OPERATOR: + return f"Node({self.subtype})" + if self.type == COMMA: + return f"Node(COMMA)" + if self.type == SYMBOL: + return f"Node(SYMBOL {self.value})" + if self.type == FUNCALL: + return f"Node(FUNCALL {self.value})" + __str__ = __repr__ + + +def parse_parenthesis(tokens, start): + end = start + depth = 0 + while True: + token = tokens[end] + if token.type == RIGHT_PARENTHESIS: + depth -= 1 + if token.type == LEFT_PARENTHESIS: + depth += 1 + if depth == 0 and token.type == RIGHT_PARENTHESIS: + break + end += 1 + node = build_tree(tokens[start + 1: end]) + return node, end + + +def parse_args(tokens, start): + end = start + depth = 0 + while True: + token = tokens[end] + if token.type == RIGHT_PARENTHESIS: + depth -= 1 + if token.type == LEFT_PARENTHESIS: + depth += 1 + if depth == 0 and token.type == RIGHT_PARENTHESIS: + break + end += 1 + + args = [] + arg = [] + for token in tokens[start + 1: end]: + if token.type != COMMA: + arg.append(token) + else: + args.append(arg.copy()) + arg.clear() + if len(arg) != 0: + args.append(arg) + + return list(map(build_tree, args)) + + +def build_tree(tokens): + state = STATE_NAME + i = 0 + current_node = None + while i < len(tokens): + token = tokens[i] + if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS: + args = parse_args(tokens, i) + current_node.type = FUNCALL + current_node.value = (current_node.value, args) + elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY: + if token.type == LEFT_PARENTHESIS: + node, i = parse_parenthesis(tokens, i) + state = STATE_NAME + elif token.type == NUMBER: + node = Node(NUMBER, value=token.value) + state = STATE_OPERATOR + elif token.type == SYMBOL: + node = Node(SYMBOL, value=token.value) + state = STATE_OPERATOR + elif token.subtype == UNARY: + node = Node(OPERATOR, subtype=UNARY) + node.left = Node(OPERATOR, subtype=token.value) + state = STATE_NAME + + if current_node is None: + current_node = node + elif current_node.type == NUMBER: + raise ValueError("Not a valid expression") + elif current_node.type == OPERATOR: + if current_node.left is None: + current_node.left = node + node.parent = current_node + elif current_node.right is None: + current_node.right = node + node.parent = current_node + current_node = node + else: + raise ValueError("Not a valid expression") + elif token.type == OPERATOR: + node = Node(token.type, subtype=token.subtype, value=token.value) + if current_node is None: + raise ValueError("Not a valid expression") + elif current_node.type in [NUMBER, OPERATOR, FUNCALL]: + while current_node.parent is not None: + if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: + node.parent = current_node.parent + if node.parent is not None: + node.parent.right = node + break + current_node = current_node.parent + current_node.parent = node + node.left = current_node + current_node = node + else: + print(current_node, token) + raise ValueError("Not a valid expression") + state = STATE_NAME + i += 1 + + while current_node.parent is not None: + current_node = current_node.parent + return current_node diff --git a/tokenizer.py b/tokenizer.py index c5bac82..cad2752 100644 --- a/tokenizer.py +++ b/tokenizer.py @@ -1,91 +1,10 @@ -import sys +from consts import * -STATE_OPERATOR, STATE_NAME, *_ = range(100) -( - LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_ -) = range(100) - -UNARY = "unary" -FUNCALL = "funcall" -PRECEDENCE = { - "+": 10, - "-": 10, - "*": 20, - "^": 30, - UNARY: 40, -} - -OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@" -NUMBER_CHARS = "0123456789" -SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?" - -# RUNTIME - -unary_operators = { - "-": lambda a: -a, - "+": lambda a: a -} - -operators = { - "+": lambda a, b: a + b, - "-": lambda a, b: a - b, - "*": lambda a, b: a * b, - "^": lambda a, b: a ** b, -} - -runtime = { - "x": 10 -} - - -def factorial(n): - if n <= 1: - return 1 - return n * factorial(n - 1) - - -runtime_functions = { - "factorial": factorial -} - -# END OF RUNTIME - - -class ParserError(Exception): +class TokenizerError(Exception): pass -class Node: - def __init__(self, node_type, value=None, subtype=None): - self.type = node_type - self.value = value - self.subtype = subtype - self.parent = None - self.right = None - self.left = None - - def __eq__(self, other): - return self.type == other.type and self.value == other.value - - def __repr__(self): - if self.type == LEFT_PARENTHESIS: - return "Node(LEFT_PARENTHESIS)" - if self.type == RIGHT_PARENTHESIS: - return "Node(RIGHT_PARENTHESIS)" - if self.type == NUMBER: - return f"Node({self.value})" - if self.type == OPERATOR: - return f"Node({self.subtype})" - if self.type == COMMA: - return f"Node(COMMA)" - if self.type == SYMBOL: - return f"Node(SYMBOL {self.value})" - if self.type == FUNCALL: - return f"Node(FUNCALL {self.value})" - __str__ = __repr__ - - class Token: def __init__(self, node_type, value=None, subtype=None): self.type = node_type @@ -173,140 +92,3 @@ def tokenize(line): return tokens -def parse_parenthesis(tokens, start): - end = start - depth = 0 - while True: - token = tokens[end] - if token.type == RIGHT_PARENTHESIS: - depth -= 1 - if token.type == LEFT_PARENTHESIS: - depth += 1 - if depth == 0 and token.type == RIGHT_PARENTHESIS: - break - end += 1 - node = build_tree(tokens[start + 1: end]) - return node, end - - -def parse_args(tokens, start): - end = start - depth = 0 - while True: - token = tokens[end] - if token.type == RIGHT_PARENTHESIS: - depth -= 1 - if token.type == LEFT_PARENTHESIS: - depth += 1 - if depth == 0 and token.type == RIGHT_PARENTHESIS: - break - end += 1 - - args = [] - arg = [] - for token in tokens[start + 1: end]: - if token.type != COMMA: - arg.append(token) - else: - args.append(arg.copy()) - arg.clear() - if len(arg) != 0: - args.append(arg) - - return list(map(build_tree, args)) - - -def build_tree(tokens): - state = STATE_NAME - i = 0 - current_node = None - while i < len(tokens): - token = tokens[i] - if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS: - args = parse_args(tokens, i) - current_node.type = FUNCALL - current_node.value = (current_node.value, args) - elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY: - if token.type == LEFT_PARENTHESIS: - node, i = parse_parenthesis(tokens, i) - state = STATE_NAME - elif token.type == NUMBER: - node = Node(NUMBER, value=token.value) - state = STATE_OPERATOR - elif token.type == SYMBOL: - node = Node(SYMBOL, value=token.value) - state = STATE_OPERATOR - elif token.subtype == UNARY: - node = Node(OPERATOR, subtype=UNARY) - node.left = Node(OPERATOR, subtype=token.value) - state = STATE_NAME - - if current_node is None: - current_node = node - elif current_node.type == NUMBER: - raise ValueError("Not a valid expression") - elif current_node.type == OPERATOR: - if current_node.left is None: - current_node.left = node - node.parent = current_node - elif current_node.right is None: - current_node.right = node - node.parent = current_node - current_node = node - else: - raise ValueError("Not a valid expression") - elif token.type == OPERATOR: - node = Node(token.type, subtype=token.subtype, value=token.value) - if current_node is None: - raise ValueError("Not a valid expression") - elif current_node.type in [NUMBER, OPERATOR, FUNCALL]: - while current_node.parent is not None: - if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: - node.parent = current_node.parent - if node.parent is not None: - node.parent.right = node - break - current_node = current_node.parent - current_node.parent = node - node.left = current_node - current_node = node - else: - print(current_node, token) - raise ValueError("Not a valid expression") - state = STATE_NAME - i += 1 - - while current_node.parent is not None: - current_node = current_node.parent - return current_node - - -def expr_eval(node): - if node.type == NUMBER: - return node.value - elif node.type == SYMBOL: - return runtime.get(node.value) - elif node.type == FUNCALL: - fun_name = node.value[0] - args = list(map(expr_eval, node.value[1])) - return runtime_functions.get(fun_name)(*args) - elif node.type == OPERATOR: - if node.subtype == UNARY: - return unary_operators.get(node.left.subtype)(expr_eval(node.right)) - else: - return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right)) - - -def main(*argv): - data = argv[0] - # data = "2 + 3 * -4" - # data = "(3 + 3 * 2) * -4" - # data = "(2 + factorial(4, )) * 9" - tokens = tokenize(data) - # print(tokens) - node1 = build_tree(tokens) - print(expr_eval(node1)) - - -if __name__ == "__main__": - main(*sys.argv[1:]) |