diff options
| author | Andrew <saintruler@gmail.com> | 2021-07-12 11:12:14 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2021-07-12 11:12:14 +0400 |
| commit | a175d139e74f52a83adb647925c3842f8ab026fb (patch) | |
| tree | ff859d00b59950c76d0271537d6838afb4ff8a89 /tokenizer.py | |
| parent | abb8ad61e3605a5e17e05b2f300d2b7277bd424a (diff) | |
Divided code in modules.
Diffstat (limited to 'tokenizer.py')
| -rw-r--r-- | tokenizer.py | 222 |
1 files changed, 2 insertions, 220 deletions
diff --git a/tokenizer.py b/tokenizer.py index c5bac82..cad2752 100644 --- a/tokenizer.py +++ b/tokenizer.py @@ -1,91 +1,10 @@ -import sys +from consts import * -STATE_OPERATOR, STATE_NAME, *_ = range(100) -( - LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_ -) = range(100) - -UNARY = "unary" -FUNCALL = "funcall" -PRECEDENCE = { - "+": 10, - "-": 10, - "*": 20, - "^": 30, - UNARY: 40, -} - -OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@" -NUMBER_CHARS = "0123456789" -SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?" - -# RUNTIME - -unary_operators = { - "-": lambda a: -a, - "+": lambda a: a -} - -operators = { - "+": lambda a, b: a + b, - "-": lambda a, b: a - b, - "*": lambda a, b: a * b, - "^": lambda a, b: a ** b, -} - -runtime = { - "x": 10 -} - - -def factorial(n): - if n <= 1: - return 1 - return n * factorial(n - 1) - - -runtime_functions = { - "factorial": factorial -} - -# END OF RUNTIME - - -class ParserError(Exception): +class TokenizerError(Exception): pass -class Node: - def __init__(self, node_type, value=None, subtype=None): - self.type = node_type - self.value = value - self.subtype = subtype - self.parent = None - self.right = None - self.left = None - - def __eq__(self, other): - return self.type == other.type and self.value == other.value - - def __repr__(self): - if self.type == LEFT_PARENTHESIS: - return "Node(LEFT_PARENTHESIS)" - if self.type == RIGHT_PARENTHESIS: - return "Node(RIGHT_PARENTHESIS)" - if self.type == NUMBER: - return f"Node({self.value})" - if self.type == OPERATOR: - return f"Node({self.subtype})" - if self.type == COMMA: - return f"Node(COMMA)" - if self.type == SYMBOL: - return f"Node(SYMBOL {self.value})" - if self.type == FUNCALL: - return f"Node(FUNCALL {self.value})" - __str__ = __repr__ - - class Token: def __init__(self, node_type, value=None, subtype=None): self.type = node_type @@ -173,140 +92,3 @@ def tokenize(line): return tokens -def parse_parenthesis(tokens, start): - end = start - depth = 0 - while True: - token = tokens[end] - if token.type == RIGHT_PARENTHESIS: - depth -= 1 - if token.type == LEFT_PARENTHESIS: - depth += 1 - if depth == 0 and token.type == RIGHT_PARENTHESIS: - break - end += 1 - node = build_tree(tokens[start + 1: end]) - return node, end - - -def parse_args(tokens, start): - end = start - depth = 0 - while True: - token = tokens[end] - if token.type == RIGHT_PARENTHESIS: - depth -= 1 - if token.type == LEFT_PARENTHESIS: - depth += 1 - if depth == 0 and token.type == RIGHT_PARENTHESIS: - break - end += 1 - - args = [] - arg = [] - for token in tokens[start + 1: end]: - if token.type != COMMA: - arg.append(token) - else: - args.append(arg.copy()) - arg.clear() - if len(arg) != 0: - args.append(arg) - - return list(map(build_tree, args)) - - -def build_tree(tokens): - state = STATE_NAME - i = 0 - current_node = None - while i < len(tokens): - token = tokens[i] - if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS: - args = parse_args(tokens, i) - current_node.type = FUNCALL - current_node.value = (current_node.value, args) - elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY: - if token.type == LEFT_PARENTHESIS: - node, i = parse_parenthesis(tokens, i) - state = STATE_NAME - elif token.type == NUMBER: - node = Node(NUMBER, value=token.value) - state = STATE_OPERATOR - elif token.type == SYMBOL: - node = Node(SYMBOL, value=token.value) - state = STATE_OPERATOR - elif token.subtype == UNARY: - node = Node(OPERATOR, subtype=UNARY) - node.left = Node(OPERATOR, subtype=token.value) - state = STATE_NAME - - if current_node is None: - current_node = node - elif current_node.type == NUMBER: - raise ValueError("Not a valid expression") - elif current_node.type == OPERATOR: - if current_node.left is None: - current_node.left = node - node.parent = current_node - elif current_node.right is None: - current_node.right = node - node.parent = current_node - current_node = node - else: - raise ValueError("Not a valid expression") - elif token.type == OPERATOR: - node = Node(token.type, subtype=token.subtype, value=token.value) - if current_node is None: - raise ValueError("Not a valid expression") - elif current_node.type in [NUMBER, OPERATOR, FUNCALL]: - while current_node.parent is not None: - if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: - node.parent = current_node.parent - if node.parent is not None: - node.parent.right = node - break - current_node = current_node.parent - current_node.parent = node - node.left = current_node - current_node = node - else: - print(current_node, token) - raise ValueError("Not a valid expression") - state = STATE_NAME - i += 1 - - while current_node.parent is not None: - current_node = current_node.parent - return current_node - - -def expr_eval(node): - if node.type == NUMBER: - return node.value - elif node.type == SYMBOL: - return runtime.get(node.value) - elif node.type == FUNCALL: - fun_name = node.value[0] - args = list(map(expr_eval, node.value[1])) - return runtime_functions.get(fun_name)(*args) - elif node.type == OPERATOR: - if node.subtype == UNARY: - return unary_operators.get(node.left.subtype)(expr_eval(node.right)) - else: - return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right)) - - -def main(*argv): - data = argv[0] - # data = "2 + 3 * -4" - # data = "(3 + 3 * 2) * -4" - # data = "(2 + factorial(4, )) * 9" - tokens = tokenize(data) - # print(tokens) - node1 = build_tree(tokens) - print(expr_eval(node1)) - - -if __name__ == "__main__": - main(*sys.argv[1:]) |