diff options
| author | Andrew <saintruler@gmail.com> | 2021-07-12 00:46:35 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2021-07-12 00:46:35 +0400 |
| commit | febb2ff27f6da9aed93220b80c43b37f51bbea3b (patch) | |
| tree | 07dbcdb196c88c9afa77fe553c0cab2f5cb51d6f | |
| parent | f6e4b7fe02f13b4e4228a15ddd19925fd2e93022 (diff) | |
Generalized parser for unary operators
| -rw-r--r-- | tokenizer.py | 139 |
1 files changed, 77 insertions, 62 deletions
diff --git a/tokenizer.py b/tokenizer.py index 2a9a714..d3fec34 100644 --- a/tokenizer.py +++ b/tokenizer.py @@ -1,25 +1,46 @@ import sys -STATE_ANY, STATE_OPERATOR, STATE_NUMBER, *_ = range(100) +STATE_OPERATOR, STATE_NAME, *_ = range(100) ( - LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, - MULTIPLY, PLUS, MINUS, POW, - *_ + LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, *_ ) = range(100) -OPERATORS = [MULTIPLY, PLUS, MINUS, POW] + +UNARY = "unary" PRECEDENCE = { - PLUS: 10, - MINUS: 10, - MULTIPLY: 20, - POW: 30, + "+": 10, + "-": 10, + "*": 20, + "^": 30, + UNARY: 40, +} + +unary_operators = { + "-": lambda a: -a, + "+": lambda a: a +} + +operators = { + "+": lambda a, b: a + b, + "-": lambda a, b: a - b, + "*": lambda a, b: a * b, + "^": lambda a, b: a ** b, } +OPERATOR_SYMBOLS = "*+-/%&~^|#$.:<=>@" +NUMBER_SYMBOLS = "0123456789" +NAME_SYMBOLS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?" + + +class ParserError(Exception): + pass + class Node: - def __init__(self, node_type, value): + def __init__(self, node_type, value=None, subtype=None): self.type = node_type self.value = value + self.subtype = subtype self.parent = None self.right = None self.left = None @@ -34,21 +55,16 @@ class Node: return "Node(RIGHT_PARENTHESIS)" if self.type == NUMBER: return f"Node({self.value})" - if self.type == MULTIPLY: - return "Node(MULTIPLY)" - if self.type == PLUS: - return "Node(PLUS)" - if self.type == MINUS: - return "Node(MINUS)" - if self.type == POW: - return "Node(POW)" + if self.type == OPERATOR: + return f"Node({self.subtype})" __str__ = __repr__ class Token: - def __init__(self, node_type, value): + def __init__(self, node_type, value=None, subtype=None): self.type = node_type self.value = value + self.subtype = subtype def __eq__(self, other): return self.type == other.type and self.value == other.value @@ -60,17 +76,21 @@ class Token: return "Token(RIGHT_PARENTHESIS)" if self.type == NUMBER: return f"Token({self.value})" - if self.type == MULTIPLY: - return "Token(MULTIPLY)" - if self.type == PLUS: - return "Token(PLUS)" - if self.type == MINUS: - return "Token(MINUS)" - if self.type == POW: - return "Token(POW)" + if self.type == OPERATOR: + return f"Token({self.subtype})" __str__ = __repr__ +def parse_operator(line, start): + operator = "" + i = start + while i < len(line) and line[i] in OPERATOR_SYMBOLS: + operator += line[i] + i += 1 + + return operator, i - 1 + + def parse_number(line, start): number = "" if line[start] in "+-": @@ -78,15 +98,15 @@ def parse_number(line, start): start += 1 i = start - while i < len(line) and line[i] in "0123456789": + while i < len(line) and line[i] in NUMBER_SYMBOLS: number += line[i] i += 1 - return int(number), i - 1 + return float(number), i - 1 def tokenize(line): - state = STATE_NUMBER + state = STATE_NAME tokens = [] i = 0 @@ -94,28 +114,19 @@ def tokenize(line): char = line[i] if char == "(": tokens.append(Token(LEFT_PARENTHESIS, None)) - state = STATE_NUMBER + state = STATE_NAME elif char == ")": tokens.append(Token(RIGHT_PARENTHESIS, None)) state = STATE_OPERATOR - elif char == "+" and state == STATE_OPERATOR: - tokens.append(Token(PLUS, None)) - state = STATE_NUMBER - elif char == "-" and state == STATE_OPERATOR: - tokens.append(Token(MINUS, None)) - state = STATE_NUMBER - elif char == "*": - if state == STATE_OPERATOR: - tokens.append(Token(MULTIPLY, None)) - state = STATE_NUMBER - else: - raise ValueError("Line is not a valid expression") - elif char == "^": + elif char in OPERATOR_SYMBOLS: if state == STATE_OPERATOR: - tokens.append(Token(POW, None)) - state = STATE_NUMBER - else: - raise ValueError("Line is not a valid expression") + val, i = parse_operator(line, i) + tokens.append(Token(OPERATOR, subtype=val)) + state = STATE_NAME + elif state == STATE_NAME: + val, i = parse_operator(line, i) + tokens.append(Token(OPERATOR, subtype=UNARY, value=val)) + state = STATE_NAME elif char in "-+0123456789": val, i = parse_number(line, i) tokens.append(Token(NUMBER, val)) @@ -148,17 +159,20 @@ def build_tree(tokens): current_node = None while i < len(tokens): token = tokens[i] - if token.type in [LEFT_PARENTHESIS, NUMBER]: + if token.type in [LEFT_PARENTHESIS, NUMBER] or token.subtype == UNARY: if token.type == LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) elif token.type == NUMBER: node = Node(NUMBER, token.value) + elif token.subtype == UNARY: + node = Node(OPERATOR, subtype=UNARY) + node.left = Node(OPERATOR, subtype=token.value) if current_node is None: current_node = node elif current_node.type == NUMBER: raise ValueError("Not a valid expression") - elif current_node.type in OPERATORS: + elif current_node.type == OPERATOR: if current_node.left is None: current_node.left = node node.parent = current_node @@ -169,13 +183,13 @@ def build_tree(tokens): else: raise ValueError("Not a valid expression") - elif token.type in OPERATORS: - node = Node(token.type, None) + elif token.type == OPERATOR: + node = Node(token.type, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") - elif current_node.type in [NUMBER] + OPERATORS: + elif current_node.type in [NUMBER, OPERATOR]: while current_node.parent is not None: - if PRECEDENCE[current_node.parent.type] < PRECEDENCE[node.type]: + if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent if node.parent is not None: node.parent.right = node @@ -196,21 +210,22 @@ def build_tree(tokens): def expr_eval(node): if node.type == NUMBER: return node.value - elif node.type == MULTIPLY: - return expr_eval(node.left) * expr_eval(node.right) - elif node.type == PLUS: - return expr_eval(node.left) + expr_eval(node.right) - elif node.type == MINUS: - return expr_eval(node.left) - expr_eval(node.right) - elif node.type == POW: - return expr_eval(node.left) ** expr_eval(node.right) + elif node.type == OPERATOR: + if node.subtype == UNARY: + return unary_operators.get(node.left.subtype)(expr_eval(node.right)) + else: + return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right)) def main(*argv): data = argv[0] + # data = "2 + 3 * -4" + # data = "(3 + 3 * 2) * -4" tokens = tokenize(data) + # print(tokens) node1 = build_tree(tokens) print(expr_eval(node1)) + print("hooy") if __name__ == "__main__": |