summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew <saintruler@gmail.com>2021-07-12 00:46:35 +0400
committerAndrew <saintruler@gmail.com>2021-07-12 00:46:35 +0400
commitfebb2ff27f6da9aed93220b80c43b37f51bbea3b (patch)
tree07dbcdb196c88c9afa77fe553c0cab2f5cb51d6f
parentf6e4b7fe02f13b4e4228a15ddd19925fd2e93022 (diff)
Generalized parser for unary operators
-rw-r--r--tokenizer.py139
1 files changed, 77 insertions, 62 deletions
diff --git a/tokenizer.py b/tokenizer.py
index 2a9a714..d3fec34 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -1,25 +1,46 @@
import sys
-STATE_ANY, STATE_OPERATOR, STATE_NUMBER, *_ = range(100)
+STATE_OPERATOR, STATE_NAME, *_ = range(100)
(
- LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER,
- MULTIPLY, PLUS, MINUS, POW,
- *_
+ LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, *_
) = range(100)
-OPERATORS = [MULTIPLY, PLUS, MINUS, POW]
+
+UNARY = "unary"
PRECEDENCE = {
- PLUS: 10,
- MINUS: 10,
- MULTIPLY: 20,
- POW: 30,
+ "+": 10,
+ "-": 10,
+ "*": 20,
+ "^": 30,
+ UNARY: 40,
+}
+
+unary_operators = {
+ "-": lambda a: -a,
+ "+": lambda a: a
+}
+
+operators = {
+ "+": lambda a, b: a + b,
+ "-": lambda a, b: a - b,
+ "*": lambda a, b: a * b,
+ "^": lambda a, b: a ** b,
}
+OPERATOR_SYMBOLS = "*+-/%&~^|#$.:<=>@"
+NUMBER_SYMBOLS = "0123456789"
+NAME_SYMBOLS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?"
+
+
+class ParserError(Exception):
+ pass
+
class Node:
- def __init__(self, node_type, value):
+ def __init__(self, node_type, value=None, subtype=None):
self.type = node_type
self.value = value
+ self.subtype = subtype
self.parent = None
self.right = None
self.left = None
@@ -34,21 +55,16 @@ class Node:
return "Node(RIGHT_PARENTHESIS)"
if self.type == NUMBER:
return f"Node({self.value})"
- if self.type == MULTIPLY:
- return "Node(MULTIPLY)"
- if self.type == PLUS:
- return "Node(PLUS)"
- if self.type == MINUS:
- return "Node(MINUS)"
- if self.type == POW:
- return "Node(POW)"
+ if self.type == OPERATOR:
+ return f"Node({self.subtype})"
__str__ = __repr__
class Token:
- def __init__(self, node_type, value):
+ def __init__(self, node_type, value=None, subtype=None):
self.type = node_type
self.value = value
+ self.subtype = subtype
def __eq__(self, other):
return self.type == other.type and self.value == other.value
@@ -60,17 +76,21 @@ class Token:
return "Token(RIGHT_PARENTHESIS)"
if self.type == NUMBER:
return f"Token({self.value})"
- if self.type == MULTIPLY:
- return "Token(MULTIPLY)"
- if self.type == PLUS:
- return "Token(PLUS)"
- if self.type == MINUS:
- return "Token(MINUS)"
- if self.type == POW:
- return "Token(POW)"
+ if self.type == OPERATOR:
+ return f"Token({self.subtype})"
__str__ = __repr__
+def parse_operator(line, start):
+ operator = ""
+ i = start
+ while i < len(line) and line[i] in OPERATOR_SYMBOLS:
+ operator += line[i]
+ i += 1
+
+ return operator, i - 1
+
+
def parse_number(line, start):
number = ""
if line[start] in "+-":
@@ -78,15 +98,15 @@ def parse_number(line, start):
start += 1
i = start
- while i < len(line) and line[i] in "0123456789":
+ while i < len(line) and line[i] in NUMBER_SYMBOLS:
number += line[i]
i += 1
- return int(number), i - 1
+ return float(number), i - 1
def tokenize(line):
- state = STATE_NUMBER
+ state = STATE_NAME
tokens = []
i = 0
@@ -94,28 +114,19 @@ def tokenize(line):
char = line[i]
if char == "(":
tokens.append(Token(LEFT_PARENTHESIS, None))
- state = STATE_NUMBER
+ state = STATE_NAME
elif char == ")":
tokens.append(Token(RIGHT_PARENTHESIS, None))
state = STATE_OPERATOR
- elif char == "+" and state == STATE_OPERATOR:
- tokens.append(Token(PLUS, None))
- state = STATE_NUMBER
- elif char == "-" and state == STATE_OPERATOR:
- tokens.append(Token(MINUS, None))
- state = STATE_NUMBER
- elif char == "*":
- if state == STATE_OPERATOR:
- tokens.append(Token(MULTIPLY, None))
- state = STATE_NUMBER
- else:
- raise ValueError("Line is not a valid expression")
- elif char == "^":
+ elif char in OPERATOR_SYMBOLS:
if state == STATE_OPERATOR:
- tokens.append(Token(POW, None))
- state = STATE_NUMBER
- else:
- raise ValueError("Line is not a valid expression")
+ val, i = parse_operator(line, i)
+ tokens.append(Token(OPERATOR, subtype=val))
+ state = STATE_NAME
+ elif state == STATE_NAME:
+ val, i = parse_operator(line, i)
+ tokens.append(Token(OPERATOR, subtype=UNARY, value=val))
+ state = STATE_NAME
elif char in "-+0123456789":
val, i = parse_number(line, i)
tokens.append(Token(NUMBER, val))
@@ -148,17 +159,20 @@ def build_tree(tokens):
current_node = None
while i < len(tokens):
token = tokens[i]
- if token.type in [LEFT_PARENTHESIS, NUMBER]:
+ if token.type in [LEFT_PARENTHESIS, NUMBER] or token.subtype == UNARY:
if token.type == LEFT_PARENTHESIS:
node, i = parse_parenthesis(tokens, i)
elif token.type == NUMBER:
node = Node(NUMBER, token.value)
+ elif token.subtype == UNARY:
+ node = Node(OPERATOR, subtype=UNARY)
+ node.left = Node(OPERATOR, subtype=token.value)
if current_node is None:
current_node = node
elif current_node.type == NUMBER:
raise ValueError("Not a valid expression")
- elif current_node.type in OPERATORS:
+ elif current_node.type == OPERATOR:
if current_node.left is None:
current_node.left = node
node.parent = current_node
@@ -169,13 +183,13 @@ def build_tree(tokens):
else:
raise ValueError("Not a valid expression")
- elif token.type in OPERATORS:
- node = Node(token.type, None)
+ elif token.type == OPERATOR:
+ node = Node(token.type, subtype=token.subtype, value=token.value)
if current_node is None:
raise ValueError("Not a valid expression")
- elif current_node.type in [NUMBER] + OPERATORS:
+ elif current_node.type in [NUMBER, OPERATOR]:
while current_node.parent is not None:
- if PRECEDENCE[current_node.parent.type] < PRECEDENCE[node.type]:
+ if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
node.parent = current_node.parent
if node.parent is not None:
node.parent.right = node
@@ -196,21 +210,22 @@ def build_tree(tokens):
def expr_eval(node):
if node.type == NUMBER:
return node.value
- elif node.type == MULTIPLY:
- return expr_eval(node.left) * expr_eval(node.right)
- elif node.type == PLUS:
- return expr_eval(node.left) + expr_eval(node.right)
- elif node.type == MINUS:
- return expr_eval(node.left) - expr_eval(node.right)
- elif node.type == POW:
- return expr_eval(node.left) ** expr_eval(node.right)
+ elif node.type == OPERATOR:
+ if node.subtype == UNARY:
+ return unary_operators.get(node.left.subtype)(expr_eval(node.right))
+ else:
+ return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right))
def main(*argv):
data = argv[0]
+ # data = "2 + 3 * -4"
+ # data = "(3 + 3 * 2) * -4"
tokens = tokenize(data)
+ # print(tokens)
node1 = build_tree(tokens)
print(expr_eval(node1))
+ print("hooy")
if __name__ == "__main__":