summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andrew <saintruler@gmail.com> 2021-07-12 01:31:27 +0400
committer Andrew <saintruler@gmail.com> 2021-07-12 01:31:27 +0400
commit abb8ad61e3605a5e17e05b2f300d2b7277bd424a (patch)
tree f116c820ef35431ee761c44c5ba30922b2cc80ba
parent febb2ff27f6da9aed93220b80c43b37f51bbea3b (diff)
Added ability to call functions
-rw-r--r-- tokenizer.py 130
1 files changed, 105 insertions, 25 deletions
diff --git a/tokenizer.py b/tokenizer.py
index d3fec34..c5bac82 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -3,10 +3,11 @@ import sys
STATE_OPERATOR, STATE_NAME, *_ = range(100)
(
- LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, *_
+ LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_
) = range(100)
UNARY = "unary"
+FUNCALL = "funcall"
PRECEDENCE = {
"+": 10,
"-": 10,
@@ -15,6 +16,12 @@ PRECEDENCE = {
UNARY: 40,
}
+OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@"
+NUMBER_CHARS = "0123456789"
+SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?"
+
+# RUNTIME
+
unary_operators = {
"-": lambda a: -a,
"+": lambda a: a
@@ -27,9 +34,22 @@ operators = {
"^": lambda a, b: a ** b,
}
-OPERATOR_SYMBOLS = "*+-/%&~^|#$.:<=>@"
-NUMBER_SYMBOLS = "0123456789"
-NAME_SYMBOLS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?"
+runtime = {
+ "x": 10
+}
+
+
+def factorial(n):
+ if n <= 1:
+ return 1
+ return n * factorial(n - 1)
+
+
+runtime_functions = {
+ "factorial": factorial
+}
+
+# END OF RUNTIME
class ParserError(Exception):
@@ -57,6 +77,12 @@ class Node:
return f"Node({self.value})"
if self.type == OPERATOR:
return f"Node({self.subtype})"
+ if self.type == COMMA:
+ return f"Node(COMMA)"
+ if self.type == SYMBOL:
+ return f"Node(SYMBOL {self.value})"
+ if self.type == FUNCALL:
+ return f"Node(FUNCALL {self.value})"
__str__ = __repr__
@@ -78,31 +104,33 @@ class Token:
return f"Token({self.value})"
if self.type == OPERATOR:
return f"Token({self.subtype})"
+ if self.type == COMMA:
+ return f"Token(COMMA)"
+ if self.type == SYMBOL:
+ return f"Token(SYMBOL {self.value})"
__str__ = __repr__
-def parse_operator(line, start):
- operator = ""
+def parse_token(line, start, charset):
+ token = ""
i = start
- while i < len(line) and line[i] in OPERATOR_SYMBOLS:
- operator += line[i]
+ while i < len(line) and line[i] in charset:
+ token += line[i]
i += 1
+ return token, i - 1
- return operator, i - 1
+
+def parse_operator(line, start):
+ return parse_token(line, start, OPERATOR_CHARS)
def parse_number(line, start):
- number = ""
- if line[start] in "+-":
- number += line[start]
- start += 1
+ val, i = parse_token(line, start, NUMBER_CHARS)
+ return float(val), i
- i = start
- while i < len(line) and line[i] in NUMBER_SYMBOLS:
- number += line[i]
- i += 1
- return float(number), i - 1
+def parse_symbol(line, start):
+ return parse_token(line, start, SYMBOL_CHARS)
def tokenize(line):
@@ -118,7 +146,10 @@ def tokenize(line):
elif char == ")":
tokens.append(Token(RIGHT_PARENTHESIS, None))
state = STATE_OPERATOR
- elif char in OPERATOR_SYMBOLS:
+ elif char == ",":
+ tokens.append(Token(COMMA, None))
+ state = STATE_NAME
+ elif char in OPERATOR_CHARS:
if state == STATE_OPERATOR:
val, i = parse_operator(line, i)
tokens.append(Token(OPERATOR, subtype=val))
@@ -127,10 +158,14 @@ def tokenize(line):
val, i = parse_operator(line, i)
tokens.append(Token(OPERATOR, subtype=UNARY, value=val))
state = STATE_NAME
- elif char in "-+0123456789":
+ elif char in NUMBER_CHARS:
val, i = parse_number(line, i)
tokens.append(Token(NUMBER, val))
state = STATE_OPERATOR
+ elif char in SYMBOL_CHARS:
+ val, i = parse_symbol(line, i)
+ tokens.append(Token(SYMBOL, val))
+ state = STATE_OPERATOR
elif char != " ":
raise ValueError("Line is not a valid expression")
i += 1
@@ -154,19 +189,57 @@ def parse_parenthesis(tokens, start):
return node, end
+def parse_args(tokens, start):
+ end = start
+ depth = 0
+ while True:
+ token = tokens[end]
+ if token.type == RIGHT_PARENTHESIS:
+ depth -= 1
+ if token.type == LEFT_PARENTHESIS:
+ depth += 1
+ if depth == 0 and token.type == RIGHT_PARENTHESIS:
+ break
+ end += 1
+
+ args = []
+ arg = []
+ for token in tokens[start + 1: end]:
+ if token.type != COMMA:
+ arg.append(token)
+ else:
+ args.append(arg.copy())
+ arg.clear()
+ if len(arg) != 0:
+ args.append(arg)
+
+ return list(map(build_tree, args))
+
+
def build_tree(tokens):
+ state = STATE_NAME
i = 0
current_node = None
while i < len(tokens):
token = tokens[i]
- if token.type in [LEFT_PARENTHESIS, NUMBER] or token.subtype == UNARY:
+ if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS:
+ args = parse_args(tokens, i)
+ current_node.type = FUNCALL
+ current_node.value = (current_node.value, args)
+ elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY:
if token.type == LEFT_PARENTHESIS:
node, i = parse_parenthesis(tokens, i)
+ state = STATE_NAME
elif token.type == NUMBER:
- node = Node(NUMBER, token.value)
+ node = Node(NUMBER, value=token.value)
+ state = STATE_OPERATOR
+ elif token.type == SYMBOL:
+ node = Node(SYMBOL, value=token.value)
+ state = STATE_OPERATOR
elif token.subtype == UNARY:
node = Node(OPERATOR, subtype=UNARY)
node.left = Node(OPERATOR, subtype=token.value)
+ state = STATE_NAME
if current_node is None:
current_node = node
@@ -182,12 +255,11 @@ def build_tree(tokens):
current_node = node
else:
raise ValueError("Not a valid expression")
-
elif token.type == OPERATOR:
node = Node(token.type, subtype=token.subtype, value=token.value)
if current_node is None:
raise ValueError("Not a valid expression")
- elif current_node.type in [NUMBER, OPERATOR]:
+ elif current_node.type in [NUMBER, OPERATOR, FUNCALL]:
while current_node.parent is not None:
if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
node.parent = current_node.parent
@@ -199,7 +271,9 @@ def build_tree(tokens):
node.left = current_node
current_node = node
else:
+ print(current_node, token)
raise ValueError("Not a valid expression")
+ state = STATE_NAME
i += 1
while current_node.parent is not None:
@@ -210,6 +284,12 @@ def build_tree(tokens):
def expr_eval(node):
if node.type == NUMBER:
return node.value
+ elif node.type == SYMBOL:
+ return runtime.get(node.value)
+ elif node.type == FUNCALL:
+ fun_name = node.value[0]
+ args = list(map(expr_eval, node.value[1]))
+ return runtime_functions.get(fun_name)(*args)
elif node.type == OPERATOR:
if node.subtype == UNARY:
return unary_operators.get(node.left.subtype)(expr_eval(node.right))
@@ -221,11 +301,11 @@ def main(*argv):
data = argv[0]
# data = "2 + 3 * -4"
# data = "(3 + 3 * 2) * -4"
+ # data = "(2 + factorial(4, )) * 9"
tokens = tokenize(data)
# print(tokens)
node1 = build_tree(tokens)
print(expr_eval(node1))
- print("hooy")
if __name__ == "__main__":