diff options
| author | Andrew <saintruler@gmail.com> | 2021-07-12 01:31:27 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2021-07-12 01:31:27 +0400 |
| commit | abb8ad61e3605a5e17e05b2f300d2b7277bd424a (patch) | |
| tree | f116c820ef35431ee761c44c5ba30922b2cc80ba | |
| parent | febb2ff27f6da9aed93220b80c43b37f51bbea3b (diff) | |
Added ability to call functions
| -rw-r--r-- | tokenizer.py | 130 |
1 file changed, 105 insertions, 25 deletions
diff --git a/tokenizer.py b/tokenizer.py index d3fec34..c5bac82 100644 --- a/tokenizer.py +++ b/tokenizer.py @@ -3,10 +3,11 @@ import sys STATE_OPERATOR, STATE_NAME, *_ = range(100) ( - LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, *_ + LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_ ) = range(100) UNARY = "unary" +FUNCALL = "funcall" PRECEDENCE = { "+": 10, "-": 10, @@ -15,6 +16,12 @@ PRECEDENCE = { UNARY: 40, } +OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@" +NUMBER_CHARS = "0123456789" +SYMBOL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?" + +# RUNTIME + unary_operators = { "-": lambda a: -a, "+": lambda a: a @@ -27,9 +34,22 @@ operators = { "^": lambda a, b: a ** b, } -OPERATOR_SYMBOLS = "*+-/%&~^|#$.:<=>@" -NUMBER_SYMBOLS = "0123456789" -NAME_SYMBOLS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?" +runtime = { + "x": 10 +} + + +def factorial(n): + if n <= 1: + return 1 + return n * factorial(n - 1) + + +runtime_functions = { + "factorial": factorial +} + +# END OF RUNTIME class ParserError(Exception): @@ -57,6 +77,12 @@ class Node: return f"Node({self.value})" if self.type == OPERATOR: return f"Node({self.subtype})" + if self.type == COMMA: + return f"Node(COMMA)" + if self.type == SYMBOL: + return f"Node(SYMBOL {self.value})" + if self.type == FUNCALL: + return f"Node(FUNCALL {self.value})" __str__ = __repr__ @@ -78,31 +104,33 @@ class Token: return f"Token({self.value})" if self.type == OPERATOR: return f"Token({self.subtype})" + if self.type == COMMA: + return f"Token(COMMA)" + if self.type == SYMBOL: + return f"Token(SYMBOL {self.value})" __str__ = __repr__ -def parse_operator(line, start): - operator = "" +def parse_token(line, start, charset): + token = "" i = start - while i < len(line) and line[i] in OPERATOR_SYMBOLS: - operator += line[i] + while i < len(line) and line[i] in charset: + token += line[i] i += 1 + return token, i - 1 - return operator, i - 1 + +def 
parse_operator(line, start): + return parse_token(line, start, OPERATOR_CHARS) def parse_number(line, start): - number = "" - if line[start] in "+-": - number += line[start] - start += 1 + val, i = parse_token(line, start, NUMBER_CHARS) + return float(val), i - i = start - while i < len(line) and line[i] in NUMBER_SYMBOLS: - number += line[i] - i += 1 - return float(number), i - 1 +def parse_symbol(line, start): + return parse_token(line, start, SYMBOL_CHARS) def tokenize(line): @@ -118,7 +146,10 @@ def tokenize(line): elif char == ")": tokens.append(Token(RIGHT_PARENTHESIS, None)) state = STATE_OPERATOR - elif char in OPERATOR_SYMBOLS: + elif char == ",": + tokens.append(Token(COMMA, None)) + state = STATE_NAME + elif char in OPERATOR_CHARS: if state == STATE_OPERATOR: val, i = parse_operator(line, i) tokens.append(Token(OPERATOR, subtype=val)) @@ -127,10 +158,14 @@ def tokenize(line): val, i = parse_operator(line, i) tokens.append(Token(OPERATOR, subtype=UNARY, value=val)) state = STATE_NAME - elif char in "-+0123456789": + elif char in NUMBER_CHARS: val, i = parse_number(line, i) tokens.append(Token(NUMBER, val)) state = STATE_OPERATOR + elif char in SYMBOL_CHARS: + val, i = parse_symbol(line, i) + tokens.append(Token(SYMBOL, val)) + state = STATE_OPERATOR elif char != " ": raise ValueError("Line is not a valid expression") i += 1 @@ -154,19 +189,57 @@ def parse_parenthesis(tokens, start): return node, end +def parse_args(tokens, start): + end = start + depth = 0 + while True: + token = tokens[end] + if token.type == RIGHT_PARENTHESIS: + depth -= 1 + if token.type == LEFT_PARENTHESIS: + depth += 1 + if depth == 0 and token.type == RIGHT_PARENTHESIS: + break + end += 1 + + args = [] + arg = [] + for token in tokens[start + 1: end]: + if token.type != COMMA: + arg.append(token) + else: + args.append(arg.copy()) + arg.clear() + if len(arg) != 0: + args.append(arg) + + return list(map(build_tree, args)) + + def build_tree(tokens): + state = STATE_NAME i = 0 
current_node = None while i < len(tokens): token = tokens[i] - if token.type in [LEFT_PARENTHESIS, NUMBER] or token.subtype == UNARY: + if state == STATE_OPERATOR and token.type == LEFT_PARENTHESIS: + args = parse_args(tokens, i) + current_node.type = FUNCALL + current_node.value = (current_node.value, args) + elif token.type in [LEFT_PARENTHESIS, NUMBER, SYMBOL] or token.subtype == UNARY: if token.type == LEFT_PARENTHESIS: node, i = parse_parenthesis(tokens, i) + state = STATE_NAME elif token.type == NUMBER: - node = Node(NUMBER, token.value) + node = Node(NUMBER, value=token.value) + state = STATE_OPERATOR + elif token.type == SYMBOL: + node = Node(SYMBOL, value=token.value) + state = STATE_OPERATOR elif token.subtype == UNARY: node = Node(OPERATOR, subtype=UNARY) node.left = Node(OPERATOR, subtype=token.value) + state = STATE_NAME if current_node is None: current_node = node @@ -182,12 +255,11 @@ def build_tree(tokens): current_node = node else: raise ValueError("Not a valid expression") - elif token.type == OPERATOR: node = Node(token.type, subtype=token.subtype, value=token.value) if current_node is None: raise ValueError("Not a valid expression") - elif current_node.type in [NUMBER, OPERATOR]: + elif current_node.type in [NUMBER, OPERATOR, FUNCALL]: while current_node.parent is not None: if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]: node.parent = current_node.parent @@ -199,7 +271,9 @@ def build_tree(tokens): node.left = current_node current_node = node else: + print(current_node, token) raise ValueError("Not a valid expression") + state = STATE_NAME i += 1 while current_node.parent is not None: @@ -210,6 +284,12 @@ def build_tree(tokens): def expr_eval(node): if node.type == NUMBER: return node.value + elif node.type == SYMBOL: + return runtime.get(node.value) + elif node.type == FUNCALL: + fun_name = node.value[0] + args = list(map(expr_eval, node.value[1])) + return runtime_functions.get(fun_name)(*args) elif node.type == OPERATOR: 
if node.subtype == UNARY: return unary_operators.get(node.left.subtype)(expr_eval(node.right)) @@ -221,11 +301,11 @@ def main(*argv): data = argv[0] # data = "2 + 3 * -4" # data = "(3 + 3 * 2) * -4" + # data = "(2 + factorial(4, )) * 9" tokens = tokenize(data) # print(tokens) node1 = build_tree(tokens) print(expr_eval(node1)) - print("hooy") if __name__ == "__main__": |