1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
from consts import *
class TokenizerError(Exception):
    """Tokenizer-specific exception type.

    Going by its name this is meant for tokenizer failures; it carries no
    behavior beyond ``Exception``.
    """
class Token:
    """A single lexical token.

    The constructor arguments are stored unchanged as the attributes
    ``type`` (from ``node_type``), ``value`` and ``subtype``.
    """

    def __init__(self, node_type, value=None, subtype=None):
        """Store the token's type, optional value and optional subtype."""
        self.type = node_type
        self.value = value
        self.subtype = subtype

    def __eq__(self, other):
        """Return True when ``other`` has the same ``type`` and ``value``.

        ``subtype`` is not part of the comparison.
        NOTE(review): tokens that differ only in ``subtype`` therefore
        compare equal -- confirm that this is intended.

        Objects without ``type``/``value`` attributes yield
        ``NotImplemented`` so that ``token == "x"`` evaluates to False
        instead of raising AttributeError.
        """
        try:
            return self.type == other.type and self.value == other.value
        except AttributeError:
            return NotImplemented

    def __repr__(self):
        """Return a short debug string selected by the token's type."""
        if self.type == LEFT_PARENTHESIS:
            return "Token(LEFT_PARENTHESIS)"
        if self.type == RIGHT_PARENTHESIS:
            return "Token(RIGHT_PARENTHESIS)"
        if self.type == NUMBER:
            return f"Token({self.value})"
        if self.type == OPERATOR:
            return f"Token({self.subtype})"
        if self.type == COMMA:
            return "Token(COMMA)"
        if self.type == SYMBOL:
            return f"Token(SYMBOL {self.value})"
        # Unrecognised type: always hand back a string, because returning
        # None from __repr__ makes repr()/str() raise TypeError.
        return f"Token({self.type!r}, {self.value!r}, {self.subtype!r})"

    __str__ = __repr__
def parse_token(line, start, charset):
    """Read the run of characters from ``charset`` that begins at ``start``.

    Scanning stops at the first character of ``line`` that is not in
    ``charset`` or at the end of ``line``.

    Returns:
        A ``(text, last_index)`` pair: the collected characters and the
        index of the last character consumed. When nothing matches, the
        text is ``""`` and the index is ``start - 1``.
    """
    end = start
    while end < len(line) and line[end] in charset:
        end += 1
    text = "".join(line[k] for k in range(start, end))
    # Report the last consumed index, not the one past it: the loop in
    # tokenize adds one to the index after every token.
    return text, end - 1
def parse_operator(line, start):
    """Read a run of ``OPERATOR_CHARS`` characters beginning at ``start``.

    Returns the ``(text, last_index)`` pair produced by ``parse_token``.
    """
    operator_text, last_index = parse_token(line, start, OPERATOR_CHARS)
    return operator_text, last_index
def parse_number(line, start):
    """Read a run of ``NUMBER_CHARS`` characters and convert it to a float.

    Returns:
        ``(number, last_index)`` where ``last_index`` is the index reported
        by ``parse_token``.

    Raises:
        ValueError: if ``float()`` rejects the collected text.
    """
    digits, last_index = parse_token(line, start, NUMBER_CHARS)
    number = float(digits)
    return number, last_index
def parse_symbol(line, start):
    """Read a run of ``SYMBOL_CHARS`` characters beginning at ``start``.

    Returns the ``(text, last_index)`` pair produced by ``parse_token``.
    """
    symbol_text, last_index = parse_token(line, start, SYMBOL_CHARS)
    return symbol_text, last_index
def tokenize(line):
    """Convert an expression string into a list of ``Token`` objects.

    A two-valued state decides how an operator character is classified:
    in ``STATE_NAME`` it becomes a unary operator token (text stored in
    ``value``, ``subtype=UNARY``); in ``STATE_OPERATOR`` it becomes an
    operator token with the text stored in ``subtype``. Scanning starts in
    ``STATE_NAME``, so a leading operator is unary.

    NOTE(review): adjacent operator characters are consumed as one token,
    because ``parse_operator`` reads the whole run -- confirm that inputs
    with a binary operator directly followed by a unary one are handled as
    intended.

    Raises:
        ValueError: on a character that is not a parenthesis, a comma, a
            space, or a member of ``OPERATOR_CHARS``, ``NUMBER_CHARS`` or
            ``SYMBOL_CHARS``.
    """
    # Single-character tokens: character -> (token type, state afterwards).
    punctuation = {
        "(": (LEFT_PARENTHESIS, STATE_NAME),
        ")": (RIGHT_PARENTHESIS, STATE_OPERATOR),
        ",": (COMMA, STATE_NAME),
    }

    result = []
    state = STATE_NAME
    pos = 0
    length = len(line)
    while pos < length:
        ch = line[pos]
        if ch in punctuation:
            kind, state = punctuation[ch]
            result.append(Token(kind, None))
        elif ch in OPERATOR_CHARS:
            if state in (STATE_OPERATOR, STATE_NAME):
                # The helper returns the index of the last character it
                # consumed; the increment at the bottom steps past it.
                text, pos = parse_operator(line, pos)
                if state == STATE_OPERATOR:
                    operator_token = Token(OPERATOR, subtype=text)
                else:
                    operator_token = Token(OPERATOR, subtype=UNARY, value=text)
                result.append(operator_token)
                state = STATE_NAME
        elif ch in NUMBER_CHARS:
            number, pos = parse_number(line, pos)
            result.append(Token(NUMBER, number))
            state = STATE_OPERATOR
        elif ch in SYMBOL_CHARS:
            name, pos = parse_symbol(line, pos)
            result.append(Token(SYMBOL, name))
            state = STATE_OPERATOR
        elif ch != " ":
            raise ValueError("Line is not a valid expression")
        pos += 1
    return result
|