1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
from consts import *
class TokenizerError(Exception):
    """Error raised when a source line cannot be tokenized."""
class Token:
    """A single lexical token produced by tokenize().

    Attributes:
        type: the TokenType category of this token.
        value: payload for NUMBER / SYMBOL tokens (and for unary
            operators, which store the operator text here).
        subtype: discriminator for OPERATOR and KEYWORD tokens.
    """
    def __init__(self, node_type, *, value=None, subtype=None):
        self.type = node_type
        self.value = value
        self.subtype = subtype
    def __eq__(self, other):
        # NOTE(review): subtype is ignored here, so two OPERATOR tokens
        # with different subtypes (e.g. "+" vs "*") compare equal when
        # both have value=None -- confirm this is intended before relying
        # on == for operator tokens.
        return self.type == other.type and self.value == other.value
    def __repr__(self):
        if self.type == TokenType.LEFT_PARENTHESIS:
            return "Token(LEFT_PARENTHESIS)"
        if self.type == TokenType.RIGHT_PARENTHESIS:
            return "Token(RIGHT_PARENTHESIS)"
        if self.type == TokenType.NUMBER:
            return f"Token({self.value})"
        if self.type == TokenType.OPERATOR:
            return f"Token({self.subtype})"
        if self.type == TokenType.COMMA:
            return "Token(COMMA)"
        if self.type == TokenType.SYMBOL:
            return f"Token(SYMBOL {self.value})"
        if self.type == TokenType.KEYWORD:
            return f"Token(KEYWORD {keywords_repr[self.subtype]})"
        if self.type == TokenType.EQUALS:
            return "Token(=)"
        if self.type == TokenType.SEMICOLON:
            return "Token(;)"
        if self.type == TokenType.LEFT_BRACE:
            return "Token(LEFT_BRACE)"
        # BUG FIX: this branch previously re-tested LEFT_BRACE, making the
        # RIGHT_BRACE repr unreachable.
        if self.type == TokenType.RIGHT_BRACE:
            return "Token(RIGHT_BRACE)"
        return "Token(repr not defined)"
    __str__ = __repr__
def parse_token(line, start, charset):
    """Scan forward from *start*, collecting consecutive characters of
    *line* that belong to *charset*.

    Returns a ``(token, last_index)`` pair, where ``last_index`` is the
    index of the final character consumed (``start - 1`` when the very
    first character is not in *charset*).
    """
    end = start
    while end < len(line) and line[end] in charset:
        end += 1
    return line[start:end], end - 1
def parse_operator(line, start):
    """Read a run of operator characters beginning at *start*.

    Returns (operator_text, index_of_last_consumed_char), mirroring
    parse_token's contract.
    """
    return parse_token(line, start, OPERATOR_CHARS)
def parse_number(line, start):
    """Read a numeric literal beginning at *start*.

    Returns (float_value, index_of_last_consumed_char). Raises
    ValueError (from float()) if the scanned text is not a valid number.
    """
    token, last = parse_token(line, start, NUMBER_CHARS)
    return float(token), last
def parse_symbol(line, start):
    """Read an identifier/keyword candidate beginning at *start*.

    Returns (symbol_text, index_of_last_consumed_char), mirroring
    parse_token's contract.
    """
    return parse_token(line, start, SYMBOL_CHARS)
def tokenize(line):
    """Convert a source *line* into a list of Token objects.

    Runs a two-state scanner: State.NAME means "an operand is expected
    next" (so an operator here is unary), State.OPERATOR means "an
    operand was just completed" (so an operator here is binary).

    Raises ValueError on any character that is not punctuation, an
    operator, a number, a symbol, or a space.
    NOTE(review): TokenizerError is defined in this module but never
    raised -- confirm whether this ValueError should be a TokenizerError.
    """
    state = State.NAME
    tokens = []
    i = 0
    while i < len(line):
        char = line[i]
        if char == "(":
            tokens.append(Token(TokenType.LEFT_PARENTHESIS))
            state = State.NAME
        elif char == ")":
            # A closing paren ends an operand, so a following operator
            # must be binary.
            tokens.append(Token(TokenType.RIGHT_PARENTHESIS))
            state = State.OPERATOR
        elif char == ",":
            tokens.append(Token(TokenType.COMMA))
            state = State.NAME
        elif char == ";":
            tokens.append(Token(TokenType.SEMICOLON))
            state = State.NAME
        elif char == "{":
            tokens.append(Token(TokenType.LEFT_BRACE))
            state = State.NAME
        elif char == "}":
            tokens.append(Token(TokenType.RIGHT_BRACE))
            state = State.NAME
        elif char in OPERATOR_CHARS:
            # Greedily consume the whole operator run; i lands on its
            # last character (the loop tail advances past it).
            val, i = parse_operator(line, i)
            if val == "=":
                tokens.append(Token(TokenType.EQUALS))
                state = State.NAME
            elif state == State.OPERATOR:
                tokens.append(Token(TokenType.OPERATOR, subtype=val))
                state = State.NAME
            elif state == State.NAME:
                # Operator in operand position => unary.
                # NOTE(review): unary tokens carry the operator text in
                # `value` with subtype=TokenType.UNARY, while binary
                # operators carry it in `subtype` -- confirm the parser
                # expects this asymmetric shape.
                tokens.append(Token(TokenType.OPERATOR, subtype=TokenType.UNARY, value=val))
                state = State.NAME
        elif char in NUMBER_CHARS:
            val, i = parse_number(line, i)
            tokens.append(Token(TokenType.NUMBER, value=val))
            state = State.OPERATOR
        elif char in SYMBOL_CHARS:
            val, i = parse_symbol(line, i)
            if val in keywords:
                tokens.append(Token(TokenType.KEYWORD, subtype=keywords[val]))
                state = State.NAME
            else:
                tokens.append(Token(TokenType.SYMBOL, value=val))
                state = State.OPERATOR
        elif char != " ":
            raise ValueError("Line is not a valid expression")
        i += 1
    return tokens
|