summaryrefslogtreecommitdiff
path: root/tokenizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'tokenizer.py')
-rw-r--r--tokenizer.py52
1 files changed, 39 insertions, 13 deletions
diff --git a/tokenizer.py b/tokenizer.py
index 13b824a..4fd534c 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -6,7 +6,7 @@ class TokenizerError(Exception):
class Token:
- def __init__(self, node_type, value=None, subtype=None):
+ def __init__(self, node_type, *, value=None, subtype=None):
self.type = node_type
self.value = value
self.subtype = subtype
@@ -27,6 +27,19 @@ class Token:
return f"Token(COMMA)"
if self.type == TokenType.SYMBOL:
return f"Token(SYMBOL {self.value})"
+ if self.type == TokenType.KEYWORD:
+ return f"Token(KEYWORD {keywords_repr[self.subtype]})"
+ if self.type == TokenType.EQUALS:
+ return f"Token(=)"
+ if self.type == TokenType.SEMICOLON:
+ return f"Token(;)"
+ if self.type == TokenType.LEFT_BRACE:
+ return "Token(LEFT_BRACE)"
+ if self.type == TokenType.RIGHT_BRACE:
+ return "Token(RIGHT_BRACE)"
+
+
+ return f"Token(repr not defined)"
__str__ = __repr__
@@ -60,35 +73,48 @@ def tokenize(line):
while i < len(line):
char = line[i]
if char == "(":
- tokens.append(Token(TokenType.LEFT_PARENTHESIS, None))
+ tokens.append(Token(TokenType.LEFT_PARENTHESIS))
state = State.NAME
elif char == ")":
- tokens.append(Token(TokenType.RIGHT_PARENTHESIS, None))
+ tokens.append(Token(TokenType.RIGHT_PARENTHESIS))
state = State.OPERATOR
elif char == ",":
- tokens.append(Token(TokenType.COMMA, None))
+ tokens.append(Token(TokenType.COMMA))
+ state = State.NAME
+ elif char == ";":
+ tokens.append(Token(TokenType.SEMICOLON))
+ state = State.NAME
+ elif char == "{":
+ tokens.append(Token(TokenType.LEFT_BRACE))
+ state = State.NAME
+ elif char == "}":
+ tokens.append(Token(TokenType.RIGHT_BRACE))
state = State.NAME
elif char in OPERATOR_CHARS:
- if state == State.OPERATOR:
- val, i = parse_operator(line, i)
+ val, i = parse_operator(line, i)
+ if val == "=":
+ tokens.append(Token(TokenType.EQUALS))
+ state = State.NAME
+ elif state == State.OPERATOR:
tokens.append(Token(TokenType.OPERATOR, subtype=val))
state = State.NAME
elif state == State.NAME:
- val, i = parse_operator(line, i)
- tokens.append(Token(TokenType.OPERATOR, subtype=UNARY, value=val))
+ tokens.append(Token(TokenType.OPERATOR, subtype=TokenType.UNARY, value=val))
state = State.NAME
elif char in NUMBER_CHARS:
val, i = parse_number(line, i)
- tokens.append(Token(TokenType.NUMBER, val))
+ tokens.append(Token(TokenType.NUMBER, value=val))
state = State.OPERATOR
elif char in SYMBOL_CHARS:
val, i = parse_symbol(line, i)
- tokens.append(Token(TokenType.SYMBOL, val))
- state = State.OPERATOR
+ if val in keywords:
+ tokens.append(Token(TokenType.KEYWORD, subtype=keywords[val]))
+ state = State.NAME
+ else:
+ tokens.append(Token(TokenType.SYMBOL, value=val))
+ state = State.OPERATOR
elif char != " ":
raise ValueError("Line is not a valid expression")
i += 1
return tokens
-
-