from consts import *


class TokenizerError(Exception):
    pass


class Token:
    def __init__(self, node_type, *, value=None, subtype=None):
        self.type = node_type
        self.value = value
        self.subtype = subtype

    def __eq__(self, other):
        # Tokens compare by type and value only.
        return self.type == other.type and self.value == other.value

    def __repr__(self):
        if self.type == TokenType.LEFT_PARENTHESIS:
            return "Token(LEFT_PARENTHESIS)"
        if self.type == TokenType.RIGHT_PARENTHESIS:
            return "Token(RIGHT_PARENTHESIS)"
        if self.type == TokenType.NUMBER:
            return f"Token({self.value})"
        if self.type == TokenType.OPERATOR:
            return f"Token({self.subtype})"
        if self.type == TokenType.COMMA:
            return "Token(COMMA)"
        if self.type == TokenType.SYMBOL:
            return f"Token(SYMBOL {self.value})"
        if self.type == TokenType.KEYWORD:
            return f"Token(KEYWORD {keywords_repr[self.subtype]})"
        if self.type == TokenType.EQUALS:
            return "Token(=)"
        if self.type == TokenType.SEMICOLON:
            return "Token(;)"
        if self.type == TokenType.LEFT_BRACE:
            return "Token(LEFT_BRACE)"
        if self.type == TokenType.RIGHT_BRACE:
            return "Token(RIGHT_BRACE)"
        return "Token(repr not defined)"

    __str__ = __repr__


def parse_token(line, start, charset):
    """Consume consecutive characters from `charset` starting at `start`.

    Returns the consumed string and the index of its last character.
    """
    token = ""
    i = start
    while i < len(line) and line[i] in charset:
        token += line[i]
        i += 1
    return token, i - 1


def parse_operator(line, start):
    return parse_token(line, start, OPERATOR_CHARS)


def parse_number(line, start):
    val, i = parse_token(line, start, NUMBER_CHARS)
    return float(val), i


def parse_symbol(line, start):
    return parse_token(line, start, SYMBOL_CHARS)


def tokenize(line):
    # State.NAME means an operand (number, symbol, keyword, "(") is expected
    # next, so an operator character seen here is treated as unary.
    # State.OPERATOR means a binary operator is expected.
    state = State.NAME
    tokens = []
    i = 0
    while i < len(line):
        char = line[i]
        if char == "(":
            tokens.append(Token(TokenType.LEFT_PARENTHESIS))
            state = State.NAME
        elif char == ")":
            tokens.append(Token(TokenType.RIGHT_PARENTHESIS))
            state = State.OPERATOR
        elif char == ",":
            tokens.append(Token(TokenType.COMMA))
            state = State.NAME
        elif char == ";":
            tokens.append(Token(TokenType.SEMICOLON))
            state = State.NAME
        elif char == "{":
            tokens.append(Token(TokenType.LEFT_BRACE))
            state = State.NAME
        elif char == "}":
            tokens.append(Token(TokenType.RIGHT_BRACE))
            state = State.NAME
        elif char in OPERATOR_CHARS:
            val, i = parse_operator(line, i)
            if val == "=":
                tokens.append(Token(TokenType.EQUALS))
                state = State.NAME
            elif state == State.OPERATOR:
                # Binary operator: the operator text is stored in `subtype`.
                tokens.append(Token(TokenType.OPERATOR, subtype=val))
                state = State.NAME
            elif state == State.NAME:
                # Unary operator: marked via `subtype`, operator text in `value`.
                tokens.append(Token(TokenType.OPERATOR, subtype=TokenType.UNARY, value=val))
                state = State.NAME
        elif char in NUMBER_CHARS:
            val, i = parse_number(line, i)
            tokens.append(Token(TokenType.NUMBER, value=val))
            state = State.OPERATOR
        elif char in SYMBOL_CHARS:
            val, i = parse_symbol(line, i)
            if val in keywords:
                tokens.append(Token(TokenType.KEYWORD, subtype=keywords[val]))
                state = State.NAME
            else:
                tokens.append(Token(TokenType.SYMBOL, value=val))
                state = State.OPERATOR
        elif char != " ":
            raise TokenizerError("Line is not a valid expression")
        i += 1
    return tokens
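

# A minimal usage sketch, assuming `consts` defines State, TokenType, the
# character sets (OPERATOR_CHARS, NUMBER_CHARS, SYMBOL_CHARS) and the
# keywords / keywords_repr tables; the sample input below is illustrative
# and assumes letters are in SYMBOL_CHARS and digits in NUMBER_CHARS.
if __name__ == "__main__":
    # Expected to print something along the lines of:
    #   [Token(SYMBOL x), Token(=), Token(2.0), Token(+), Token(3.0), Token(;)]
    print(tokenize("x = 2 + 3;"))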