from consts import *


class TokenizerError(ValueError):
    """Raised when a line cannot be split into valid tokens.

    Derives from ``ValueError`` so that callers which already catch
    ``ValueError`` from :func:`tokenize` keep working unchanged.
    """


class Token:
    """A single lexical token produced by :func:`tokenize`.

    Attributes:
        type: Token category; compared against the ``consts`` constants
            LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, COMMA
            and SYMBOL.
        value: Payload of the token: the float for a NUMBER, the name for
            a SYMBOL, the operator text for a unary OPERATOR, otherwise
            ``None``.
        subtype: For an OPERATOR either ``UNARY`` or the operator text
            itself (non-unary case); ``None`` for every other token.

    Defining ``__eq__`` without ``__hash__`` leaves instances unhashable.
    """

    def __init__(self, node_type, value=None, subtype=None):
        self.type = node_type
        self.value = value
        self.subtype = subtype

    def __eq__(self, other):
        """Return True when type, value and subtype all match.

        ``subtype`` takes part in the comparison because non-unary
        operators keep their text there (with ``value`` left as ``None``);
        without it every such operator would compare equal to any other.
        """
        if not isinstance(other, Token):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on foreign objects such as None.
            return NotImplemented
        return (
            self.type == other.type
            and self.value == other.value
            and self.subtype == other.subtype
        )

    def __repr__(self):
        if self.type == LEFT_PARENTHESIS:
            return "Token(LEFT_PARENTHESIS)"
        if self.type == RIGHT_PARENTHESIS:
            return "Token(RIGHT_PARENTHESIS)"
        if self.type == NUMBER:
            return f"Token({self.value})"
        if self.type == OPERATOR:
            return f"Token({self.subtype})"
        if self.type == COMMA:
            return "Token(COMMA)"
        if self.type == SYMBOL:
            return f"Token(SYMBOL {self.value})"
        # __repr__ must always return a string; an unknown type used to
        # fall through and return None, which makes repr() raise TypeError.
        return (
            f"Token(type={self.type!r}, value={self.value!r}, "
            f"subtype={self.subtype!r})"
        )

    __str__ = __repr__


def parse_token(line, start, charset):
    """Read the longest run of characters from *charset* beginning at *start*.

    Args:
        line: Text being scanned.
        start: Index of the first character to examine.
        charset: Container of characters that may belong to the token.

    Returns:
        A ``(text, last_index)`` tuple where ``last_index`` is the index of
        the final consumed character (``start - 1`` when nothing matched),
        so the caller can simply advance by one afterwards.
    """
    end = start
    while end < len(line) and line[end] in charset:
        end += 1
    # One slice instead of growing a string character by character.
    return line[start:end], end - 1


def parse_operator(line, start):
    """Read a run of OPERATOR_CHARS; see :func:`parse_token` for the result."""
    return parse_token(line, start, OPERATOR_CHARS)


def parse_number(line, start):
    """Read a run of NUMBER_CHARS and convert it to ``float``.

    Returns:
        A ``(number, last_index)`` tuple, ``last_index`` as in
        :func:`parse_token`.

    Raises:
        TokenizerError: If the characters read do not form a valid float
            literal.
    """
    text, last = parse_token(line, start, NUMBER_CHARS)
    try:
        number = float(text)
    except ValueError as err:
        # NOTE(review): reachable if NUMBER_CHARS permits runs such as
        # "1.2.3" -- confirm against consts.
        raise TokenizerError(f"Invalid number literal: {text!r}") from err
    return number, last


def parse_symbol(line, start):
    """Read a run of SYMBOL_CHARS; see :func:`parse_token` for the result."""
    return parse_token(line, start, SYMBOL_CHARS)


def tokenize(line):
    """Split an expression string into a list of :class:`Token` objects.

    The scanner tracks whether an operand or an operator is expected next.
    It starts in ``STATE_NAME`` and returns to it after ``(``, ``,`` or an
    operator; an operator met in that state is tagged with subtype
    ``UNARY``. After ``)``, a number or a symbol it is in
    ``STATE_OPERATOR``, where an operator stores its text in ``subtype``.

    Only the space character is skipped as whitespace; any other character
    that matches no token class is rejected.

    Args:
        line: Expression text to tokenize.

    Returns:
        List of tokens in source order (empty for an empty line).

    Raises:
        TokenizerError: On a character that belongs to no token class, or
            on a malformed number. ``TokenizerError`` is a ``ValueError``.
    """
    state = STATE_NAME
    tokens = []
    i = 0
    while i < len(line):
        char = line[i]
        if char == "(":
            tokens.append(Token(LEFT_PARENTHESIS, None))
            state = STATE_NAME
        elif char == ")":
            tokens.append(Token(RIGHT_PARENTHESIS, None))
            state = STATE_OPERATOR
        elif char == ",":
            tokens.append(Token(COMMA, None))
            state = STATE_NAME
        elif char in OPERATOR_CHARS:
            # The whole run of operator characters becomes one token.
            val, i = parse_operator(line, i)
            if state == STATE_OPERATOR:
                tokens.append(Token(OPERATOR, subtype=val))
            else:
                # No operand precedes the operator, so it is unary.
                tokens.append(Token(OPERATOR, subtype=UNARY, value=val))
            state = STATE_NAME
        elif char in NUMBER_CHARS:
            val, i = parse_number(line, i)
            tokens.append(Token(NUMBER, val))
            state = STATE_OPERATOR
        elif char in SYMBOL_CHARS:
            val, i = parse_symbol(line, i)
            tokens.append(Token(SYMBOL, val))
            state = STATE_OPERATOR
        elif char != " ":
            raise TokenizerError("Line is not a valid expression")
        # The parse_* helpers leave i on the last consumed character.
        i += 1
    return tokens