summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew <saintruler@gmail.com>2021-07-12 12:32:15 +0400
committerAndrew <saintruler@gmail.com>2021-07-12 12:32:15 +0400
commit49df60f32aca6428706bc895b7e48ab2d68444b5 (patch)
treed78ba857afd5940c63ba4da11a9324b94f8c448f
parente3ae2c6c9b3a9d766b76f790ed18621765ed890d (diff)
Added ability to set variables.
-rw-r--r--consts.py33
-rw-r--r--main.py38
-rw-r--r--parser.py137
-rw-r--r--tokenizer.py52
4 files changed, 203 insertions, 57 deletions
diff --git a/consts.py b/consts.py
index 86a8eef..0a595c2 100644
--- a/consts.py
+++ b/consts.py
@@ -4,18 +4,43 @@ class State:
class TokenType:
(
- LEFT_PARENTHESIS, RIGHT_PARENTHESIS, NUMBER, OPERATOR, SEMICOLON, COMMA, SYMBOL, *_
+ LEFT_PARENTHESIS, RIGHT_PARENTHESIS,
+ LEFT_BRACE, RIGHT_BRACE,
+ NUMBER, OPERATOR, SYMBOL, KEYWORD, UNARY,
+ SEMICOLON, COMMA, EQUALS, *_
) = range(100)
-UNARY = "unary"
-FUNCALL = "funcall"
+class NodeType:
+ (
+ NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_
+ ) = range(100)
+
+
+class Keyword:
+ LET, STRUCT, FN, RETURN, IF, ELSE, FOR, WHILE, CONTINUE, BREAK, *_ = range(100)
+
+
+keywords = {
+ "let": Keyword.LET, "struct": Keyword.STRUCT,
+ "fn": Keyword.FN, "return": Keyword.RETURN,
+ "if": Keyword.IF, "else": Keyword.ELSE,
+ "for": Keyword.FOR, "while": Keyword.WHILE, "continue": Keyword.CONTINUE, "break": Keyword.BREAK,
+}
+
+keywords_repr = {keywords[i]: i for i in keywords}
+
+types = [
+ "i8", "i16", "i32", "i64",
+ "f32", "f64",
+]
+
PRECEDENCE = {
"+": 10,
"-": 10,
"*": 20,
"^": 30,
- UNARY: 40,
+ NodeType.UNARY: 40,
}
OPERATOR_CHARS = "*+-/%&~^|#$.:<=>@"
diff --git a/main.py b/main.py
index a86d293..0ba9d47 100644
--- a/main.py
+++ b/main.py
@@ -2,7 +2,7 @@ import sys
from consts import *
from tokenizer import tokenize
-from parser import build_tree
+from parser import parse, parse_expr
# RUNTIME
@@ -37,33 +37,45 @@ runtime_functions = {
# END OF RUNTIME
-def expr_eval(node):
- if node.type == NUMBER:
+def _eval(node):
+ if node.type == NodeType.NUMBER:
return node.value
- elif node.type == SYMBOL:
+
+ elif node.type == NodeType.SYMBOL:
return runtime.get(node.value)
- elif node.type == FUNCALL:
+
+ elif node.type == NodeType.FUNCALL:
fun_name = node.value[0]
- args = list(map(expr_eval, node.value[1]))
+ args = list(map(_eval, node.value[1]))
return runtime_functions.get(fun_name)(*args)
- elif node.type == OPERATOR:
- if node.subtype == UNARY:
- return unary_operators.get(node.left.subtype)(expr_eval(node.right))
+
+ elif node.type == NodeType.OPERATOR:
+ if node.subtype == NodeType.UNARY:
+ return unary_operators.get(node.left.subtype)(_eval(node.right))
else:
- return operators.get(node.subtype)(expr_eval(node.left), expr_eval(node.right))
+ return operators.get(node.subtype)(_eval(node.left), _eval(node.right))
+
+ elif node.type == NodeType.ASSIGNMENT:
+ runtime[node.left.value] = _eval(node.right)
def main(*argv):
if len(argv) != 0:
data = argv[0]
else:
- data = "2 + 3 * 4"
+ # data = "2 + 3 * 4"
+ data = "let a = 2 + 3; let e = 4;"
# data = "(3 + 3 * 2) * -4"
# data = "(2 + factorial(4, )) * 9"
tokens = tokenize(data)
- node1 = build_tree(tokens)
- print(expr_eval(node1))
+ # print(tokens)
+ statements = parse(tokens)
+ for statement in statements:
+ _eval(statement)
+ # node, _ = parse_expr(tokens, 0)
+ # print(expr_eval(node))
+ print(1)
if __name__ == "__main__":
diff --git a/parser.py b/parser.py
index a9b2a46..19fa952 100644
--- a/parser.py
+++ b/parser.py
@@ -1,4 +1,9 @@
from consts import *
+from tokenizer import Token
+
+
+class ParserError(Exception):
+ pass
class Node:
@@ -14,20 +19,22 @@ class Node:
return self.type == other.type and self.value == other.value
def __repr__(self):
- if self.type == TokenType.LEFT_PARENTHESIS:
- return "Node(LEFT_PARENTHESIS)"
- if self.type == TokenType.RIGHT_PARENTHESIS:
- return "Node(RIGHT_PARENTHESIS)"
- if self.type == TokenType.NUMBER:
+ # NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_
+ if self.type == NodeType.NUMBER:
return f"Node({self.value})"
- if self.type == TokenType.OPERATOR:
+ if self.type == NodeType.OPERATOR:
return f"Node({self.subtype})"
- if self.type == TokenType.COMMA:
- return f"Node(COMMA)"
- if self.type == TokenType.SYMBOL:
+ if self.type == NodeType.SYMBOL:
return f"Node(SYMBOL {self.value})"
- if self.type == FUNCALL:
+ if self.type == NodeType.FUNCALL:
return f"Node(FUNCALL {self.value})"
+ if self.type == NodeType.UNARY:
+ return f"Node(UNARY {self.subtype})"
+ if self.type == NodeType.ASSIGNMENT:
+ return f"Node(ASSIGNMENT)"
+
+ return "Node(repr not defined)"
+
__str__ = __repr__
@@ -43,7 +50,7 @@ def parse_parenthesis(tokens, start):
if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS:
break
end += 1
- node = build_tree(tokens[start + 1: end])
+    node, _ = parse_expr(tokens[start + 1: end], 0)
return node, end
@@ -71,39 +78,45 @@ def parse_args(tokens, start):
if len(arg) != 0:
args.append(arg)
- return list(map(build_tree, args))
+    return [parse_expr(arg, 0)[0] for arg in args]
-def build_tree(tokens):
+def parse_expr(tokens, start):
state = State.NAME
- i = 0
+ i = start
current_node = None
while i < len(tokens):
token = tokens[i]
+
if state == State.OPERATOR and token.type == TokenType.LEFT_PARENTHESIS:
args = parse_args(tokens, i)
- current_node.type = FUNCALL
+ current_node.type = NodeType.FUNCALL
current_node.value = (current_node.value, args)
- elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == UNARY:
+
+ elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == TokenType.UNARY:
+
if token.type == TokenType.LEFT_PARENTHESIS:
node, i = parse_parenthesis(tokens, i)
state = State.NAME
+
elif token.type == TokenType.NUMBER:
- node = Node(TokenType.NUMBER, value=token.value)
+ node = Node(NodeType.NUMBER, value=token.value)
state = State.OPERATOR
+
elif token.type == TokenType.SYMBOL:
- node = Node(TokenType.SYMBOL, value=token.value)
+ node = Node(NodeType.SYMBOL, value=token.value)
state = State.OPERATOR
- elif token.subtype == UNARY:
- node = Node(TokenType.OPERATOR, subtype=UNARY)
- node.left = Node(TokenType.OPERATOR, subtype=token.value)
+
+ elif token.subtype == TokenType.UNARY:
+ node = Node(NodeType.OPERATOR, subtype=NodeType.UNARY)
+ node.left = Node(NodeType.OPERATOR, subtype=token.value)
state = State.NAME
if current_node is None:
current_node = node
- elif current_node.type == TokenType.NUMBER:
+ elif current_node.type == NodeType.NUMBER:
raise ValueError("Not a valid expression")
- elif current_node.type == TokenType.OPERATOR:
+ elif current_node.type == NodeType.OPERATOR:
if current_node.left is None:
current_node.left = node
node.parent = current_node
@@ -113,11 +126,12 @@ def build_tree(tokens):
current_node = node
else:
raise ValueError("Not a valid expression")
+
elif token.type == TokenType.OPERATOR:
- node = Node(token.type, subtype=token.subtype, value=token.value)
+ node = Node(NodeType.OPERATOR, subtype=token.subtype, value=token.value)
if current_node is None:
raise ValueError("Not a valid expression")
- elif current_node.type in [TokenType.NUMBER, TokenType.OPERATOR, FUNCALL]:
+ elif current_node.type in [NodeType.NUMBER, NodeType.OPERATOR, NodeType.FUNCALL]:
while current_node.parent is not None:
if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
node.parent = current_node.parent
@@ -129,11 +143,80 @@ def build_tree(tokens):
node.left = current_node
current_node = node
else:
- print(current_node, token)
raise ValueError("Not a valid expression")
state = State.NAME
+
+ elif token.type == TokenType.SEMICOLON:
+ break
+
i += 1
while current_node.parent is not None:
current_node = current_node.parent
- return current_node
+ return current_node, i - 1
+
+
+def parse_let_statement(tokens: list[Token], start: int):
+ class LetStates:
+ (WAIT_LET, WAIT_SYMBOL, WAIT_EQUALS, WAIT_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100)
+
+ root_node = Node(NodeType.ASSIGNMENT)
+ state = LetStates.WAIT_LET
+ i = start
+ while i < len(tokens):
+ token = tokens[i]
+ if state == LetStates.WAIT_LET:
+ if token.type == TokenType.KEYWORD and token.subtype == Keyword.LET:
+ state = LetStates.WAIT_SYMBOL
+ else:
+ raise ParserError(f"Waited for let, got {token}")
+
+ elif state == LetStates.WAIT_SYMBOL:
+ if token.type == TokenType.SYMBOL:
+ node = Node(NodeType.SYMBOL, value=token.value)
+ root_node.left = node
+ state = LetStates.WAIT_EQUALS
+ else:
+ raise ParserError(f"Waited for symbol, got {token}")
+
+ elif state == LetStates.WAIT_EQUALS:
+ if token.type == TokenType.EQUALS:
+ state = LetStates.WAIT_EXPR
+ else:
+ raise ParserError(f"Waited for equals, got {token}")
+
+ elif state == LetStates.WAIT_EXPR:
+ node, i = parse_expr(tokens, i)
+ root_node.right = node
+ state = LetStates.WAIT_SEMICOLON
+
+ elif state == LetStates.WAIT_SEMICOLON:
+ if token.type == TokenType.SEMICOLON:
+ state = LetStates.PARSE_END
+ break
+ else:
+ raise ParserError(f"Waited for semicolon, got {token}")
+
+ i += 1
+
+ if state != LetStates.PARSE_END:
+ raise ParserError("Statement ended unexpectedly")
+
+ return root_node, i
+
+
+def parse(tokens: list[Token]):
+ _statements = [
+ [Keyword.LET, TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON],
+ [Keyword.FN, TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.LEFT_BRACE, ["STATEMENTS"], TokenType.RIGHT_BRACE],
+ ]
+
+ statements = []
+
+ i = 0
+ while i < len(tokens):
+ if tokens[i].type == TokenType.KEYWORD and tokens[i].subtype == Keyword.LET:
+ node, i = parse_let_statement(tokens, i)
+ statements.append(node)
+ i += 1
+ return statements
diff --git a/tokenizer.py b/tokenizer.py
index 13b824a..4fd534c 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -6,7 +6,7 @@ class TokenizerError(Exception):
class Token:
- def __init__(self, node_type, value=None, subtype=None):
+ def __init__(self, node_type, *, value=None, subtype=None):
self.type = node_type
self.value = value
self.subtype = subtype
@@ -27,6 +27,19 @@ class Token:
return f"Token(COMMA)"
if self.type == TokenType.SYMBOL:
return f"Token(SYMBOL {self.value})"
+ if self.type == TokenType.KEYWORD:
+ return f"Token(KEYWORD {keywords_repr[self.subtype]})"
+ if self.type == TokenType.EQUALS:
+ return f"Token(=)"
+ if self.type == TokenType.SEMICOLON:
+ return f"Token(;)"
+ if self.type == TokenType.LEFT_BRACE:
+ return "Token(LEFT_BRACE)"
+        if self.type == TokenType.RIGHT_BRACE:
+            return "Token(RIGHT_BRACE)"
+
+
+ return f"Token(repr not defined)"
__str__ = __repr__
@@ -60,35 +73,48 @@ def tokenize(line):
while i < len(line):
char = line[i]
if char == "(":
- tokens.append(Token(TokenType.LEFT_PARENTHESIS, None))
+ tokens.append(Token(TokenType.LEFT_PARENTHESIS))
state = State.NAME
elif char == ")":
- tokens.append(Token(TokenType.RIGHT_PARENTHESIS, None))
+ tokens.append(Token(TokenType.RIGHT_PARENTHESIS))
state = State.OPERATOR
elif char == ",":
- tokens.append(Token(TokenType.COMMA, None))
+ tokens.append(Token(TokenType.COMMA))
+ state = State.NAME
+ elif char == ";":
+ tokens.append(Token(TokenType.SEMICOLON))
+ state = State.NAME
+ elif char == "{":
+ tokens.append(Token(TokenType.LEFT_BRACE))
+ state = State.NAME
+ elif char == "}":
+ tokens.append(Token(TokenType.RIGHT_BRACE))
state = State.NAME
elif char in OPERATOR_CHARS:
- if state == State.OPERATOR:
- val, i = parse_operator(line, i)
+ val, i = parse_operator(line, i)
+ if val == "=":
+ tokens.append(Token(TokenType.EQUALS))
+ state = State.NAME
+ elif state == State.OPERATOR:
tokens.append(Token(TokenType.OPERATOR, subtype=val))
state = State.NAME
elif state == State.NAME:
- val, i = parse_operator(line, i)
- tokens.append(Token(TokenType.OPERATOR, subtype=UNARY, value=val))
+ tokens.append(Token(TokenType.OPERATOR, subtype=TokenType.UNARY, value=val))
state = State.NAME
elif char in NUMBER_CHARS:
val, i = parse_number(line, i)
- tokens.append(Token(TokenType.NUMBER, val))
+ tokens.append(Token(TokenType.NUMBER, value=val))
state = State.OPERATOR
elif char in SYMBOL_CHARS:
val, i = parse_symbol(line, i)
- tokens.append(Token(TokenType.SYMBOL, val))
- state = State.OPERATOR
+ if val in keywords:
+ tokens.append(Token(TokenType.KEYWORD, subtype=keywords[val]))
+ state = State.NAME
+ else:
+ tokens.append(Token(TokenType.SYMBOL, value=val))
+ state = State.OPERATOR
elif char != " ":
raise ValueError("Line is not a valid expression")
i += 1
return tokens
-
-