summaryrefslogtreecommitdiff
path: root/parser.py
diff options
context:
space:
mode:
authorAndrew <saintruler@gmail.com>2021-07-12 12:32:15 +0400
committerAndrew <saintruler@gmail.com>2021-07-12 12:32:15 +0400
commit49df60f32aca6428706bc895b7e48ab2d68444b5 (patch)
treed78ba857afd5940c63ba4da11a9324b94f8c448f /parser.py
parente3ae2c6c9b3a9d766b76f790ed18621765ed890d (diff)
Added ability to set variables.
Diffstat (limited to 'parser.py')
-rw-r--r--parser.py137
1 files changed, 110 insertions, 27 deletions
diff --git a/parser.py b/parser.py
index a9b2a46..19fa952 100644
--- a/parser.py
+++ b/parser.py
@@ -1,4 +1,9 @@
from consts import *
+from tokenizer import Token
+
+
+class ParserError(Exception):
+ pass
class Node:
@@ -14,20 +19,22 @@ class Node:
return self.type == other.type and self.value == other.value
def __repr__(self):
- if self.type == TokenType.LEFT_PARENTHESIS:
- return "Node(LEFT_PARENTHESIS)"
- if self.type == TokenType.RIGHT_PARENTHESIS:
- return "Node(RIGHT_PARENTHESIS)"
- if self.type == TokenType.NUMBER:
+ # NUMBER, OPERATOR, SYMBOL, FUNCALL, UNARY, ASSIGNMENT, *_
+ if self.type == NodeType.NUMBER:
return f"Node({self.value})"
- if self.type == TokenType.OPERATOR:
+ if self.type == NodeType.OPERATOR:
return f"Node({self.subtype})"
- if self.type == TokenType.COMMA:
- return f"Node(COMMA)"
- if self.type == TokenType.SYMBOL:
+ if self.type == NodeType.SYMBOL:
return f"Node(SYMBOL {self.value})"
- if self.type == FUNCALL:
+ if self.type == NodeType.FUNCALL:
return f"Node(FUNCALL {self.value})"
+ if self.type == NodeType.UNARY:
+ return f"Node(UNARY {self.subtype})"
+ if self.type == NodeType.ASSIGNMENT:
+ return f"Node(ASSIGNMENT)"
+
+ return "Node(repr not defined)"
+
__str__ = __repr__
@@ -43,7 +50,7 @@ def parse_parenthesis(tokens, start):
if depth == 0 and token.type == TokenType.RIGHT_PARENTHESIS:
break
end += 1
- node = build_tree(tokens[start + 1: end])
+ node = parse_expr(tokens[start + 1: end])
return node, end
@@ -71,39 +78,45 @@ def parse_args(tokens, start):
if len(arg) != 0:
args.append(arg)
- return list(map(build_tree, args))
+ return list(map(parse_expr, args))
-def build_tree(tokens):
+def parse_expr(tokens, start):
state = State.NAME
- i = 0
+ i = start
current_node = None
while i < len(tokens):
token = tokens[i]
+
if state == State.OPERATOR and token.type == TokenType.LEFT_PARENTHESIS:
args = parse_args(tokens, i)
- current_node.type = FUNCALL
+ current_node.type = NodeType.FUNCALL
current_node.value = (current_node.value, args)
- elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == UNARY:
+
+ elif token.type in [TokenType.LEFT_PARENTHESIS, TokenType.NUMBER, TokenType.SYMBOL] or token.subtype == TokenType.UNARY:
+
if token.type == TokenType.LEFT_PARENTHESIS:
node, i = parse_parenthesis(tokens, i)
state = State.NAME
+
elif token.type == TokenType.NUMBER:
- node = Node(TokenType.NUMBER, value=token.value)
+ node = Node(NodeType.NUMBER, value=token.value)
state = State.OPERATOR
+
elif token.type == TokenType.SYMBOL:
- node = Node(TokenType.SYMBOL, value=token.value)
+ node = Node(NodeType.SYMBOL, value=token.value)
state = State.OPERATOR
- elif token.subtype == UNARY:
- node = Node(TokenType.OPERATOR, subtype=UNARY)
- node.left = Node(TokenType.OPERATOR, subtype=token.value)
+
+ elif token.subtype == TokenType.UNARY:
+ node = Node(NodeType.OPERATOR, subtype=NodeType.UNARY)
+ node.left = Node(NodeType.OPERATOR, subtype=token.value)
state = State.NAME
if current_node is None:
current_node = node
- elif current_node.type == TokenType.NUMBER:
+ elif current_node.type == NodeType.NUMBER:
raise ValueError("Not a valid expression")
- elif current_node.type == TokenType.OPERATOR:
+ elif current_node.type == NodeType.OPERATOR:
if current_node.left is None:
current_node.left = node
node.parent = current_node
@@ -113,11 +126,12 @@ def build_tree(tokens):
current_node = node
else:
raise ValueError("Not a valid expression")
+
elif token.type == TokenType.OPERATOR:
- node = Node(token.type, subtype=token.subtype, value=token.value)
+ node = Node(NodeType.OPERATOR, subtype=token.subtype, value=token.value)
if current_node is None:
raise ValueError("Not a valid expression")
- elif current_node.type in [TokenType.NUMBER, TokenType.OPERATOR, FUNCALL]:
+ elif current_node.type in [NodeType.NUMBER, NodeType.OPERATOR, NodeType.FUNCALL]:
while current_node.parent is not None:
if PRECEDENCE[current_node.parent.subtype] < PRECEDENCE[node.subtype]:
node.parent = current_node.parent
@@ -129,11 +143,80 @@ def build_tree(tokens):
node.left = current_node
current_node = node
else:
- print(current_node, token)
raise ValueError("Not a valid expression")
state = State.NAME
+
+ elif token.type == TokenType.SEMICOLON:
+ break
+
i += 1
while current_node.parent is not None:
current_node = current_node.parent
- return current_node
+ return current_node, i - 1
+
+
+def parse_let_statement(tokens: list[Token], start: int):
+ class LetStates:
+ (WAIT_LET, WAIT_SYMBOL, WAIT_EQUALS, WAIT_EXPR, WAIT_SEMICOLON, PARSE_END, *_) = range(100)
+
+ root_node = Node(NodeType.ASSIGNMENT)
+ state = LetStates.WAIT_LET
+ i = start
+ while i < len(tokens):
+ token = tokens[i]
+ if state == LetStates.WAIT_LET:
+ if token.type == TokenType.KEYWORD and token.subtype == Keyword.LET:
+ state = LetStates.WAIT_SYMBOL
+ else:
+ raise ParserError(f"Waited for let, got {token}")
+
+ elif state == LetStates.WAIT_SYMBOL:
+ if token.type == TokenType.SYMBOL:
+ node = Node(NodeType.SYMBOL, value=token.value)
+ root_node.left = node
+ state = LetStates.WAIT_EQUALS
+ else:
+ raise ParserError(f"Waited for symbol, got {token}")
+
+ elif state == LetStates.WAIT_EQUALS:
+ if token.type == TokenType.EQUALS:
+ state = LetStates.WAIT_EXPR
+ else:
+ raise ParserError(f"Waited for equals, got {token}")
+
+ elif state == LetStates.WAIT_EXPR:
+ node, i = parse_expr(tokens, i)
+ root_node.right = node
+ state = LetStates.WAIT_SEMICOLON
+
+ elif state == LetStates.WAIT_SEMICOLON:
+ if token.type == TokenType.SEMICOLON:
+ state = LetStates.PARSE_END
+ break
+ else:
+ raise ParserError(f"Waited for semicolon, got {token}")
+
+ i += 1
+
+ if state != LetStates.PARSE_END:
+ raise ParserError("Statement ended unexpectedly")
+
+ return root_node, i
+
+
+def parse(tokens: list[Token]):
+ _statements = [
+ [Keyword.LET, TokenType.SYMBOL, TokenType.EQUALS, "EXPR", TokenType.SEMICOLON],
+ [Keyword.FN, TokenType.SYMBOL, TokenType.LEFT_PARENTHESIS, "ARGS", TokenType.RIGHT_PARENTHESIS, TokenType.LEFT_BRACE, ["STATEMENTS"], TokenType.RIGHT_BRACE],
+ ]
+
+ statements = []
+
+ i = 0
+ while i < len(tokens):
+ if tokens[i].type == TokenType.KEYWORD and tokens[i].subtype == Keyword.LET:
+ node, i = parse_let_statement(tokens, i)
+ statements.append(node)
+ i += 1
+ return statements