AeThex-Engine-Core/engine/modules/aethex_lang/aethex_parser.cpp
2026-02-24 01:55:30 -07:00

678 lines
19 KiB
C++

/**************************************************************************/
/* aethex_parser.cpp */
/**************************************************************************/
/* This file is part of: */
/* AETHEX ENGINE */
/* https://aethex.foundation */
/**************************************************************************/
/* Copyright (c) 2026-present AeThex Labs. */
/**************************************************************************/
#include "aethex_parser.h"
// Reports whether parsing has run out of input: either the cursor is past the
// last stored token, or it is resting on the TK_EOF token.
bool AeThexParser::is_at_end() const {
	if (current >= tokens.size()) {
		return true;
	}
	return tokens[current].type == AeThexTokenizer::TK_EOF;
}
// Returns the token under the cursor without consuming it. When the cursor is
// past the end of the token list, a default-constructed token tagged TK_EOF is
// returned instead.
AeThexTokenizer::Token AeThexParser::peek() const {
	if (current < tokens.size()) {
		return tokens[current];
	}

	AeThexTokenizer::Token end_marker;
	end_marker.type = AeThexTokenizer::TK_EOF;
	return end_marker;
}
// Returns the token immediately before the cursor, i.e. the one most recently
// consumed by advance().
//
// If nothing has been consumed yet (cursor at 0) or the cursor is somehow out
// of range, there is no valid "previous" slot; a default-constructed token
// tagged TK_EOF is returned rather than indexing outside the token list. This
// mirrors what peek() does when it runs past the end.
AeThexTokenizer::Token AeThexParser::previous() const {
	if (current < 1 || current > tokens.size()) {
		AeThexTokenizer::Token eof;
		eof.type = AeThexTokenizer::TK_EOF;
		return eof;
	}
	return tokens[current - 1];
}
// Consumes the current token and returns it. At end of input the cursor stays
// where it is, and the value returned is whatever previous() yields for the
// unchanged cursor.
AeThexTokenizer::Token AeThexParser::advance() {
	if (is_at_end()) {
		return previous();
	}
	current++;
	return previous();
}
// True when there is still input left and the current token has the given
// type. Never consumes anything.
bool AeThexParser::check(AeThexTokenizer::TokenType type) const {
	return !is_at_end() && peek().type == type;
}
// Consumes the current token if it has the given type and reports whether it
// did so. On a mismatch the cursor is left untouched.
bool AeThexParser::match(AeThexTokenizer::TokenType type) {
	if (!check(type)) {
		return false;
	}
	advance();
	return true;
}
// Requires the current token to be of the given type. On success the token is
// consumed; otherwise `message` is recorded as a parse error and the cursor
// does not move.
void AeThexParser::consume(AeThexTokenizer::TokenType type, const String &message) {
	if (!check(type)) {
		error(message);
		return;
	}
	advance();
}
void AeThexParser::error(const String &message) {
ParseError err;
err.message = message;
err.line = peek().line;
err.column = peek().column;
errors.push_back(err);
}
// Skips tokens until a plausible restart point is reached.
//
// One token is always discarded first. After that, skipping stops as soon as
// the token just consumed was a newline, or the upcoming token is one of the
// construct-starting keywords listed below, or the input is exhausted.
void AeThexParser::synchronize() {
	for (advance(); !is_at_end(); advance()) {
		if (previous().type == AeThexTokenizer::TK_NEWLINE) {
			return;
		}

		const auto upcoming = peek().type;
		const bool starts_construct =
				upcoming == AeThexTokenizer::TK_REALITY ||
				upcoming == AeThexTokenizer::TK_JOURNEY ||
				upcoming == AeThexTokenizer::TK_BEACON ||
				upcoming == AeThexTokenizer::TK_ARTIFACT ||
				upcoming == AeThexTokenizer::TK_WHEN ||
				upcoming == AeThexTokenizer::TK_TRAVERSE ||
				upcoming == AeThexTokenizer::TK_WHILE ||
				upcoming == AeThexTokenizer::TK_RETURN;
		if (starts_construct) {
			return;
		}
	}
}
// Entry point: parses a token list into an AST rooted at `root`.
//
// Resets all parser state (cursor, error list, any previously built tree),
// skips leading newlines, and then requires a top-level 'reality' block.
// Returns OK when no errors were recorded, ERR_PARSE_ERROR otherwise.
Error AeThexParser::parse(const Vector<AeThexTokenizer::Token> &p_tokens) {
	tokens = p_tokens;
	current = 0;
	errors.clear();

	// Drop the tree left over from an earlier parse() call, if any.
	if (root != nullptr) {
		memdelete(root);
		root = nullptr;
	}

	// Leading blank lines are not significant.
	while (match(AeThexTokenizer::TK_NEWLINE)) {
	}

	// The file must open with a reality block.
	if (!check(AeThexTokenizer::TK_REALITY)) {
		error("Expected 'reality' block at start of file");
		return ERR_PARSE_ERROR;
	}

	root = parse_reality();
	return errors.is_empty() ? OK : ERR_PARSE_ERROR;
}
// Parses a top-level reality block: the 'reality' keyword, a name, and a
// brace-delimited body.
//
// The body may contain journeys, beacons, `name: value` attributes (stored in
// node->attributes) and ordinary statements (appended to node->children). An
// attribute value is either a bracketed list of identifiers, stored as a
// comma-joined string, or a single identifier; any other value is not consumed
// here and is picked up by the next loop iteration.
//
// Returns a NODE_REALITY node whose `value` is the reality name. Syntax
// problems are recorded through error(); a node is returned regardless.
AeThexParser::ASTNode *AeThexParser::parse_reality() {
	consume(AeThexTokenizer::TK_REALITY, "Expected 'reality'");
	ASTNode *node = memnew(ASTNode);
	node->type = ASTNode::NODE_REALITY;
	node->line = previous().line;

	// Get reality name
	consume(AeThexTokenizer::TK_IDENTIFIER, "Expected reality name");
	node->value = previous().value;

	// Parse block
	consume(AeThexTokenizer::TK_BRACE_OPEN, "Expected '{' after reality name");
	while (!check(AeThexTokenizer::TK_BRACE_CLOSE) && !is_at_end()) {
		// Skip newlines
		while (match(AeThexTokenizer::TK_NEWLINE)) {
		}
		// Stop at the closing brace, and also when trailing newlines ran into
		// the end of input: parsing a statement there would only add a
		// spurious "Expected expression" error and an empty literal child.
		if (check(AeThexTokenizer::TK_BRACE_CLOSE) || is_at_end()) {
			break;
		}

		if (check(AeThexTokenizer::TK_JOURNEY)) {
			node->children.push_back(parse_journey());
		} else if (check(AeThexTokenizer::TK_BEACON)) {
			node->children.push_back(parse_beacon());
		} else if (check(AeThexTokenizer::TK_IDENTIFIER)) {
			// Attribute like "platforms: [...]"
			String attr_name = advance().value;
			consume(AeThexTokenizer::TK_COLON, "Expected ':' after attribute name");

			// Simple value parsing
			if (check(AeThexTokenizer::TK_BRACKET_OPEN)) {
				advance();
				String values;
				while (!check(AeThexTokenizer::TK_BRACKET_CLOSE) && !is_at_end()) {
					if (check(AeThexTokenizer::TK_IDENTIFIER)) {
						if (!values.is_empty()) {
							values += ",";
						}
						values += advance().value;
					} else if (!match(AeThexTokenizer::TK_COMMA)) {
						// Anything other than an identifier or a comma must
						// still be consumed, otherwise this loop never makes
						// progress and the parser hangs.
						error("Expected identifier in attribute list");
						advance();
					}
				}
				consume(AeThexTokenizer::TK_BRACKET_CLOSE, "Expected ']'");
				node->attributes[attr_name] = values;
			} else if (check(AeThexTokenizer::TK_IDENTIFIER)) {
				node->attributes[attr_name] = advance().value;
			}
		} else {
			// Try to parse as statement
			ASTNode *stmt = parse_statement();
			if (stmt) {
				node->children.push_back(stmt);
			}
		}
	}
	consume(AeThexTokenizer::TK_BRACE_CLOSE, "Expected '}' after reality block");
	return node;
}
// Parses a journey definition: the 'journey' keyword, a name, a parenthesised
// parameter list and a brace-delimited body.
//
// Returns a NODE_JOURNEY node whose `value` is the journey name. Its children
// are one NODE_IDENTIFIER per parameter, followed by the body statements in
// source order. Syntax problems are recorded through error(); a node is
// returned regardless.
AeThexParser::ASTNode *AeThexParser::parse_journey() {
	consume(AeThexTokenizer::TK_JOURNEY, "Expected 'journey'");
	ASTNode *node = memnew(ASTNode);
	node->type = ASTNode::NODE_JOURNEY;
	node->line = previous().line;

	// Get function name
	consume(AeThexTokenizer::TK_IDENTIFIER, "Expected journey name");
	node->value = previous().value;

	// Parse parameters
	consume(AeThexTokenizer::TK_PAREN_OPEN, "Expected '(' after journey name");
	while (!check(AeThexTokenizer::TK_PAREN_CLOSE) && !is_at_end()) {
		if (check(AeThexTokenizer::TK_IDENTIFIER)) {
			ASTNode *param = memnew(ASTNode);
			param->type = ASTNode::NODE_IDENTIFIER;
			param->value = advance().value;
			node->children.push_back(param);
		} else if (!match(AeThexTokenizer::TK_COMMA)) {
			// A token that is neither a parameter name nor a comma must be
			// skipped explicitly; leaving it in place would spin this loop
			// forever on the same token.
			error("Expected parameter name");
			advance();
		}
	}
	consume(AeThexTokenizer::TK_PAREN_CLOSE, "Expected ')' after parameters");

	// Parse body
	consume(AeThexTokenizer::TK_BRACE_OPEN, "Expected '{' before journey body");
	while (!check(AeThexTokenizer::TK_BRACE_CLOSE) && !is_at_end()) {
		while (match(AeThexTokenizer::TK_NEWLINE)) {
		}
		// Also stop when trailing newlines ran into the end of input, so no
		// bogus statement is parsed there; the missing '}' is reported below.
		if (check(AeThexTokenizer::TK_BRACE_CLOSE) || is_at_end()) {
			break;
		}
		ASTNode *stmt = parse_statement();
		if (stmt) {
			node->children.push_back(stmt);
		}
	}
	consume(AeThexTokenizer::TK_BRACE_CLOSE, "Expected '}' after journey body");
	return node;
}
// Parses a beacon declaration: the 'beacon' keyword, a name, and an optional
// parenthesised parameter list.
//
// Returns a NODE_BEACON node whose `value` is the beacon name and whose
// children are one NODE_IDENTIFIER per parameter. Syntax problems are recorded
// through error(); a node is returned regardless.
AeThexParser::ASTNode *AeThexParser::parse_beacon() {
	consume(AeThexTokenizer::TK_BEACON, "Expected 'beacon'");
	ASTNode *node = memnew(ASTNode);
	node->type = ASTNode::NODE_BEACON;
	node->line = previous().line;

	consume(AeThexTokenizer::TK_IDENTIFIER, "Expected beacon name");
	node->value = previous().value;

	// Parse parameters if present
	if (match(AeThexTokenizer::TK_PAREN_OPEN)) {
		while (!check(AeThexTokenizer::TK_PAREN_CLOSE) && !is_at_end()) {
			if (check(AeThexTokenizer::TK_IDENTIFIER)) {
				ASTNode *param = memnew(ASTNode);
				param->type = ASTNode::NODE_IDENTIFIER;
				param->value = advance().value;
				node->children.push_back(param);
			} else if (!match(AeThexTokenizer::TK_COMMA)) {
				// Skip anything that is neither a parameter name nor a comma;
				// without this the loop would never advance past it.
				error("Expected parameter name");
				advance();
			}
		}
		consume(AeThexTokenizer::TK_PAREN_CLOSE, "Expected ')' after beacon parameters");
	}
	return node;
}
// Parses a single statement and returns its AST node.
//
// Leading newlines are skipped. The statement kind is chosen by the first
// token:
//   TK_LET / TK_CONST -> NODE_VARIABLE; `value` is the name, attribute "const"
//                        is "true" or "false", optional initializer child.
//   TK_NOTIFY         -> NODE_NOTIFY with one expression child.
//   TK_REVEAL         -> NODE_REVEAL with an optional expression child.
//   TK_WHEN           -> NODE_IF; children are the condition, a "then" block,
//                        and an "else" block when TK_OTHERWISE follows.
//   TK_SYNC           -> NODE_SYNC with one expression child and, after
//                        TK_ACROSS, a "targets" attribute.
//   anything else     -> an expression statement.
//
// Syntax problems are recorded through error(); a node is returned regardless.
AeThexParser::ASTNode *AeThexParser::parse_statement() {
	// Skip newlines
	while (match(AeThexTokenizer::TK_NEWLINE)) {
	}

	if (match(AeThexTokenizer::TK_LET) || match(AeThexTokenizer::TK_CONST)) {
		bool is_const = previous().type == AeThexTokenizer::TK_CONST;
		ASTNode *node = memnew(ASTNode);
		node->type = ASTNode::NODE_VARIABLE;
		node->line = previous().line;
		node->attributes["const"] = is_const ? "true" : "false";
		consume(AeThexTokenizer::TK_IDENTIFIER, "Expected variable name");
		node->value = previous().value;
		if (match(AeThexTokenizer::TK_EQUAL)) {
			node->children.push_back(parse_expression());
		}
		return node;
	}

	if (match(AeThexTokenizer::TK_NOTIFY)) {
		ASTNode *node = memnew(ASTNode);
		node->type = ASTNode::NODE_NOTIFY;
		node->line = previous().line;
		node->children.push_back(parse_expression());
		return node;
	}

	if (match(AeThexTokenizer::TK_REVEAL)) {
		ASTNode *node = memnew(ASTNode);
		node->type = ASTNode::NODE_REVEAL;
		node->line = previous().line;
		// The expression is optional: none is parsed when the statement ends
		// at a newline, a closing brace, or the end of input (the last case
		// would otherwise produce a spurious "Expected expression" error).
		if (!check(AeThexTokenizer::TK_NEWLINE) && !check(AeThexTokenizer::TK_BRACE_CLOSE) && !is_at_end()) {
			node->children.push_back(parse_expression());
		}
		return node;
	}

	if (match(AeThexTokenizer::TK_WHEN)) {
		ASTNode *node = memnew(ASTNode);
		node->type = ASTNode::NODE_IF;
		node->line = previous().line;

		// Condition
		node->children.push_back(parse_expression());

		// Then block
		consume(AeThexTokenizer::TK_BRACE_OPEN, "Expected '{' after when condition");
		ASTNode *then_block = memnew(ASTNode);
		then_block->type = ASTNode::NODE_LITERAL;
		then_block->value = "then";
		while (!check(AeThexTokenizer::TK_BRACE_CLOSE) && !is_at_end()) {
			while (match(AeThexTokenizer::TK_NEWLINE)) {
			}
			// Stop at '}' and also when trailing newlines reached the end of
			// input, so no bogus statement is parsed there.
			if (check(AeThexTokenizer::TK_BRACE_CLOSE) || is_at_end()) {
				break;
			}
			ASTNode *stmt = parse_statement();
			if (stmt) {
				then_block->children.push_back(stmt);
			}
		}
		consume(AeThexTokenizer::TK_BRACE_CLOSE, "Expected '}' after when block");
		node->children.push_back(then_block);

		// Otherwise (else) block
		while (match(AeThexTokenizer::TK_NEWLINE)) {
		}
		if (match(AeThexTokenizer::TK_OTHERWISE)) {
			consume(AeThexTokenizer::TK_BRACE_OPEN, "Expected '{' after otherwise");
			ASTNode *else_block = memnew(ASTNode);
			else_block->type = ASTNode::NODE_LITERAL;
			else_block->value = "else";
			while (!check(AeThexTokenizer::TK_BRACE_CLOSE) && !is_at_end()) {
				while (match(AeThexTokenizer::TK_NEWLINE)) {
				}
				if (check(AeThexTokenizer::TK_BRACE_CLOSE) || is_at_end()) {
					break;
				}
				ASTNode *stmt = parse_statement();
				if (stmt) {
					else_block->children.push_back(stmt);
				}
			}
			consume(AeThexTokenizer::TK_BRACE_CLOSE, "Expected '}' after otherwise block");
			node->children.push_back(else_block);
		}
		return node;
	}

	if (match(AeThexTokenizer::TK_SYNC)) {
		ASTNode *node = memnew(ASTNode);
		node->type = ASTNode::NODE_SYNC;
		node->line = previous().line;
		node->children.push_back(parse_expression());

		if (match(AeThexTokenizer::TK_ACROSS)) {
			if (match(AeThexTokenizer::TK_ALL)) {
				node->attributes["targets"] = "all";
			} else if (match(AeThexTokenizer::TK_BRACKET_OPEN)) {
				// Bracketed identifier list, stored as a comma-joined string.
				String targets;
				while (!check(AeThexTokenizer::TK_BRACKET_CLOSE) && !is_at_end()) {
					if (check(AeThexTokenizer::TK_IDENTIFIER)) {
						if (!targets.is_empty()) {
							targets += ",";
						}
						targets += advance().value;
					} else if (!match(AeThexTokenizer::TK_COMMA)) {
						// Consume unexpected tokens so the loop always makes
						// progress instead of hanging on them.
						error("Expected target name");
						advance();
					}
				}
				consume(AeThexTokenizer::TK_BRACKET_CLOSE, "Expected ']'");
				node->attributes["targets"] = targets;
			}
		}
		return node;
	}

	// Expression statement
	ASTNode *expr = parse_expression();
	return expr;
}
// Top of the expression grammar: an expression is parsed starting at the
// assignment level, which in turn descends through the binary and unary
// operator levels.
AeThexParser::ASTNode *AeThexParser::parse_expression() {
	ASTNode *expression = parse_assignment();
	return expression;
}
// Parses `target = value`. The right-hand side recurses into this same level,
// so chained assignments group to the right. Without a TK_EQUAL after the left
// operand, the operand is returned unchanged.
//
// The result is a NODE_ASSIGNMENT with children [target, value].
AeThexParser::ASTNode *AeThexParser::parse_assignment() {
	ASTNode *target = parse_or();
	if (!match(AeThexTokenizer::TK_EQUAL)) {
		return target;
	}

	ASTNode *assign = memnew(ASTNode);
	assign->type = ASTNode::NODE_ASSIGNMENT;
	assign->line = previous().line; // Line of the '=' token.
	assign->children.push_back(target);
	assign->children.push_back(parse_assignment());
	return assign;
}
// Parses a left-associative chain of TK_OR operators over and-level operands.
// Each operator produces a NODE_BINARY_OP with value "or" and children
// [left, right].
AeThexParser::ASTNode *AeThexParser::parse_or() {
	ASTNode *result = parse_and();
	for (;;) {
		if (!match(AeThexTokenizer::TK_OR)) {
			break;
		}
		ASTNode *disjunction = memnew(ASTNode);
		disjunction->type = ASTNode::NODE_BINARY_OP;
		disjunction->value = "or";
		disjunction->line = previous().line;
		disjunction->children.push_back(result);
		disjunction->children.push_back(parse_and());
		result = disjunction;
	}
	return result;
}
// Parses a left-associative chain of TK_AND operators over equality-level
// operands. Each operator produces a NODE_BINARY_OP with value "and" and
// children [left, right].
AeThexParser::ASTNode *AeThexParser::parse_and() {
	ASTNode *result = parse_equality();
	for (;;) {
		if (!match(AeThexTokenizer::TK_AND)) {
			break;
		}
		ASTNode *conjunction = memnew(ASTNode);
		conjunction->type = ASTNode::NODE_BINARY_OP;
		conjunction->value = "and";
		conjunction->line = previous().line;
		conjunction->children.push_back(result);
		conjunction->children.push_back(parse_equality());
		result = conjunction;
	}
	return result;
}
// Parses a left-associative chain of equality operators over comparison-level
// operands. Each operator produces a NODE_BINARY_OP whose value is "==" or
// "!=" and whose children are [left, right].
AeThexParser::ASTNode *AeThexParser::parse_equality() {
	ASTNode *result = parse_comparison();
	while (match(AeThexTokenizer::TK_EQUAL_EQUAL) || match(AeThexTokenizer::TK_NOT_EQUAL)) {
		const AeThexTokenizer::Token op = previous();

		ASTNode *binary = memnew(ASTNode);
		binary->type = ASTNode::NODE_BINARY_OP;
		if (op.type == AeThexTokenizer::TK_EQUAL_EQUAL) {
			binary->value = "==";
		} else {
			binary->value = "!=";
		}
		binary->line = op.line;
		binary->children.push_back(result);
		binary->children.push_back(parse_comparison());
		result = binary;
	}
	return result;
}
// Parses a left-associative chain of relational operators over term-level
// operands. Each operator produces a NODE_BINARY_OP whose value is one of
// "<", "<=", ">", ">=" and whose children are [left, right].
AeThexParser::ASTNode *AeThexParser::parse_comparison() {
	ASTNode *result = parse_term();
	while (match(AeThexTokenizer::TK_LESS) || match(AeThexTokenizer::TK_LESS_EQUAL) ||
			match(AeThexTokenizer::TK_GREATER) || match(AeThexTokenizer::TK_GREATER_EQUAL)) {
		const AeThexTokenizer::Token op = previous();

		ASTNode *binary = memnew(ASTNode);
		binary->type = ASTNode::NODE_BINARY_OP;
		// Map the operator token to its textual form; the loop condition only
		// lets these four token types through.
		if (op.type == AeThexTokenizer::TK_LESS) {
			binary->value = "<";
		} else if (op.type == AeThexTokenizer::TK_LESS_EQUAL) {
			binary->value = "<=";
		} else if (op.type == AeThexTokenizer::TK_GREATER) {
			binary->value = ">";
		} else if (op.type == AeThexTokenizer::TK_GREATER_EQUAL) {
			binary->value = ">=";
		}
		binary->line = op.line;
		binary->children.push_back(result);
		binary->children.push_back(parse_term());
		result = binary;
	}
	return result;
}
// Parses a left-associative chain of additive operators over factor-level
// operands. Each operator produces a NODE_BINARY_OP whose value is "+" or "-"
// and whose children are [left, right].
AeThexParser::ASTNode *AeThexParser::parse_term() {
	ASTNode *result = parse_factor();
	while (match(AeThexTokenizer::TK_PLUS) || match(AeThexTokenizer::TK_MINUS)) {
		const AeThexTokenizer::Token op = previous();

		ASTNode *binary = memnew(ASTNode);
		binary->type = ASTNode::NODE_BINARY_OP;
		if (op.type == AeThexTokenizer::TK_PLUS) {
			binary->value = "+";
		} else {
			binary->value = "-";
		}
		binary->line = op.line;
		binary->children.push_back(result);
		binary->children.push_back(parse_factor());
		result = binary;
	}
	return result;
}
// Parses a left-associative chain of multiplicative operators over unary-level
// operands. Each operator produces a NODE_BINARY_OP whose value is "*", "/" or
// "%" and whose children are [left, right].
AeThexParser::ASTNode *AeThexParser::parse_factor() {
	ASTNode *result = parse_unary();
	while (match(AeThexTokenizer::TK_STAR) || match(AeThexTokenizer::TK_SLASH) ||
			match(AeThexTokenizer::TK_PERCENT)) {
		const AeThexTokenizer::Token op = previous();

		ASTNode *binary = memnew(ASTNode);
		binary->type = ASTNode::NODE_BINARY_OP;
		// Map the operator token to its textual form; the loop condition only
		// lets these three token types through.
		if (op.type == AeThexTokenizer::TK_STAR) {
			binary->value = "*";
		} else if (op.type == AeThexTokenizer::TK_SLASH) {
			binary->value = "/";
		} else if (op.type == AeThexTokenizer::TK_PERCENT) {
			binary->value = "%";
		}
		binary->line = op.line;
		binary->children.push_back(result);
		binary->children.push_back(parse_unary());
		result = binary;
	}
	return result;
}
// Parses a prefix operator (TK_NOT or TK_MINUS) applied to another unary
// expression, so prefixes can be stacked. Without a prefix operator this falls
// through to the call/postfix level.
//
// A prefix produces a NODE_UNARY_OP whose value is "not" or "-" and whose
// single child is the operand.
AeThexParser::ASTNode *AeThexParser::parse_unary() {
	const bool has_prefix = match(AeThexTokenizer::TK_NOT) || match(AeThexTokenizer::TK_MINUS);
	if (!has_prefix) {
		return parse_call();
	}

	const AeThexTokenizer::Token op = previous();
	ASTNode *unary = memnew(ASTNode);
	unary->type = ASTNode::NODE_UNARY_OP;
	if (op.type == AeThexTokenizer::TK_NOT) {
		unary->value = "not";
	} else {
		unary->value = "-";
	}
	unary->line = op.line;
	unary->children.push_back(parse_unary());
	return unary;
}
// Parses a primary expression followed by any number of postfix operations,
// each one wrapping the expression built so far:
//   `( args )` -> NODE_CALL,   children [callee, arg0, arg1, ...]
//   `. name`   -> NODE_MEMBER, children [object, NODE_IDENTIFIER name]
//   `[ index ]`-> NODE_MEMBER, children [object, index expression]
// Commas between call arguments are consumed when present but not required.
AeThexParser::ASTNode *AeThexParser::parse_call() {
	ASTNode *subject = parse_primary();

	while (true) {
		if (match(AeThexTokenizer::TK_PAREN_OPEN)) {
			ASTNode *call = memnew(ASTNode);
			call->type = ASTNode::NODE_CALL;
			call->line = previous().line;
			call->children.push_back(subject);

			// Arguments
			while (!is_at_end() && !check(AeThexTokenizer::TK_PAREN_CLOSE)) {
				call->children.push_back(parse_expression());
				match(AeThexTokenizer::TK_COMMA);
			}
			consume(AeThexTokenizer::TK_PAREN_CLOSE, "Expected ')' after arguments");

			subject = call;
			continue;
		}

		if (match(AeThexTokenizer::TK_DOT)) {
			ASTNode *access = memnew(ASTNode);
			access->type = ASTNode::NODE_MEMBER;
			access->line = previous().line; // Line of the '.' token.
			access->children.push_back(subject);

			consume(AeThexTokenizer::TK_IDENTIFIER, "Expected property name after '.'");
			ASTNode *property = memnew(ASTNode);
			property->type = ASTNode::NODE_IDENTIFIER;
			property->value = previous().value;
			access->children.push_back(property);

			subject = access;
			continue;
		}

		if (match(AeThexTokenizer::TK_BRACKET_OPEN)) {
			ASTNode *indexing = memnew(ASTNode);
			indexing->type = ASTNode::NODE_MEMBER;
			indexing->line = previous().line; // Line of the '[' token.
			indexing->children.push_back(subject);
			indexing->children.push_back(parse_expression());
			consume(AeThexTokenizer::TK_BRACKET_CLOSE, "Expected ']' after index");

			subject = indexing;
			continue;
		}

		// No further postfix operator.
		break;
	}

	return subject;
}
// Parses the innermost expression forms:
//   TK_TRUE / TK_FALSE / TK_NULL   -> NODE_LITERAL "true" / "false" / "null"
//   TK_NUMBER                      -> NODE_LITERAL, attribute type="number"
//   TK_STRING / TK_TEMPLATE_STRING -> NODE_LITERAL, attribute type="string"
//   TK_IDENTIFIER / TK_SELF        -> NODE_IDENTIFIER
//   `( expr )`                     -> the inner expression itself
//   `[ a, b, ... ]`                -> NODE_ARRAY, one child per element
//   `{ k: v, ... }`                -> NODE_DICT, children alternate key, value
//   'new' Name [ ( args ) ]        -> NODE_CALL "new", children [Name, args...]
// Commas between elements are consumed when present but not required.
//
// If nothing matches, "Expected expression" is recorded, the offending token
// is consumed, and an empty NODE_LITERAL is returned as a placeholder.
AeThexParser::ASTNode *AeThexParser::parse_primary() {
	// Keyword literals.
	if (match(AeThexTokenizer::TK_TRUE) || match(AeThexTokenizer::TK_FALSE) ||
			match(AeThexTokenizer::TK_NULL)) {
		const AeThexTokenizer::Token keyword = previous();
		ASTNode *literal = memnew(ASTNode);
		literal->type = ASTNode::NODE_LITERAL;
		if (keyword.type == AeThexTokenizer::TK_TRUE) {
			literal->value = "true";
		} else if (keyword.type == AeThexTokenizer::TK_FALSE) {
			literal->value = "false";
		} else {
			literal->value = "null";
		}
		literal->line = keyword.line;
		return literal;
	}

	// Numeric literal.
	if (match(AeThexTokenizer::TK_NUMBER)) {
		const AeThexTokenizer::Token number = previous();
		ASTNode *literal = memnew(ASTNode);
		literal->type = ASTNode::NODE_LITERAL;
		literal->value = number.value;
		literal->attributes["type"] = "number";
		literal->line = number.line;
		return literal;
	}

	// String literal (plain or template).
	if (match(AeThexTokenizer::TK_STRING) || match(AeThexTokenizer::TK_TEMPLATE_STRING)) {
		const AeThexTokenizer::Token text = previous();
		ASTNode *literal = memnew(ASTNode);
		literal->type = ASTNode::NODE_LITERAL;
		literal->value = text.value;
		literal->attributes["type"] = "string";
		literal->line = text.line;
		return literal;
	}

	// Plain identifier.
	if (match(AeThexTokenizer::TK_IDENTIFIER)) {
		const AeThexTokenizer::Token name = previous();
		ASTNode *identifier = memnew(ASTNode);
		identifier->type = ASTNode::NODE_IDENTIFIER;
		identifier->value = name.value;
		identifier->line = name.line;
		return identifier;
	}

	// TK_SELF is represented as an identifier named "self".
	if (match(AeThexTokenizer::TK_SELF)) {
		ASTNode *identifier = memnew(ASTNode);
		identifier->type = ASTNode::NODE_IDENTIFIER;
		identifier->value = "self";
		identifier->line = previous().line;
		return identifier;
	}

	// Parenthesised expression: no wrapper node is created.
	if (match(AeThexTokenizer::TK_PAREN_OPEN)) {
		ASTNode *inner = parse_expression();
		consume(AeThexTokenizer::TK_PAREN_CLOSE, "Expected ')' after expression");
		return inner;
	}

	// Array literal.
	if (match(AeThexTokenizer::TK_BRACKET_OPEN)) {
		ASTNode *array = memnew(ASTNode);
		array->type = ASTNode::NODE_ARRAY;
		array->line = previous().line;
		while (!is_at_end() && !check(AeThexTokenizer::TK_BRACKET_CLOSE)) {
			array->children.push_back(parse_expression());
			match(AeThexTokenizer::TK_COMMA);
		}
		consume(AeThexTokenizer::TK_BRACKET_CLOSE, "Expected ']' after array");
		return array;
	}

	// Dictionary literal: children are stored flat as key, value, key, value...
	if (match(AeThexTokenizer::TK_BRACE_OPEN)) {
		ASTNode *dict = memnew(ASTNode);
		dict->type = ASTNode::NODE_DICT;
		dict->line = previous().line;
		while (!is_at_end() && !check(AeThexTokenizer::TK_BRACE_CLOSE)) {
			ASTNode *entry_key = parse_expression();
			consume(AeThexTokenizer::TK_COLON, "Expected ':' after dictionary key");
			ASTNode *entry_value = parse_expression();
			dict->children.push_back(entry_key);
			dict->children.push_back(entry_value);
			match(AeThexTokenizer::TK_COMMA);
		}
		consume(AeThexTokenizer::TK_BRACE_CLOSE, "Expected '}' after dictionary");
		return dict;
	}

	// Construction: 'new' ClassName, with an optional argument list.
	if (match(AeThexTokenizer::TK_NEW)) {
		ASTNode *construct = memnew(ASTNode);
		construct->type = ASTNode::NODE_CALL;
		construct->value = "new";
		construct->line = previous().line;

		consume(AeThexTokenizer::TK_IDENTIFIER, "Expected class name after 'new'");
		ASTNode *class_ident = memnew(ASTNode);
		class_ident->type = ASTNode::NODE_IDENTIFIER;
		class_ident->value = previous().value;
		construct->children.push_back(class_ident);

		if (match(AeThexTokenizer::TK_PAREN_OPEN)) {
			while (!is_at_end() && !check(AeThexTokenizer::TK_PAREN_CLOSE)) {
				construct->children.push_back(parse_expression());
				match(AeThexTokenizer::TK_COMMA);
			}
			consume(AeThexTokenizer::TK_PAREN_CLOSE, "Expected ')' after constructor arguments");
		}
		return construct;
	}

	// No match - create an error node. The offending token is consumed so
	// that callers looping over parse_expression() keep making progress.
	error("Expected expression");
	ASTNode *placeholder = memnew(ASTNode);
	placeholder->type = ASTNode::NODE_LITERAL;
	placeholder->value = "";
	placeholder->line = peek().line;
	advance();
	return placeholder;
}