AeThex-Engine-Core/engine/modules/aethex_lang/aethex_tokenizer.cpp
2026-02-24 01:55:30 -07:00

283 lines
8 KiB
C++

/**************************************************************************/
/* aethex_tokenizer.cpp */
/**************************************************************************/
/* This file is part of: */
/* AETHEX ENGINE */
/* https://aethex.foundation */
/**************************************************************************/
/* Copyright (c) 2026-present AeThex Labs. */
/**************************************************************************/
#include "aethex_tokenizer.h"
#include <cctype>
// Returns the character `offset` positions away from the cursor without
// consuming it, or '\0' when that position lies outside the source text.
char AeThexTokenizer::peek(int offset) const {
	const int idx = pos + offset;
	// Guard both ends: a negative offset must not index before the start of
	// the buffer, and anything at or past the end reads as a terminator.
	if (idx < 0 || idx >= source.length()) {
		return '\0';
	}
	return source[idx];
}
// Consumes the character under the cursor and returns it, keeping the
// line/column bookkeeping in step: a '\n' starts a new line at column 1,
// any other character moves one column to the right.
// No bounds check is performed here; callers are expected to ensure that
// `pos` is inside the source before calling.
char AeThexTokenizer::advance() {
	const char consumed = source[pos];
	++pos;

	if (consumed != '\n') {
		++column;
		return consumed;
	}

	++line;
	column = 1;
	return consumed;
}
// Conditionally consumes one character: if the character under the cursor
// equals `expected` it is consumed and true is returned; otherwise the
// cursor is left untouched and false is returned.
bool AeThexTokenizer::match(char expected) {
	const bool is_expected = (peek() == expected);
	if (is_expected) {
		advance();
	}
	return is_expected;
}
// Skips spaces, tabs, carriage returns and '#' comments.
// Line feeds are deliberately NOT skipped here: the cursor stops on '\n'
// (as on any other non-blank character) so the caller can handle it.
void AeThexTokenizer::skip_whitespace() {
	for (;;) {
		if (pos >= source.length()) {
			return;
		}
		switch (peek()) {
			case ' ':
			case '\t':
			case '\r':
				advance();
				break;
			case '#':
				skip_comment();
				break;
			default:
				return;
		}
	}
}
// Consumes the remainder of the current line. The terminating '\n' itself
// is left in place (or the cursor stops at end of input).
void AeThexTokenizer::skip_comment() {
	for (; pos < source.length(); advance()) {
		if (peek() == '\n') {
			break;
		}
	}
}
// Builds a token of the given type and text, stamped with the tokenizer's
// current `line` and `column` (i.e. the cursor position at the moment of
// the call, not necessarily where the lexeme began).
AeThexTokenizer::Token AeThexTokenizer::make_token(TokenType type, const String &value) {
	Token result;
	result.line = line;
	result.column = column;
	result.type = type;
	result.value = value;
	return result;
}
// Scans the body of a string literal. The opening quote must already have
// been consumed; `quote` is the delimiter that terminates the literal.
//
// Escapes \n, \t, \r and \\ are translated; a backslash followed by any
// other character yields that character unchanged. Reaching end of input
// before the closing quote produces a TK_ERROR token. A backtick delimiter
// yields TK_TEMPLATE_STRING, any other delimiter yields TK_STRING.
AeThexTokenizer::Token AeThexTokenizer::scan_string(char quote) {
	String text;

	for (;;) {
		if (pos >= source.length()) {
			return make_token(TK_ERROR, "Unterminated string");
		}

		const char current = peek();
		if (current == quote) {
			break;
		}

		// A backslash only starts an escape when another character follows it;
		// a trailing backslash at end of input is taken literally.
		const bool starts_escape = (current == '\\') && (pos + 1 < source.length());
		if (!starts_escape) {
			text += advance();
			continue;
		}

		advance(); // Drop the backslash.
		const char escaped = advance();
		if (escaped == 'n') {
			text += '\n';
		} else if (escaped == 't') {
			text += '\t';
		} else if (escaped == 'r') {
			text += '\r';
		} else {
			// Covers '\\' as well as every unrecognised escape.
			text += escaped;
		}
	}

	advance(); // Consume the closing quote.

	if (quote == '`') {
		return make_token(TK_TEMPLATE_STRING, text);
	}
	return make_token(TK_STRING, text);
}
// Scans a numeric literal starting at the cursor: a run of decimal digits,
// optionally followed by a single '.' and more digits.
//
// A '.' is only treated as part of the number when it is the first one seen
// and is immediately followed by a digit. This keeps inputs such as "1.2.3",
// "1..5" or "1.foo" from being swallowed into one malformed NUMBER token;
// the extra dots are left for the caller to tokenize separately.
AeThexTokenizer::Token AeThexTokenizer::scan_number() {
	String value;
	bool seen_dot = false;

	while (pos < source.length()) {
		const char c = peek();
		// <cctype> functions require a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		if (isdigit(static_cast<unsigned char>(c))) {
			value += advance();
		} else if (c == '.' && !seen_dot && isdigit(static_cast<unsigned char>(peek(1)))) {
			seen_dot = true;
			value += advance();
		} else {
			break;
		}
	}

	return make_token(TK_NUMBER, value);
}
// Scans an identifier or keyword starting at the cursor: a run of
// alphanumeric characters and underscores. The collected text is passed to
// check_keyword() to decide whether the token is a reserved word or a plain
// TK_IDENTIFIER.
AeThexTokenizer::Token AeThexTokenizer::scan_identifier() {
	String value;

	while (pos < source.length()) {
		const char c = peek();
		// <cctype> functions require a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		const bool is_word_char = isalnum(static_cast<unsigned char>(c)) || c == '_';
		if (!is_word_char) {
			break;
		}
		value += advance();
	}

	return make_token(check_keyword(value), value);
}
// Maps an identifier to its keyword token type. The comparison is exact
// (case-sensitive); anything that is not a reserved word is reported as
// TK_IDENTIFIER.
AeThexTokenizer::TokenType AeThexTokenizer::check_keyword(const String &identifier) {
	struct KeywordEntry {
		const char *text;
		TokenType type;
	};

	static const KeywordEntry keyword_table[] = {
		// Core constructs
		{ "reality", TK_REALITY },
		{ "journey", TK_JOURNEY },
		{ "portal", TK_PORTAL },
		{ "beacon", TK_BEACON },
		{ "artifact", TK_ARTIFACT },
		{ "essence", TK_ESSENCE },
		{ "chronicle", TK_CHRONICLE },
		// Control flow
		{ "when", TK_WHEN },
		{ "otherwise", TK_OTHERWISE },
		{ "traverse", TK_TRAVERSE },
		{ "while", TK_WHILE },
		{ "break", TK_BREAK },
		{ "continue", TK_CONTINUE },
		{ "return", TK_RETURN },
		{ "yield", TK_YIELD },
		// Data
		{ "let", TK_LET },
		{ "const", TK_CONST },
		{ "mut", TK_MUT },
		{ "new", TK_NEW },
		// Platform
		{ "platform", TK_PLATFORM },
		{ "sync", TK_SYNC },
		{ "async", TK_ASYNC },
		{ "await", TK_AWAIT },
		{ "across", TK_ACROSS },
		{ "all", TK_ALL },
		// Actions
		{ "notify", TK_NOTIFY },
		{ "reveal", TK_REVEAL },
		{ "summon", TK_SUMMON },
		{ "banish", TK_BANISH },
		// Literals
		{ "true", TK_TRUE },
		{ "false", TK_FALSE },
		{ "null", TK_NULL },
		{ "self", TK_SELF },
		{ "super", TK_SUPER },
		// Logical operators
		{ "and", TK_AND },
		{ "or", TK_OR },
		{ "not", TK_NOT },
	};

	for (const KeywordEntry &entry : keyword_table) {
		if (identifier == entry.text) {
			return entry.type;
		}
	}
	return TK_IDENTIFIER;
}
// Tokenizes `p_source` from scratch: resets the cursor and position
// counters, discards any previously produced tokens, and fills `tokens`
// with the new stream, always terminated by a TK_EOF token.
//
// Lexical problems do not abort the scan. They are reported in-band as
// TK_ERROR tokens whose value holds the message, and the function itself
// always returns OK.
//
// Every token produced inside the loop is stamped with the line/column at
// which its lexeme STARTS, so diagnostics point at the beginning of the
// offending text rather than just past it.
Error AeThexTokenizer::tokenize(const String &p_source) {
	source = p_source;
	pos = 0;
	line = 1;
	column = 1;
	tokens.clear();

	while (pos < source.length()) {
		skip_whitespace();
		if (pos >= source.length()) {
			break;
		}

		// Remember where this token begins; make_token() records the cursor
		// position at call time, which is after the lexeme once it is scanned.
		const auto start_line = line;
		const auto start_column = column;

		const char c = peek();
		// <cctype> functions require a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		const unsigned char uc = static_cast<unsigned char>(c);

		Token token;
		if (c == '\n') {
			// Newlines are significant and get their own token.
			advance();
			token = make_token(TK_NEWLINE);
		} else if (c == '"' || c == '\'' || c == '`') {
			// String literals: consume the opening quote, then scan the body.
			advance();
			token = scan_string(c);
		} else if (isdigit(uc)) {
			token = scan_number();
		} else if (isalpha(uc) || c == '_') {
			// Identifiers and keywords
			token = scan_identifier();
		} else {
			// Operators and punctuation. `type` stays TK_ERROR unless a case
			// below recognises the character (sequence).
			advance();
			TokenType type = TK_ERROR;
			switch (c) {
				case '+':
					type = match('=') ? TK_PLUS_EQUAL : TK_PLUS;
					break;
				case '-':
					if (match('>')) {
						type = TK_ARROW;
					} else if (match('=')) {
						type = TK_MINUS_EQUAL;
					} else {
						type = TK_MINUS;
					}
					break;
				case '*':
					type = TK_STAR;
					break;
				case '/':
					type = TK_SLASH;
					break;
				case '%':
					type = TK_PERCENT;
					break;
				case '^':
					type = TK_CARET;
					break;
				case '=':
					if (match('=')) {
						type = TK_EQUAL_EQUAL;
					} else if (match('>')) {
						type = TK_FAT_ARROW;
					} else {
						type = TK_EQUAL;
					}
					break;
				case '!':
					type = match('=') ? TK_NOT_EQUAL : TK_NOT;
					break;
				case '<':
					type = match('=') ? TK_LESS_EQUAL : TK_LESS;
					break;
				case '>':
					type = match('=') ? TK_GREATER_EQUAL : TK_GREATER;
					break;
				case '&':
					// Only "&&" is an operator; a lone '&' falls through as an
					// error instead of being silently discarded.
					if (match('&')) {
						type = TK_AND;
					}
					break;
				case '|':
					// Only "||" is an operator; a lone '|' falls through as an
					// error instead of being silently discarded.
					if (match('|')) {
						type = TK_OR;
					}
					break;
				case ':':
					type = TK_COLON;
					break;
				case ';':
					type = TK_SEMICOLON;
					break;
				case ',':
					type = TK_COMMA;
					break;
				case '.':
					type = TK_DOT;
					break;
				case '(':
					type = TK_PAREN_OPEN;
					break;
				case ')':
					type = TK_PAREN_CLOSE;
					break;
				case '[':
					type = TK_BRACKET_OPEN;
					break;
				case ']':
					type = TK_BRACKET_CLOSE;
					break;
				case '{':
					type = TK_BRACE_OPEN;
					break;
				case '}':
					type = TK_BRACE_CLOSE;
					break;
				default:
					break;
			}

			if (type == TK_ERROR) {
				token = make_token(TK_ERROR, String("Unexpected character: ") + c);
			} else {
				token = make_token(type);
			}
		}

		token.line = start_line;
		token.column = start_column;
		tokens.push_back(token);
	}

	tokens.push_back(make_token(TK_EOF));
	return OK;
}
// Returns a human-readable name for a token type, intended for diagnostics.
// Every token type this tokenizer emits has an entry; any value without one
// is reported as "UNKNOWN".
String AeThexTokenizer::token_type_to_string(TokenType type) {
	switch (type) {
		// Literals and special tokens
		case TK_IDENTIFIER: return "IDENTIFIER";
		case TK_NUMBER: return "NUMBER";
		case TK_STRING: return "STRING";
		case TK_TEMPLATE_STRING: return "TEMPLATE_STRING";
		case TK_NEWLINE: return "NEWLINE";
		case TK_ERROR: return "ERROR";
		case TK_EOF: return "EOF";
		// Core constructs
		case TK_REALITY: return "REALITY";
		case TK_JOURNEY: return "JOURNEY";
		case TK_PORTAL: return "PORTAL";
		case TK_BEACON: return "BEACON";
		case TK_ARTIFACT: return "ARTIFACT";
		case TK_ESSENCE: return "ESSENCE";
		case TK_CHRONICLE: return "CHRONICLE";
		// Control flow
		case TK_WHEN: return "WHEN";
		case TK_OTHERWISE: return "OTHERWISE";
		case TK_TRAVERSE: return "TRAVERSE";
		case TK_WHILE: return "WHILE";
		case TK_BREAK: return "BREAK";
		case TK_CONTINUE: return "CONTINUE";
		case TK_RETURN: return "RETURN";
		case TK_YIELD: return "YIELD";
		// Data
		case TK_LET: return "LET";
		case TK_CONST: return "CONST";
		case TK_MUT: return "MUT";
		case TK_NEW: return "NEW";
		// Platform
		case TK_PLATFORM: return "PLATFORM";
		case TK_SYNC: return "SYNC";
		case TK_ASYNC: return "ASYNC";
		case TK_AWAIT: return "AWAIT";
		case TK_ACROSS: return "ACROSS";
		case TK_ALL: return "ALL";
		// Actions
		case TK_NOTIFY: return "NOTIFY";
		case TK_REVEAL: return "REVEAL";
		case TK_SUMMON: return "SUMMON";
		case TK_BANISH: return "BANISH";
		// Literal keywords
		case TK_TRUE: return "TRUE";
		case TK_FALSE: return "FALSE";
		case TK_NULL: return "NULL";
		case TK_SELF: return "SELF";
		case TK_SUPER: return "SUPER";
		// Logical operators (keyword and symbolic forms share a type)
		case TK_AND: return "AND";
		case TK_OR: return "OR";
		case TK_NOT: return "NOT";
		// Arithmetic and assignment operators
		case TK_PLUS: return "PLUS";
		case TK_PLUS_EQUAL: return "PLUS_EQUAL";
		case TK_MINUS: return "MINUS";
		case TK_MINUS_EQUAL: return "MINUS_EQUAL";
		case TK_STAR: return "STAR";
		case TK_SLASH: return "SLASH";
		case TK_PERCENT: return "PERCENT";
		case TK_CARET: return "CARET";
		case TK_EQUAL: return "EQUAL";
		case TK_ARROW: return "ARROW";
		case TK_FAT_ARROW: return "FAT_ARROW";
		// Comparison operators
		case TK_EQUAL_EQUAL: return "EQUAL_EQUAL";
		case TK_NOT_EQUAL: return "NOT_EQUAL";
		case TK_LESS: return "LESS";
		case TK_LESS_EQUAL: return "LESS_EQUAL";
		case TK_GREATER: return "GREATER";
		case TK_GREATER_EQUAL: return "GREATER_EQUAL";
		// Punctuation
		case TK_COLON: return "COLON";
		case TK_SEMICOLON: return "SEMICOLON";
		case TK_COMMA: return "COMMA";
		case TK_DOT: return "DOT";
		case TK_PAREN_OPEN: return "PAREN_OPEN";
		case TK_PAREN_CLOSE: return "PAREN_CLOSE";
		case TK_BRACKET_OPEN: return "BRACKET_OPEN";
		case TK_BRACKET_CLOSE: return "BRACKET_CLOSE";
		case TK_BRACE_OPEN: return "BRACE_OPEN";
		case TK_BRACE_CLOSE: return "BRACE_CLOSE";
		default: return "UNKNOWN";
	}
}