/**************************************************************************/
/*  aethex_tokenizer.cpp                                                  */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             AETHEX ENGINE                              */
/*                       https://aethex.foundation                        */
/**************************************************************************/
/* Copyright (c) 2026-present AeThex Labs.                                */
/**************************************************************************/

#include "aethex_tokenizer.h"

// NOTE(review): the include target was missing in the reviewed copy of this
// file; <cctype> is what the character classification calls below require.
#include <cctype>

namespace {

// The <cctype> classifiers have undefined behavior when handed a negative
// value other than EOF, which a plain `char` can be. Always go through
// `unsigned char` first.
inline bool aethex_is_digit(char p_char) {
	return std::isdigit(static_cast<unsigned char>(p_char)) != 0;
}

inline bool aethex_is_alpha(char p_char) {
	return std::isalpha(static_cast<unsigned char>(p_char)) != 0;
}

inline bool aethex_is_alnum(char p_char) {
	return std::isalnum(static_cast<unsigned char>(p_char)) != 0;
}

} // namespace

// Returns the character `offset` positions away from the cursor without
// consuming it, or '\0' when that position lies outside the source.
char AeThexTokenizer::peek(int offset) const {
	const int idx = pos + offset;
	if (idx < 0 || idx >= source.length()) {
		return '\0';
	}
	return source[idx];
}

// Consumes and returns the character under the cursor, keeping `line` and
// `column` in sync. Returns '\0' (and does not move) at end of input.
char AeThexTokenizer::advance() {
	if (pos >= source.length()) {
		return '\0';
	}

	const char c = source[pos++];
	if (c == '\n') {
		line++;
		column = 1;
	} else {
		column++;
	}
	return c;
}

// Consumes the next character only if it equals `expected`.
// Returns whether a character was consumed.
bool AeThexTokenizer::match(char expected) {
	if (peek() != expected) {
		return false;
	}
	advance();
	return true;
}

// Skips spaces, tabs, carriage returns and '#' comments. Newlines are NOT
// skipped: tokenize() turns them into TK_NEWLINE tokens.
void AeThexTokenizer::skip_whitespace() {
	while (pos < source.length()) {
		const char c = peek();
		if (c == ' ' || c == '\t' || c == '\r') {
			advance();
		} else if (c == '#') {
			skip_comment();
		} else {
			break;
		}
	}
}

// Consumes characters up to, but not including, the next '\n' (or end of
// input), so the newline itself is still tokenized by the caller.
void AeThexTokenizer::skip_comment() {
	while (pos < source.length() && peek() != '\n') {
		advance();
	}
}

// Builds a token of the given type and value, stamped with the tokenizer's
// current line and column. tokenize() overrides that position with the start
// of the lexeme before storing the token.
AeThexTokenizer::Token AeThexTokenizer::make_token(TokenType type, const String &value) {
	Token token;
	token.type = type;
	token.value = value;
	token.line = line;
	token.column = column;
	return token;
}

// Scans the body of a string literal. The opening quote must already have
// been consumed by the caller; `quote` is that opening character.
//
// Recognized escapes are \n, \t, \r and \\; any other escaped character is
// taken literally (which is how an escaped quote is written).
// Returns TK_TEMPLATE_STRING for backtick strings, TK_STRING otherwise, or a
// TK_ERROR token with the value "Unterminated string" if input ends first.
AeThexTokenizer::Token AeThexTokenizer::scan_string(char quote) {
	String value;

	while (pos < source.length() && peek() != quote) {
		if (peek() == '\\' && pos + 1 < source.length()) {
			advance(); // Skip backslash.
			const char escaped = advance();
			switch (escaped) {
				case 'n':
					value += '\n';
					break;
				case 't':
					value += '\t';
					break;
				case 'r':
					value += '\r';
					break;
				case '\\':
					value += '\\';
					break;
				default:
					value += escaped;
					break;
			}
		} else {
			value += advance();
		}
	}

	if (pos >= source.length()) {
		return make_token(TK_ERROR, "Unterminated string");
	}

	advance(); // Closing quote.
	return make_token(quote == '`' ? TK_TEMPLATE_STRING : TK_STRING, value);
}

// Scans a numeric literal: one or more digits, optionally followed by a
// single '.' and more digits.
//
// A '.' is only treated as a decimal point when a digit follows it, so input
// such as `3.foo` or `1..5` yields the number followed by TK_DOT token(s)
// instead of a malformed number like "3." or "1..5".
AeThexTokenizer::Token AeThexTokenizer::scan_number() {
	String value;
	bool seen_decimal_point = false;

	while (pos < source.length()) {
		const char c = peek();
		if (aethex_is_digit(c)) {
			value += advance();
		} else if (c == '.' && !seen_decimal_point && aethex_is_digit(peek(1))) {
			seen_decimal_point = true;
			value += advance();
		} else {
			break;
		}
	}

	return make_token(TK_NUMBER, value);
}

// Scans a run of alphanumeric characters and underscores and classifies it
// as either a keyword or TK_IDENTIFIER.
AeThexTokenizer::Token AeThexTokenizer::scan_identifier() {
	String value;
	while (pos < source.length() && (aethex_is_alnum(peek()) || peek() == '_')) {
		value += advance();
	}
	return make_token(check_keyword(value), value);
}

// Maps a reserved word to its token type; anything that is not a reserved
// word is TK_IDENTIFIER. The comparison is case-sensitive.
AeThexTokenizer::TokenType AeThexTokenizer::check_keyword(const String &identifier) {
	static const struct {
		const char *name;
		TokenType type;
	} keywords[] = {
		// Core constructs
		{ "reality", TK_REALITY },
		{ "journey", TK_JOURNEY },
		{ "portal", TK_PORTAL },
		{ "beacon", TK_BEACON },
		{ "artifact", TK_ARTIFACT },
		{ "essence", TK_ESSENCE },
		{ "chronicle", TK_CHRONICLE },
		// Control flow
		{ "when", TK_WHEN },
		{ "otherwise", TK_OTHERWISE },
		{ "traverse", TK_TRAVERSE },
		{ "while", TK_WHILE },
		{ "break", TK_BREAK },
		{ "continue", TK_CONTINUE },
		{ "return", TK_RETURN },
		{ "yield", TK_YIELD },
		// Data
		{ "let", TK_LET },
		{ "const", TK_CONST },
		{ "mut", TK_MUT },
		{ "new", TK_NEW },
		// Platform
		{ "platform", TK_PLATFORM },
		{ "sync", TK_SYNC },
		{ "async", TK_ASYNC },
		{ "await", TK_AWAIT },
		{ "across", TK_ACROSS },
		{ "all", TK_ALL },
		// Actions
		{ "notify", TK_NOTIFY },
		{ "reveal", TK_REVEAL },
		{ "summon", TK_SUMMON },
		{ "banish", TK_BANISH },
		// Literals
		{ "true", TK_TRUE },
		{ "false", TK_FALSE },
		{ "null", TK_NULL },
		{ "self", TK_SELF },
		{ "super", TK_SUPER },
		// Logical operators
		{ "and", TK_AND },
		{ "or", TK_OR },
		{ "not", TK_NOT },
	};

	for (const auto &keyword : keywords) {
		if (identifier == keyword.name) {
			return keyword.type;
		}
	}
	return TK_IDENTIFIER;
}

// Tokenizes `p_source`, replacing any previously stored tokens. The token
// list always ends with a TK_EOF token.
//
// Lexical problems (unterminated string, unexpected character) are reported
// in-band as TK_ERROR tokens whose value holds the message; the return value
// is always OK.
// Every token except TK_EOF carries the line/column at which its lexeme
// starts.
Error AeThexTokenizer::tokenize(const String &p_source) {
	source = p_source;
	pos = 0;
	line = 1;
	column = 1;
	tokens.clear();

	while (pos < source.length()) {
		skip_whitespace();
		if (pos >= source.length()) {
			break;
		}

		// make_token() stamps the position *after* the lexeme has been
		// consumed, so remember where the lexeme started and apply that
		// position to every token stored during this iteration.
		const auto start_line = line;
		const auto start_column = column;
		auto emit = [&](Token p_token) {
			p_token.line = start_line;
			p_token.column = start_column;
			tokens.push_back(p_token);
		};

		const char c = peek();

		// Newline
		if (c == '\n') {
			advance();
			emit(make_token(TK_NEWLINE));
			continue;
		}

		// String literals
		if (c == '"' || c == '\'' || c == '`') {
			advance(); // Opening quote.
			emit(scan_string(c));
			continue;
		}

		// Numbers
		if (aethex_is_digit(c)) {
			emit(scan_number());
			continue;
		}

		// Identifiers and keywords
		if (aethex_is_alpha(c) || c == '_') {
			emit(scan_identifier());
			continue;
		}

		// Operators and punctuation
		advance();
		switch (c) {
			case '+':
				emit(make_token(match('=') ? TK_PLUS_EQUAL : TK_PLUS));
				break;
			case '-':
				if (match('>')) {
					emit(make_token(TK_ARROW));
				} else if (match('=')) {
					emit(make_token(TK_MINUS_EQUAL));
				} else {
					emit(make_token(TK_MINUS));
				}
				break;
			case '*':
				emit(make_token(TK_STAR));
				break;
			case '/':
				emit(make_token(TK_SLASH));
				break;
			case '%':
				emit(make_token(TK_PERCENT));
				break;
			case '^':
				emit(make_token(TK_CARET));
				break;
			case '=':
				if (match('=')) {
					emit(make_token(TK_EQUAL_EQUAL));
				} else if (match('>')) {
					emit(make_token(TK_FAT_ARROW));
				} else {
					emit(make_token(TK_EQUAL));
				}
				break;
			case '!':
				emit(make_token(match('=') ? TK_NOT_EQUAL : TK_NOT));
				break;
			case '<':
				emit(make_token(match('=') ? TK_LESS_EQUAL : TK_LESS));
				break;
			case '>':
				emit(make_token(match('=') ? TK_GREATER_EQUAL : TK_GREATER));
				break;
			case '&':
				// Only `&&` is an operator; a lone `&` must be reported
				// rather than silently discarded.
				if (match('&')) {
					emit(make_token(TK_AND));
				} else {
					emit(make_token(TK_ERROR, String("Unexpected character: ") + c));
				}
				break;
			case '|':
				// Only `||` is an operator; a lone `|` must be reported
				// rather than silently discarded.
				if (match('|')) {
					emit(make_token(TK_OR));
				} else {
					emit(make_token(TK_ERROR, String("Unexpected character: ") + c));
				}
				break;
			case ':':
				emit(make_token(TK_COLON));
				break;
			case ';':
				emit(make_token(TK_SEMICOLON));
				break;
			case ',':
				emit(make_token(TK_COMMA));
				break;
			case '.':
				emit(make_token(TK_DOT));
				break;
			case '(':
				emit(make_token(TK_PAREN_OPEN));
				break;
			case ')':
				emit(make_token(TK_PAREN_CLOSE));
				break;
			case '[':
				emit(make_token(TK_BRACKET_OPEN));
				break;
			case ']':
				emit(make_token(TK_BRACKET_CLOSE));
				break;
			case '{':
				emit(make_token(TK_BRACE_OPEN));
				break;
			case '}':
				emit(make_token(TK_BRACE_CLOSE));
				break;
			default:
				emit(make_token(TK_ERROR, String("Unexpected character: ") + c));
				break;
		}
	}

	tokens.push_back(make_token(TK_EOF));
	return OK;
}

// Returns a human-readable name for a token type (the enumerator name
// without its TK_ prefix), or "UNKNOWN" for a type not listed here.
String AeThexTokenizer::token_type_to_string(TokenType type) {
	switch (type) {
		// Literals and identifiers
		case TK_IDENTIFIER:
			return "IDENTIFIER";
		case TK_NUMBER:
			return "NUMBER";
		case TK_STRING:
			return "STRING";
		case TK_TEMPLATE_STRING:
			return "TEMPLATE_STRING";
		case TK_TRUE:
			return "TRUE";
		case TK_FALSE:
			return "FALSE";
		case TK_NULL:
			return "NULL";
		case TK_SELF:
			return "SELF";
		case TK_SUPER:
			return "SUPER";

		// Core constructs
		case TK_REALITY:
			return "REALITY";
		case TK_JOURNEY:
			return "JOURNEY";
		case TK_PORTAL:
			return "PORTAL";
		case TK_BEACON:
			return "BEACON";
		case TK_ARTIFACT:
			return "ARTIFACT";
		case TK_ESSENCE:
			return "ESSENCE";
		case TK_CHRONICLE:
			return "CHRONICLE";

		// Control flow
		case TK_WHEN:
			return "WHEN";
		case TK_OTHERWISE:
			return "OTHERWISE";
		case TK_TRAVERSE:
			return "TRAVERSE";
		case TK_WHILE:
			return "WHILE";
		case TK_BREAK:
			return "BREAK";
		case TK_CONTINUE:
			return "CONTINUE";
		case TK_RETURN:
			return "RETURN";
		case TK_YIELD:
			return "YIELD";

		// Data
		case TK_LET:
			return "LET";
		case TK_CONST:
			return "CONST";
		case TK_MUT:
			return "MUT";
		case TK_NEW:
			return "NEW";

		// Platform
		case TK_PLATFORM:
			return "PLATFORM";
		case TK_SYNC:
			return "SYNC";
		case TK_ASYNC:
			return "ASYNC";
		case TK_AWAIT:
			return "AWAIT";
		case TK_ACROSS:
			return "ACROSS";
		case TK_ALL:
			return "ALL";

		// Actions
		case TK_NOTIFY:
			return "NOTIFY";
		case TK_REVEAL:
			return "REVEAL";
		case TK_SUMMON:
			return "SUMMON";
		case TK_BANISH:
			return "BANISH";

		// Logical operators
		case TK_AND:
			return "AND";
		case TK_OR:
			return "OR";
		case TK_NOT:
			return "NOT";

		// Operators
		case TK_PLUS:
			return "PLUS";
		case TK_PLUS_EQUAL:
			return "PLUS_EQUAL";
		case TK_MINUS:
			return "MINUS";
		case TK_MINUS_EQUAL:
			return "MINUS_EQUAL";
		case TK_ARROW:
			return "ARROW";
		case TK_FAT_ARROW:
			return "FAT_ARROW";
		case TK_STAR:
			return "STAR";
		case TK_SLASH:
			return "SLASH";
		case TK_PERCENT:
			return "PERCENT";
		case TK_CARET:
			return "CARET";
		case TK_EQUAL:
			return "EQUAL";
		case TK_EQUAL_EQUAL:
			return "EQUAL_EQUAL";
		case TK_NOT_EQUAL:
			return "NOT_EQUAL";
		case TK_LESS:
			return "LESS";
		case TK_LESS_EQUAL:
			return "LESS_EQUAL";
		case TK_GREATER:
			return "GREATER";
		case TK_GREATER_EQUAL:
			return "GREATER_EQUAL";

		// Punctuation
		case TK_COLON:
			return "COLON";
		case TK_SEMICOLON:
			return "SEMICOLON";
		case TK_COMMA:
			return "COMMA";
		case TK_DOT:
			return "DOT";
		case TK_PAREN_OPEN:
			return "PAREN_OPEN";
		case TK_PAREN_CLOSE:
			return "PAREN_CLOSE";
		case TK_BRACKET_OPEN:
			return "BRACKET_OPEN";
		case TK_BRACKET_CLOSE:
			return "BRACKET_CLOSE";
		case TK_BRACE_OPEN:
			return "BRACE_OPEN";
		case TK_BRACE_CLOSE:
			return "BRACE_CLOSE";

		// Structural
		case TK_NEWLINE:
			return "NEWLINE";
		case TK_ERROR:
			return "ERROR";
		case TK_EOF:
			return "EOF";

		default:
			return "UNKNOWN";
	}
}