AeThex-Engine-Core/engine/misc/scripts/unicode_ranges_fetch.py
MrPiglr 9dddce666d
🚀 AeThex Engine v1.0 - Complete Fork
- Forked from Godot Engine 4.7-dev (MIT License)
- Rebranded to AeThex Engine with cyan/purple theme
- Added AI-powered development assistant module
- Integrated Claude API for code completion & error fixing
- Custom hexagon logo and branding
- Multi-platform CI/CD (Windows, Linux, macOS)
- Built Linux editor binary (151MB)
- Complete source code with all customizations

Tech Stack:
- C++ game engine core
- AI Module: Claude 3.5 Sonnet integration
- Build: SCons, 14K+ source files
- License: MIT (Godot) + Custom (AeThex features)

Ready for Windows build via GitHub Actions!
2026-02-23 05:01:56 +00:00

100 lines
2.5 KiB
Python
Executable file

#!/usr/bin/env python3
# Script used to dump char ranges from
# the Unicode Character Database to the `unicode_ranges.inc` file.
# NOTE: This script is deliberately not integrated into the build system;
# you should run it manually whenever you want to update the data.
from __future__ import annotations
import os
import sys
from typing import Final
from urllib.request import urlopen
if __name__ == "__main__":
sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
from methods import generate_copyright_header
URL: Final[str] = "https://www.unicode.org/Public/17.0.0/ucd/Blocks.txt"
ranges: list[tuple[str, str, str]] = []
exclude_blocks: set[str] = {
"High Surrogates",
"High Private Use Surrogates",
"Low Surrogates",
"Variation Selectors",
"Specials",
"Egyptian Hieroglyph Format Controls",
"Tags",
"Variation Selectors Supplement",
}
def parse_unicode_data() -> None:
lines: list[str] = [line.decode("utf-8") for line in urlopen(URL)]
for line in lines:
if line.startswith("#") or not line.strip():
continue
split_line: list[str] = line.split(";")
char_range: str = split_line[0].strip()
block: str = split_line[1].strip()
if block in exclude_blocks:
continue
range_start, range_end = char_range.split("..")
ranges.append((f"0x{range_start}", f"0x{range_end}", block))
def make_array(array_name: str, ranges: list[tuple[str, str, str]]) -> str:
result: str = f"static UniRange {array_name}[] = {{\n"
for start, end, block in ranges:
result += f'\t{{ {start}, {end}, U"{block}" }},\n'
result += """\t{ 0x10FFFF, 0x10FFFF, String() }
};\n\n"""
return result
def generate_unicode_ranges_inc() -> None:
parse_unicode_data()
source: str = generate_copyright_header("unicode_ranges.inc")
source += f"""
// This file was generated using the `misc/scripts/unicode_ranges_fetch.py` script.
#ifndef UNICODE_RANGES_INC
#define UNICODE_RANGES_INC
// Unicode Character Blocks
// Source: {URL}
struct UniRange {{
\tint32_t start;
\tint32_t end;
\tString name;
}};\n\n"""
source += make_array("unicode_ranges", ranges)
source += "#endif // UNICODE_RANGES_INC\n"
unicode_ranges_path: str = os.path.join(os.path.dirname(__file__), "../../editor/import/unicode_ranges.inc")
with open(unicode_ranges_path, "w", newline="\n") as f:
f.write(source)
print("`unicode_ranges.inc` generated successfully.")
if __name__ == "__main__":
generate_unicode_ranges_inc()