Crusader_Decomp/tools/poc_crusader_usecode_parser.py

3230 lines
119 KiB
Python
Raw Normal View History

from __future__ import annotations
import argparse
2026-03-26 00:37:17 +01:00
import ast
import csv
import hashlib
import json
2026-03-25 23:32:13 +01:00
import re
from functools import lru_cache
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Default extraction tree plus the index files and chunk directory inside it.
EXTRACTED_ROOT = REPO_ROOT / "USECODE" / "EUSECODE_extracted"
CLASS_EVENT_INDEX = EXTRACTED_ROOT / "class_event_index.tsv"
CLASS_LAYOUT_INDEX = EXTRACTED_ROOT / "class_layout_index.tsv"
RUNTIME_VM_IR_INDEX = EXTRACTED_ROOT / "runtime_vm_ir.tsv"
CHUNKS_DIR = EXTRACTED_ROOT / "chunks"

# Directory holding the per-variant intrinsic name lists.
UNKCOFFS_DIR = REPO_ROOT / "tools" / "unkcoffs"

# Variant used when none is requested and none can be inferred from a path.
DEFAULT_GAME_VARIANT = "regret"

# Supported variants and the file each one loads extra intrinsic hints from.
INTRINSIC_HINT_PATHS = {
    "regret": UNKCOFFS_DIR / "regret_ints.py",
    "remorse": UNKCOFFS_DIR / "remorse_ints.py",
}
def resolve_extracted_root(extracted_root: Path | str | None = None) -> Path:
    """Return the extraction root to work with.

    ``None`` selects the module default ``EXTRACTED_ROOT``; any other value
    is converted to a ``Path``.
    """
    return EXTRACTED_ROOT if extracted_root is None else Path(extracted_root)
def extracted_root_paths(extracted_root: Path | str | None = None) -> tuple[Path, Path, Path, Path]:
    """Return the well-known locations inside an extraction root.

    The tuple is ordered as: class event index, class layout index,
    runtime VM IR index, chunks directory.
    """
    base = resolve_extracted_root(extracted_root)
    class_event_index = base / "class_event_index.tsv"
    class_layout_index = base / "class_layout_index.tsv"
    runtime_vm_ir_index = base / "runtime_vm_ir.tsv"
    chunks_dir = base / "chunks"
    return class_event_index, class_layout_index, runtime_vm_ir_index, chunks_dir
def repo_relative_path(path: Path) -> str:
    """Render *path* with forward slashes, relative to ``REPO_ROOT`` if possible."""
    try:
        shown = path.relative_to(REPO_ROOT)
    except ValueError:
        # Not located under the repository: show the path as given.
        shown = path
    return str(shown).replace("\\", "/")
def infer_flex_path(extracted_root: Path | str | None = None) -> str:
    """Return the display path of ``EUSECODE.FLX`` next to the extraction root.

    The file is placed in the parent directory of the extraction root; when
    that parent is the repository root only the bare file name is returned.
    """
    flex_dir = resolve_extracted_root(extracted_root).parent
    if flex_dir != REPO_ROOT:
        return f"{repo_relative_path(flex_dir)}/EUSECODE.FLX"
    return "EUSECODE.FLX"
# Name hints for event slots 0x00-0x1F, keyed by slot number.
# Slots without a descriptive name carry a ``funcXX`` placeholder.
# parse_one_op() looks slots up here to fill ``target_event_name_hint``.
EVENT_NAME_HINTS = {
    0x00: "look",
    0x01: "use",
    0x02: "anim",
    0x03: "setActivity",
    0x04: "cachein",
    0x05: "hit",
    0x06: "gotHit",
    0x07: "hatch",
    0x08: "schedule",
    0x09: "release",
    0x0A: "equip",
    0x0B: "unequip",
    0x0C: "combine",
    0x0D: "func0D",
    0x0E: "calledFromAnim",
    0x0F: "enterFastArea",
    0x10: "leaveFastArea",
    0x11: "cast",
    0x12: "justMoved",
    0x13: "avatarStoleSomething",
    0x14: "animGetHit",
    0x15: "func15",
    0x16: "func16",
    0x17: "func17",
    0x18: "func18",
    0x19: "func19",
    0x1A: "func1A",
    0x1B: "func1B",
    0x1C: "func1C",
    0x1D: "func1D",
    0x1E: "func1E",
    0x1F: "func1F",
}
# Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h
# Source note: "current discovered intrinsics are for regret1.21 only"
# These names are used as hints only; the ordinal mapping may differ between builds.
2026-03-26 00:37:17 +01:00
# Maps an intrinsic ordinal to a name/signature hint string.
# - Values named ``IntrinsicXXXX()`` are placeholders for unnamed intrinsics.
#   Some ordinals carry a placeholder numbered after a different ordinal
#   (e.g. 0x0017 -> "Intrinsic00C3()"); presumably these mark aliases of the
#   same routine -- TODO confirm against the upstream table.
# - Spellings are kept as found; build_intrinsic_hints() passes every value
#   through normalize_intrinsic_hint(), which repairs known typos such as
#   "Somehting".
BASE_INTRINSIC_HINTS: dict[int, str] = {
    0x0000: "Intrinsic0000()",
    0x0001: "Item::getFrame(void)",
    0x0002: "Item::setFrame(uint16)",
    0x0003: "Item::getMapNum(void)",
    0x0004: "Item::getStatus(void)",
    0x0005: "Item::orStatus(sint16)",
    0x0006: "Item::callEvent0A(sint16)",
    0x0007: "Intrinsic0007()",
    0x0008: "Item::isNpc(void)",
    0x0009: "Item::getZ(void)",
    0x000A: "Intrinsic000A()",
    0x000B: "Item::getQLo(void)",
    0x000C: "Item::destroy(void)",
    0x000D: "Intrinsic000D()",
    0x000E: "Item::getX(void)",
    0x000F: "Item::getY(void)",
    0x0010: "Intrinsic0010()",
    0x0011: "Item::getType(void)",
    0x0012: "Intrinsic0012()",
    0x0013: "Intrinsic0013()",
    0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)",
    0x0015: "Item::andStatus(void)",
    0x0016: "Intrinsic0016()",
    0x0017: "Intrinsic00C3()",
    0x0018: "Intrinsic00DA()",
    0x0019: "Intrinsic0019()",
    0x001A: "Item::create(uint16,uint16)",
    0x001B: "Item::pop(uint16,uint16,uint8)",
    0x001C: "Intrinsic00FA()",
    0x001D: "Item::push(void)",
    0x001E: "Intrinsic001E()",
    0x001F: "Item::getQLo(void)",
    0x0020: "Item::setQLo(sint16)",
    0x0021: "Item::getQHi(void)",
    0x0022: "Item::setQHi(sint16)",
    0x0023: "Intrinsic0023()",
    0x0024: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0025: "Item::getCY(void)",
    0x0026: "Item::getCX(void)",
    0x0027: "Intrinsic0027()",
    0x0028: "Item::setNpcNum(sint16)",
    0x0029: "Intrinsic0029()",
    0x002A: "Intrinsic002A()",
    0x002B: "Item::pop(void)",
    0x002C: "Intrinsic002C()",
    0x002D: "Item::isCompletelyOn(uint16)",
    0x002E: "Item::pop(uint16)",
    0x002F: "Intrinsic002F()",
    0x0030: "Intrinsic0030()",
    0x0031: "Item::getFamily(void)",
    0x0032: "Item::destroyContents(void)",
    0x0033: "Intrinsic0033()",
    0x0034: "Item::getDirToItem(uint16)",
    0x0035: "Intrinsic0035()",
    0x0036: "Intrinsic0036()",
    0x0037: "Intrinsic0037()",
    0x0038: "Item::andStatus(void)",
    0x0039: "Kernel::resetRef(uint16,ProcessType)",
    0x003A: "Item::touch(void)",
    0x003B: "Egg::getEggId(void)",
    0x003C: "Intrinsic003C()",
    0x003D: "Intrinsic003D()",
    0x003E: "Item::callEvent11(sint16)",
    0x003F: "Intrinsic003F()",
    0x0040: "Intrinsic0040()",
    0x0041: "Item::isOn(uint16)",
    0x0042: "Item::getQHi(void)",
    0x0043: "Item::isOn(uint16)",
    0x0044: "Item::getQHi(void)",
    0x0045: "Item::isOn(uint16)",
    0x0046: "Item::getQHi(void)",
    0x0047: "Item::isOn(uint16)",
    0x0048: "Item::getQHi(void)",
    0x0049: "Item::isOn(uint16)",
    0x004A: "Item::getQHi(void)",
    0x004B: "Item::isOn(uint16)",
    0x004C: "Item::getQHi(void)",
    0x004D: "Intrinsic004D()",
    0x004E: "Npc::isDead(void)",
    0x004F: "Intrinsic009C()",
    0x0050: "Intrinsic0050()",
    0x0051: "Intrinsic0051()",
    0x0052: "Intrinsic0052()",
    0x0053: "Intrinsic00BD()",
    0x0054: "Intrinsic0054()",
    0x0055: "Intrinsic0055()",
    0x0056: "Intrinsic0056()",
    0x0057: "Intrinsic0057()",
    0x0058: "Item::use(void)",
    0x0059: "Item::setQuantity(sint16)",
    0x005A: "Intrinsic005A()",
    0x005B: "Item::getSurfaceWeight(void)",
    0x005C: "Intrinsic005C()",
    0x005D: "Item::setFrame(uint16)",
    0x005E: "Intrinsic00DA()",
    0x005F: "Intrinsic005F()",
    0x0060: "Intrinsic0060()",
    0x0061: "Intrinsic0061()",
    0x0062: "Intrinsic0062()",
    0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)",
    0x0064: "Item::getPoint(WorldPoint&)",
    0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)",
    0x0066: "Item::fall(void)",
    0x0067: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0068: "Kernel::getNumProcesses(uint16,ProcessType)",
    0x0069: "Item::getCY(void)",
    0x006A: "Intrinsic006A()",
    0x006B: "Intrinsic006B()",
    0x006C: "Intrinsic006C()",
    0x006D: "Intrinsic006D()",
    0x006E: "Intrinsic006E()",
    0x006F: "Item::isInNpc(void)",
    0x0070: "Intrinsic0070()",
    0x0071: "Intrinsic0071()",
    0x0072: "Intrinsic0072()",
    0x0073: "Intrinsic0073()",
    0x0074: "Npc::isDead(void)",
    0x0075: "Item::getNpcNum(void)",
    0x0076: "IntrinsicReturn0",
    0x0077: "Intrinsic0077()",
    0x0078: "Item::callEvent0B(sint16)",
    0x0079: "Item::andStatus(void)",
    0x007A: "Item::move(uint16,uint16,uint8)",
    0x007B: "Intrinsic007B()",
    0x007C: "Intrinsic007C()",
    0x007D: "Intrinsic007D()",
    0x007E: "Intrinsic007E()",
    0x007F: "Intrinsic007F()",
    0x0080: "Intrinsic0080()",
    0x0081: "Intrinsic0081()",
    0x0082: "Intrinsic0082()",
    0x0083: "Intrinsic0083()",
    0x0084: "Intrinsic0084()",
    0x0085: "Intrinsic0085()",
    0x0086: "teleportToEgg(sint16,int,uint8)",
    0x0087: "Intrinsic0087()",
    0x0088: "Intrinsic0088()",
    0x0089: "Intrinsic00BD()",
    0x008A: "Item::getQuality(void)",
    0x008B: "Item::setQuality(sint16)",
    0x008C: "Intrinsic008C()",
    0x008D: "Intrinsic008D()",
    0x008E: "Intrinsic008E()",
    0x008F: "Camera::getX(void)",
    0x0090: "Camera::getY(void)",
    0x0091: "Item::setMapNum(sint16)",
    0x0092: "Item::getNpcNum(void)",
    0x0093: "Item::shoot(WorldPoint&,sint16,sint16)",
    0x0094: "Intrinsic0094()",
    0x0095: "Item::enterFastArea(void)",
    0x0096: "Intrinsic00CA()",
    0x0097: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0098: "Item::getNpcNum(void)",
    0x0099: "Intrinsic0099()",
    0x009A: "teleportToEgg(sint16,uint8)",
    0x009B: "Intrinsic009B()",
    0x009C: "Intrinsic009C()",
    0x009D: "Intrinsic009D()",
    0x009E: "Intrinsic009E()",
    0x009F: "Intrinsic009F()",
    0x00A0: "Item::andStatus(void)",
    0x00A1: "Item::getUnkEggType(void)",
    0x00A2: "Egg::setEggXRange(uint16)",
    0x00A3: "Item::setFrame(uint16)",
    0x00A4: "Item::overlaps(uint16)",
    0x00A5: "Item::isOn(uint16)",
    0x00A6: "Item::getQHi(void)",
    0x00A7: "Intrinsic00DA()",
    0x00A8: "Item::getCY(void)",
    0x00A9: "Intrinsic00A9()",
    0x00AA: "Item::isOn(uint16)",
    0x00AB: "Npc::isDead(void)",
    0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00AD: "Intrinsic00AD()",
    0x00AE: "Item::getQHi(void)",
    0x00AF: "Item::andStatus(void)",
    0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00B1: "Item::andStatus(void)",
    0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00B3: "Item::andStatus(void)",
    0x00B4: "Item::getDirToCoords(uint16,uint16)",
    0x00B5: "Intrinsic00B5()",
    0x00B6: "Intrinsic00B6()",
    0x00B7: "Item::getNpcNum(void)",
    0x00B8: "Item::getCY(void)",
    0x00B9: "Item::isOn(uint16)",
    0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)",
    0x00BB: "Npc::isDead(void)",
    0x00BC: "Intrinsic00BC()",
    0x00BD: "Intrinsic00BD()",
    0x00BE: "Intrinsic00BE()",
    0x00BF: "Item::andStatus(void)",
    0x00C0: "Intrinsic00C0()",
    0x00C1: "Intrinsic00C1()",
    0x00C2: "IntrinsicReturn0",
    0x00C3: "Intrinsic00C3()",
    0x00C4: "Item::getQHi(void)",
    0x00C5: "Item::setQuality(sint16)",
    0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00C7: "Intrinsic00C7()",
    0x00C8: "Intrinsic00C8()",
    0x00C9: "Item::callEvent0A(sint16)",
    0x00CA: "Intrinsic00CA()",
    0x00CB: "Item::isOn(uint16)",
    0x00CC: "Intrinsic00CC()",
    0x00CD: "Intrinsic00CD()",
    0x00CE: "Item::getQHi(void)",
    0x00CF: "Item::isOn(uint16)",
    0x00D0: "Intrinsic00D0()",
    0x00D1: "Intrinsic00D1()",
    0x00D2: "Intrinsic00D2()",
    0x00D3: "Intrinsic00FA()",
    0x00D4: "Camera::getY(void)",
    0x00D5: "Intrinsic00D5()",
    0x00D6: "Intrinsic00D6()",
    0x00D7: "Intrinsic00D7()",
    0x00D8: "Intrinsic00D8()",
    0x00D9: "Intrinsic00D9()",
    0x00DA: "Intrinsic00DA()",
    0x00DB: "Intrinsic00DB()",
    0x00DC: "Item::getQLo(void)",
    0x00DD: "Item::getQHi(void)",
    0x00DE: "Item::getNpcNum(void)",
    0x00DF: "Intrinsic00DF()",
    0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E1: "Intrinsic00FA()",
    0x00E2: "Item::getQLo(void)",
    0x00E3: "Item::getCY(void)",
    0x00E4: "Item::getNpcNum(void)",
    0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E6: "Item::getNpcNum(void)",
    0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E8: "Item::getNpcNum(void)",
    0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EA: "Item::getNpcNum(void)",
    0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EC: "Item::getNpcNum(void)",
    0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EE: "Item::getNpcNum(void)",
    0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F0: "Item::getNpcNum(void)",
    0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F2: "Item::getNpcNum(void)",
    0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F4: "Item::getNpcNum(void)",
    0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F6: "Item::getNpcNum(void)",
    0x00F7: "Item::andStatus(void)",
    0x00F8: "Intrinsic00FA()",
    0x00F9: "Item::getQLo(void)",
    0x00FA: "Intrinsic00FA()",
    0x00FB: "Intrinsic00FB()",
    0x00FC: "Intrinsic00FC()",
    0x00FD: "Item::getQLo(void)",
    0x00FE: "Intrinsic00FE()",
    0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0100: "Item::andStatus(void)",
    0x0101: "Item::isOn(uint16)",
    0x0102: "Npc::isDead(void)",
    0x0103: "Intrinsic00BD()",
    0x0104: "Item::getQHi(void)",
    0x0105: "Intrinsic00DA()",
    0x0106: "Intrinsic00FA()",
    0x0107: "Item::getQLo(void)",
    0x0108: "Item::isOn(uint16)",
    0x0109: "Item::getQHi(void)",
    0x010A: "Item::isOn(uint16)",
    0x010B: "Item::getQHi(void)",
    0x010C: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x010D: "Item::getNpcNum(void)",
    0x010E: "Item::getCY(void)",
    0x010F: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0110: "Item::isOn(uint16)",
    0x0111: "Intrinsic0111()",
    0x0112: "IntrinsicReturn0",
    0x0113: "Npc::isDead(void)",
    0x0114: "Intrinsic0088()",
    0x0115: "Intrinsic00C1()",
    0x0116: "Item::getQHi(void)",
    0x0117: "Intrinsic00BD()",
    0x0118: "Item::andStatus(void)",
    0x0119: "Item::getNpcNum(void)",
    0x011A: "Item::andStatus(void)",
    0x011B: "Item::getNpcNum(void)",
    0x011C: "Intrinsic011C()",
    0x011D: "Item::andStatus(void)",
    0x011E: "Item::getNpcNum(void)",
    0x011F: "Item::AvatarStoleSomehting(uint16)",
    0x0120: "Item::andStatus(void)",
    0x0121: "Item::getNpcNum(void)",
    0x0122: "Item::getQ(void)",
    0x0123: "Item::setQ(uint)",
    0x0124: "Item::andStatus(void)",
    0x0125: "Item::getNpcNum(void)",
    0x0126: "Item::andStatus(void)",
    0x0127: "Item::getNpcNum(void)",
    0x0128: "Item::andStatus(void)",
    0x0129: "Item::getNpcNum(void)",
    0x012A: "Item::andStatus(void)",
    0x012B: "Item::getNpcNum(void)",
    0x012C: "Item::andStatus(void)",
    0x012D: "Item::getNpcNum(void)",
    0x012E: "Intrinsic00C3()",
    0x012F: "Item::andStatus(void)",
    0x0130: "Item::getNpcNum(void)",
    0x0131: "Intrinsic0131()",
    0x0132: "Item::andStatus(void)",
    0x0133: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0134: "Item::andStatus(void)",
    0x0135: "Camera::getY(void)",
    0x0136: "Camera::getZ(void)",
    0x0137: "Intrinsic0137()",
    0x0138: "Intrinsic009C()",
    0x0139: "Item::getTypeFlagCrusader(sint16)",
    0x013A: "Item::getNpcNum(void)",
    0x013B: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x013C: "Item::getCY(void)",
    0x013D: "Item::getCZ(void)",
    0x013E: "Item::setFrame(uint16)",
    0x013F: "Intrinsic013F()",
    0x0140: "Intrinsic0140()",
    0x0141: "Intrinsic0141()",
    0x0142: "Intrinsic0142()",
    0x0143: "Npc::isDead(void)",
    0x0144: "Intrinsic00FA()",
    0x0145: "Intrinsic0145()",
    0x0146: "Intrinsic0146()",
    0x0147: "Intrinsic0147()",
    0x0148: "Item::getNpcNum(void)",
    0x0149: "Item::getQLo(void)",
    0x014A: "Item::andStatus(void)",
    0x014B: "Intrinsic014B()",
    0x014C: "Intrinsic014C()",
    0x014D: "Intrinsic014D()",
    0x014E: "Intrinsic003C()",
    0x014F: "Egg::getEggXRange(void)",
    0x0150: "Intrinsic009C()",
    0x0151: "Intrinsic0072()",
    0x0152: "Item::setFrame(uint16)",
    0x0153: "Intrinsic00C1()",
    0x0154: "Intrinsic00C3()",
    0x0155: "Intrinsic00C1()",
    0x0156: "Item::isOn(uint16)",
    0x0157: "Intrinsic00C3()",
    0x0158: "Intrinsic00FA()",
    0x0159: "Item::getQHi(void)",
    0x015A: "Item::getQLo(void)",
    0x015B: "Intrinsic00C1()",
    0x015C: "Intrinsic00C3()",
    0x015D: "Intrinsic015D()",
}
2026-03-26 00:37:17 +01:00
# Per-variant hint overrides keyed by (intrinsic ordinal, argument byte count).
# parse_one_op() consults these before the ordinal-only hint table.
VARIANT_INTRINSIC_CALLSITE_HINTS: dict[str, dict[tuple[int, int], str]] = {
    "regret": {
        (0x001E, 0x10): "Item::I_fireWeapon(Item *, x, y, z, byte, int, byte)",
    },
    "remorse": {},
}
# Name hints keyed by an (int, int) pair -- presumably (class id, event slot);
# the consumer is outside this part of the file, TODO confirm.
CLASS_EVENT_NAME_HINTS: dict[tuple[int, int], str] = {
    (0x0A0C, 0x32): "waitNTimerTicks",
}
# Field-name hints keyed by a numeric selector; the consumer is outside this
# part of the file, so the key's exact meaning is unconfirmed.
LOOP_SELECTOR_FIELD_HINTS = {
    0x3A: "family",
    0x40: "shape",
}
# Shape catalog file name used when no variant-specific name applies
# (see default_shape_catalog_path()).
SHAPE_CATALOG_FILENAME = "usecode_shape_catalog.csv"
# Variant-specific shape catalog file names.
SHAPE_CATALOG_FILENAMES = {
    "remorse": "usecode_shape_catalog_remorse.csv",
    "regret": "usecode_shape_catalog_regret.csv",
}
# Column names of a shape catalog CSV; load_shape_catalog() reads these columns.
SHAPE_CATALOG_FIELDNAMES = ["shape_code", "human_readable_id", "description"]
# Regex fragment matching a hexadecimal (0x...) or decimal integer literal.
NUMERIC_SHAPE_LITERAL_PATTERN = r"(?:0x[0-9A-Fa-f]+|\d+)"
# Patterns locating numeric shape literals in pseudocode text. Each exposes
# two named groups: ``prefix`` (text kept as-is) and ``value`` (the literal).
SHAPE_REFERENCE_PATTERNS = (
    re.compile(rf"(?P<prefix>\bshape=)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
    re.compile(
        rf"(?P<prefix>\bItem\.(?:getShape|getType)\([^\)\n]*\)\s*(?:==|!=|<=|>=|<|>)\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"
    ),
    re.compile(rf"(?P<prefix>\bItem\.create\(\s*[^,\n]+,\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
    re.compile(rf"(?P<prefix>\bItem\.legal_create\(\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
)
# Shape code -> catalog row (a dict of string fields, see load_shape_catalog()).
ShapeCatalog = dict[int, dict[str, str]]
def infer_shape_catalog_variant(extracted_root: Path | str | None = None, game_variant: str | None = None) -> str | None:
    """Work out which variant's shape catalog belongs to an extraction root.

    Resolution order: an explicit *game_variant*; a variant name found in the
    root's path components; a known repository-relative directory layout.
    Returns ``None`` when none of these applies.
    """
    explicit = normalize_game_variant(game_variant)
    if explicit is not None:
        return explicit

    root = resolve_extracted_root(extracted_root)
    from_path = infer_game_variant_from_path(root)
    if from_path is not None:
        return from_path

    try:
        relative_root = root.resolve().relative_to(REPO_ROOT.resolve())
    except ValueError:
        # The root lives outside the repository: no layout to match against.
        return None

    parts = tuple(component.lower() for component in relative_root.parts)
    known_layouts = (
        (("usecode", "eusecode_extracted"), "remorse"),
        (("usecode", "regret", "regret_usecode_extracted"), "regret"),
    )
    for layout, variant in known_layouts:
        if parts[:len(layout)] == layout:
            return variant
    return None
def default_shape_catalog_path(
    extracted_root: Path | str | None = None,
    game_variant: str | None = None,
) -> Path:
    """Return the shape catalog CSV path inside the extraction root.

    The file name is variant-specific when a variant can be determined and
    falls back to ``SHAPE_CATALOG_FILENAME`` otherwise.
    """
    base = resolve_extracted_root(extracted_root)
    catalog_name = SHAPE_CATALOG_FILENAMES.get(
        infer_shape_catalog_variant(base, game_variant),
        SHAPE_CATALOG_FILENAME,
    )
    return base / catalog_name
def format_shape_code(shape_code: int) -> str:
    """Format a shape code as ``0x`` followed by at least four uppercase hex digits."""
    return "0x" + format(shape_code, "04X")
def load_shape_catalog(path: Path | str | None) -> ShapeCatalog:
    """Load a shape catalog CSV into a dict keyed by numeric shape code.

    Returns an empty catalog when *path* is ``None`` or does not exist. Rows
    whose ``shape_code`` cannot be parsed as an integer are skipped; a later
    row with the same code replaces an earlier one.
    """
    if path is None:
        return {}
    csv_path = Path(path)
    if not csv_path.exists():
        return {}

    def cell(row: dict[str, str], column: str) -> str:
        # Missing or None cells are treated as empty strings.
        return row.get(column) or ""

    entries: ShapeCatalog = {}
    with csv_path.open("r", encoding="utf-8", newline="") as handle:
        for row in csv.DictReader(handle):
            code = try_parse_int(cell(row, "shape_code").strip())
            if code is None:
                continue
            entries[code] = {
                "shape_code": format_shape_code(code),
                "human_readable_id": cell(row, "human_readable_id").strip(),
                # The description is stored as written, without stripping.
                "description": cell(row, "description"),
            }
    return entries
def shape_catalog_identifier(shape_code: int, shape_catalog: ShapeCatalog | None = None) -> str | None:
    """Return the sanitized catalog identifier for *shape_code*, if any.

    ``None`` is returned when there is no catalog, no row for the code, or
    the row's ``human_readable_id`` is blank.
    """
    if not shape_catalog:
        return None
    entry = shape_catalog.get(shape_code)
    label = ((entry or {}).get("human_readable_id") or "").strip()
    return sanitize_identifier(label) if label else None
def format_shape_reference(
    shape_code: int,
    shape_catalog: ShapeCatalog | None = None,
    frame_expr: str | None = None,
) -> str:
    """Render a shape reference, optionally suffixed with ``[frame_expr]``.

    The catalog identifier is used when available, otherwise the hexadecimal
    shape code. A missing or blank *frame_expr* adds no suffix.
    """
    label = shape_catalog_identifier(shape_code, shape_catalog)
    if not label:
        label = format_shape_code(shape_code)
    if frame_expr is not None and frame_expr.strip():
        return f"{label}[{frame_expr}]"
    return label
def iter_shape_code_matches(text: str):
    """Yield every shape code literal found in *text*.

    Patterns are applied one after another, so codes come out grouped by
    pattern and may repeat. Literals that fail to parse are skipped.
    """
    for pattern in SHAPE_REFERENCE_PATTERNS:
        literals = (found.group("value") for found in pattern.finditer(text))
        for literal in literals:
            code = try_parse_int(literal)
            if code is None:
                continue
            yield code
def collect_shape_codes_from_pseudocode(text: str) -> set[int]:
    """Return the distinct shape codes referenced in *text*."""
    return {code for code in iter_shape_code_matches(text)}
def apply_shape_catalog_to_pseudocode(text: str, shape_catalog: ShapeCatalog | None = None) -> str:
    """Replace numeric shape literals in *text* with catalog identifiers.

    Only literals matched by ``SHAPE_REFERENCE_PATTERNS`` that have a usable
    catalog identifier are rewritten; everything else is left untouched.
    With no (or an empty) catalog the text is returned unchanged.
    """
    if not shape_catalog:
        return text

    def substitute(match: re.Match[str]) -> str:
        code = try_parse_int(match.group("value"))
        shape_id = None if code is None else shape_catalog_identifier(code, shape_catalog)
        if shape_id is None:
            # Unparseable literal or unknown shape: keep the original text.
            return match.group(0)
        return f"{match.group('prefix')}{shape_id}"

    result = text
    for pattern in SHAPE_REFERENCE_PATTERNS:
        result = pattern.sub(substitute, result)
    return result
def generic_loop_selector_call(name: str, arguments: list[tuple[str, str]]) -> str:
    """Render ``name(label=expr, ...)`` from (label, expression) pairs."""
    pieces = [f"{label}={expr}" for label, expr in arguments]
    return f"{name}({', '.join(pieces)})"
2026-03-26 00:37:17 +01:00
def normalize_game_variant(value: str | None) -> str | None:
    """Canonicalize a requested game variant name.

    Returns ``None`` for ``None``, blank input, or ``"auto"`` (any case),
    meaning "no explicit choice". Otherwise returns the lowercased name.

    Raises:
        ValueError: if the name is not a key of ``INTRINSIC_HINT_PATHS``.
    """
    if value is None:
        return None
    key = value.strip().lower()
    if key in ("", "auto"):
        return None
    if key in INTRINSIC_HINT_PATHS:
        return key
    raise ValueError(f"Unsupported Crusader variant: {value}")
def infer_game_variant_from_path(path: Path | str | None) -> str | None:
    """Guess the game variant from the components of *path*.

    Accepts a ``Path`` or a plain string, like the other path helpers in this
    module. The match is a case-insensitive substring test on each path
    component; "regret" is checked before "remorse", so a path that mentions
    both resolves to "regret".

    Returns:
        "regret", "remorse", or ``None`` when *path* is ``None`` or mentions
        neither variant.
    """
    if path is None:
        return None
    lowered_parts = [part.lower() for part in Path(path).parts]
    for variant in ("regret", "remorse"):
        if any(variant in part for part in lowered_parts):
            return variant
    return None
def resolve_game_variant(game_variant: str | None = None, source_root: Path | None = None) -> str:
    """Pick the game variant to use.

    Preference order: the explicit *game_variant*, then a variant inferred
    from *source_root*, then ``DEFAULT_GAME_VARIANT``.
    """
    variant = normalize_game_variant(game_variant)
    if variant is None:
        variant = infer_game_variant_from_path(source_root)
    return DEFAULT_GAME_VARIANT if variant is None else variant
def load_intrinsic_hints_from_file(path: Path) -> dict[int, str]:
    """Read intrinsic name hints from a Python file without executing it.

    The file is parsed with ``ast``. The first top-level assignment of the
    form ``intrinsics = [...]`` supplies the hints: the list index is the
    intrinsic ordinal, and only non-blank string entries are kept (stored
    as written, without stripping).

    Loading is best-effort. A missing, unreadable, undecodable or malformed
    file, or an ``intrinsics`` value that is not a literal list, yields an
    empty dict instead of an exception.
    """
    try:
        source = path.read_text(encoding="utf-8")
        module = ast.parse(source, filename=str(path))
    except (OSError, SyntaxError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers
        # UnicodeDecodeError, which is not an OSError, and source text that
        # ast.parse rejects outright on some Python versions.
        return {}

    for node in module.body:
        if not isinstance(node, ast.Assign):
            continue
        if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
            continue
        if node.targets[0].id != "intrinsics":
            continue
        try:
            values = ast.literal_eval(node.value)
        except (SyntaxError, TypeError, ValueError):
            # literal_eval raises TypeError for e.g. an unhashable dict key.
            return {}
        if not isinstance(values, list):
            return {}
        return {
            index: value
            for index, value in enumerate(values)
            if isinstance(value, str) and value.strip()
        }
    return {}
def normalize_intrinsic_hint(name: str) -> str:
    """Clean up an intrinsic name hint.

    Strips surrounding whitespace, drops one leading C-style type word
    (a return type written in front of the name), and repairs a handful of
    known misspellings found in the hint sources.
    """
    text = name.strip()
    text = re.sub(r"^(?:unsigned|signed|void|byte|char|short|long|int\d+|uint\d+|sint\d+)\s+(?=[A-Za-z_])", "", text)
    # "udioProcess::" with its leading "A" missing (but not inside a longer word).
    text = re.sub(r"(?<![A-Za-z])udioProcess::", "AudioProcess::", text)
    literal_fixes = (
        ("MusicProcess:I_", "MusicProcess::I_"),
        ("Somehting", "Something"),
        ("Actor::I_setDead())", "Actor::I_setDead()"),
    )
    for wrong, right in literal_fixes:
        text = text.replace(wrong, right)
    return text
def build_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]:
    """Build the ordinal -> name hint table for a game variant.

    Starts from the normalized ``BASE_INTRINSIC_HINTS`` and overlays the
    variant's hint file. A file entry replaces the base entry unless that
    would swap a real base name for an ``Intrinsic...`` placeholder.
    """
    variant = resolve_game_variant(game_variant, source_root)
    hints = {
        ordinal: normalize_intrinsic_hint(raw)
        for ordinal, raw in BASE_INTRINSIC_HINTS.items()
    }
    file_hints = load_intrinsic_hints_from_file(INTRINSIC_HINT_PATHS[variant])
    for ordinal, raw in file_hints.items():
        candidate = normalize_intrinsic_hint(raw)
        current = hints.get(ordinal)
        file_is_placeholder = candidate.startswith("Intrinsic")
        if current is not None and file_is_placeholder and not current.startswith("Intrinsic"):
            # Keep the descriptive base name over a placeholder from the file.
            continue
        hints[ordinal] = candidate
    return hints
# Hint tables already built, keyed by resolved variant name.
_INTRINSIC_HINTS_CACHE: dict[str, dict[int, str]] = {}


def get_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]:
    """Return the intrinsic hint table for a variant, building it on first use.

    The cached dict itself is returned, so callers share one object per
    variant.
    """
    variant = resolve_game_variant(game_variant, source_root)
    try:
        return _INTRINSIC_HINTS_CACHE[variant]
    except KeyError:
        hints = _INTRINSIC_HINTS_CACHE[variant] = build_intrinsic_hints(variant)
        return hints
def get_intrinsic_callsite_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[tuple[int, int], str]:
    """Return the call-site hint overrides for the resolved variant.

    Keys are (intrinsic ordinal, argument byte count). A variant without an
    entry in ``VARIANT_INTRINSIC_CALLSITE_HINTS`` yields an empty dict.
    """
    variant = resolve_game_variant(game_variant, source_root)
    try:
        return VARIANT_INTRINSIC_CALLSITE_HINTS[variant]
    except KeyError:
        return {}
# Intrinsic hint table for DEFAULT_GAME_VARIANT, built once at import time.
# parse_one_op() falls back to it when the caller supplies no table.
INTRINSIC_HINTS = get_intrinsic_hints(DEFAULT_GAME_VARIANT)
# Mnemonics for opcodes that carry no operand bytes, keyed by opcode.
# parse_one_op() looks every opcode up here first; opcodes with operands are
# decoded explicitly there and are not listed in this table.
NO_ARG_MNEMONICS = {
    0x08: "pop_result",
    0x12: "pop_temp",
    0x13: "pop_temp_dword",
    0x14: "add",
    0x15: "add_dword",
    0x16: "concat",
    0x17: "append_list",
    0x1C: "sub",
    0x1D: "sub_dword",
    0x1E: "mul",
    0x1F: "mul_dword",
    0x20: "div",
    0x21: "div_dword",
    0x22: "mod",
    0x23: "mod_dword",
    0x24: "cmp",
    0x25: "cmp_dword",
    0x26: "strcmp",
    0x27: "cmp_huge",
    0x28: "lt",
    0x29: "lt_dword",
    0x2A: "le",
    0x2B: "le_dword",
    0x2C: "gt",
    0x2D: "gt_dword",
    0x2E: "ge",
    0x2F: "ge_dword",
    0x30: "not",
    0x31: "not_dword",
    0x32: "and",
    0x33: "and_dword",
    0x34: "or",
    0x35: "or_dword",
    0x36: "ne",
    0x37: "ne_dword",
    0x39: "bit_and",
    0x3A: "bit_or",
    0x3B: "bit_not",
    0x3C: "lsh",
    0x3D: "rsh",
    0x50: "ret",
    0x53: "suspend",
    0x59: "push_pid",
    0x5D: "push_retval_byte",
    0x5E: "push_retval_word",
    0x5F: "push_retval_dword",
    0x60: "word_to_dword",
    0x61: "dword_to_word",
    0x68: "copy_string",
    0x6A: "ptr_to_string",
    0x6B: "str_to_ptr",
    0x6D: "push_process_result",
    0x73: "loopnext",
    0x77: "set_info",
    0x78: "process_exclude",
    0x7A: "end",
}
def parse_int(value: str) -> int:
    """Parse an integer literal, honouring ``0x``/``0o``/``0b`` prefixes.

    Raises:
        ValueError: if *value* is not a valid integer literal.
    """
    return int(value, base=0)
def try_parse_int(value: str) -> int | None:
    """Parse like ``parse_int`` but return ``None`` when parsing fails."""
    try:
        parsed = parse_int(value)
    except (TypeError, ValueError):
        return None
    return parsed
def signed_byte(value: int) -> int:
    """Reinterpret an unsigned byte value as a signed 8-bit integer."""
    if value & 0x80:
        return value - 0x100
    return value
def bp_repr(value: int) -> str:
    """Render a byte as a signed BP-relative operand, e.g. ``[BP-02h]``."""
    displacement = signed_byte(value)
    if displacement < 0:
        return f"[BP-{-displacement:02X}h]"
    return f"[BP+{displacement:02X}h]"
def sp_repr(value: int) -> str:
    """Render a byte as a signed SP-relative operand, e.g. ``[SP+04h]``."""
    displacement = signed_byte(value)
    if displacement < 0:
        return f"[SP-{-displacement:02X}h]"
    return f"[SP+{displacement:02X}h]"
@dataclass
class ParseResult:
    """Outcome of decoding one instruction with ``parse_one_op``."""

    # Decoded op record, or None when the opcode was not recognised.
    op: dict[str, Any] | None
    # Offset just past the decoded instruction; the start offset when
    # nothing was decoded.
    next_offset: int
    # Why decoding should stop here ("end_opcode" / "unknown_opcode"),
    # or None to continue.
    end_reason: str | None = None
    # Remaining bytes starting at an unrecognised opcode, if any.
    unknown_tail: bytes | None = None
2026-03-25 23:32:13 +01:00
@dataclass
class DebugSymbolRecord:
    """One debug-symbol entry.

    NOTE(review): the code that fills this record is outside the visible
    part of the file. The field notes below are inferred from the names
    and need confirming.
    """

    index: int
    # Value of undetermined purpose.
    unknown1: int
    type_id: int
    type_char: str
    bp_offset: int
    # Presumably the rendered form of bp_offset (cf. bp_repr()) -- confirm.
    bp_repr: str
    # Value of undetermined purpose.
    unknown3: int
    name: str
@dataclass
class DebugSymbolParseResult:
    """Result of parsing a run of debug-symbol records.

    NOTE(review): the producer is outside the visible part of the file.
    The field notes below are inferred from the names.
    """

    # Records in the order they were parsed.
    debug_symbols: list[DebugSymbolRecord]
    # Offset at which parsing stopped.
    end_offset: int
    # Presumably True when a terminating end opcode was seen -- confirm.
    has_end_opcode: bool
    # Bytes left over after the parsed records.
    trailing_bytes: bytes
@dataclass
class FieldTagRecord:
    """One field-tag entry.

    NOTE(review): the code that fills this record is outside the visible
    part of the file; the meaning of each field needs confirming there.
    """

    tag_id: int
    bp_offset: int
    value_kind: int
    name: str
@dataclass
class FieldTagParseResult:
    """Result of parsing a run of field-tag records.

    NOTE(review): the producer is outside the visible part of the file.
    The field notes below are inferred from the names.
    """

    # Records in the order they were parsed.
    field_tags: list[FieldTagRecord]
    # Offset at which parsing stopped.
    end_offset: int
    # Bytes left over after the parsed records.
    trailing_bytes: bytes
class BodyReader:
    """Sequential little-endian reader over a bytes buffer.

    ``offset`` is the position of the next byte to read and advances with
    every read. Only ``read_u8`` raises at the end of the data
    (``IndexError``); the slice-based reads return whatever bytes are left,
    possibly none, and still advance ``offset`` by the requested width.
    """

    def __init__(self, data: bytes, offset: int = 0) -> None:
        self.data = data
        self.offset = offset

    def _read_little_endian(self, width: int) -> int:
        """Read *width* bytes as an unsigned little-endian integer."""
        start = self.offset
        self.offset = start + width
        return int.from_bytes(self.data[start:start + width], "little")

    def read_u8(self) -> int:
        """Read one byte; raises ``IndexError`` past the end of the data."""
        byte = self.data[self.offset]
        self.offset += 1
        return byte

    def read_u16(self) -> int:
        """Read an unsigned 16-bit little-endian value."""
        return self._read_little_endian(2)

    def read_u32(self) -> int:
        """Read an unsigned 32-bit little-endian value."""
        return self._read_little_endian(4)

    def read_cstring(self) -> str:
        """Read bytes up to a NUL terminator (consumed) or the end of data."""
        collected = bytearray()
        while self.offset < len(self.data):
            byte = self.read_u8()
            if byte == 0:
                break
            collected.append(byte)
        # One character per byte value.
        return collected.decode("latin-1")

    def read_fixed_string(self, length: int) -> str:
        """Read *length* bytes as text with trailing NULs removed."""
        start = self.offset
        self.offset = start + length
        raw = self.data[start:start + length]
        return raw.decode("latin-1", errors="replace").rstrip("\x00")
def op_record(start: int, absolute_start: int, opcode: int, raw_bytes: bytes, mnemonic: str, operands: dict[str, Any]) -> dict[str, Any]:
    """Assemble the dict that describes one decoded instruction.

    ``raw_bytes`` is stored as a lowercase hex string; the other arguments
    are stored as given.
    """
    return dict(
        offset=start,
        absolute_body_offset=absolute_start,
        opcode=opcode,
        mnemonic=mnemonic,
        raw_bytes=raw_bytes.hex(),
        operands=operands,
    )
2026-03-26 00:37:17 +01:00
def parse_one_op(
    body: bytes,
    start: int,
    intrinsic_hints: dict[int, str] | None = None,
    intrinsic_callsite_hints: dict[tuple[int, int], str] | None = None,
) -> ParseResult:
    """Decode the single instruction that begins at ``body[start]``.

    Args:
        body: Raw bytes of the code body.
        start: Offset of the opcode byte within *body*.
        intrinsic_hints: Ordinal -> name hints for ``call_intrinsic``.
            ``None`` selects the module default ``INTRINSIC_HINTS``.
        intrinsic_callsite_hints: (ordinal, arg byte count) -> name
            overrides. ``None`` selects the default variant's table. An
            explicitly passed empty dict is respected and means "no
            overrides".

    Returns:
        A ``ParseResult``. For a recognised opcode, ``op`` holds the record
        built by ``op_record`` and ``next_offset`` points past the
        instruction; ``end_reason`` is ``"end_opcode"`` for opcode 0x7A.
        For an unrecognised opcode, ``op`` is ``None``, ``next_offset``
        equals *start*, ``end_reason`` is ``"unknown_opcode"`` and
        ``unknown_tail`` holds ``body[start:]``.

    Raises:
        IndexError: if *start* is past the end of *body* or a single-byte
            operand is missing. Truncated multi-byte operands are NOT
            detected, because ``BodyReader``'s slice-based reads return a
            short value instead of raising.
    """
    reader = BodyReader(body, start)
    opcode = reader.read_u8()
    operands: dict[str, Any] = {}
    mnemonic = NO_ARG_MNEMONICS.get(opcode)

    # Test against None rather than truthiness: an empty table (such as the
    # "remorse" call-site hints) must not be swapped for the default variant's.
    active_intrinsic_hints = INTRINSIC_HINTS if intrinsic_hints is None else intrinsic_hints
    if intrinsic_callsite_hints is None:
        active_callsite_hints = get_intrinsic_callsite_hints(DEFAULT_GAME_VARIANT)
    else:
        active_callsite_hints = intrinsic_callsite_hints

    if opcode in {0x00, 0x01, 0x02, 0x04, 0x05, 0x06}:
        bp_offset = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
        mnemonic = {
            0x00: "pop_local_byte",
            0x01: "pop_local_word",
            0x02: "pop_local_dword",
            0x04: "pop_member_byte",
            0x05: "pop_member_word",
            0x06: "pop_member_dword",
        }[opcode]
    elif opcode in {0x03, 0x07}:
        bp_offset = reader.read_u8()
        size = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size}
        mnemonic = {0x03: "pop_local_blob", 0x07: "pop_member_blob"}[opcode]
    elif opcode == 0x09:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
            "slist_flag": slist_flag,
        }
        mnemonic = "pop_list_element"
    elif opcode == 0x0A:
        value = reader.read_u8()
        operands = {"value_u8": value, "value_signed": signed_byte(value)}
        mnemonic = "push_byte_immediate"
    elif opcode == 0x0B:
        operands = {"value_u16": reader.read_u16()}
        mnemonic = "push_word_immediate"
    elif opcode == 0x0C:
        operands = {"value_u32": reader.read_u32()}
        mnemonic = "push_dword_immediate"
    elif opcode == 0x0D:
        declared_length = reader.read_u16()
        # The string is read up to its NUL terminator; the declared length is
        # recorded but not used to bound the read.
        text = reader.read_cstring()
        operands = {"declared_length": declared_length, "string": text}
        mnemonic = "push_string_immediate"
    elif opcode == 0x0E:
        element_size = reader.read_u8()
        count = reader.read_u8()
        operands = {"element_size": element_size, "count": count}
        mnemonic = "create_list"
    elif opcode == 0x0F:
        arg_bytes = reader.read_u8()
        intrinsic_ordinal = reader.read_u16()
        operands = {
            "intrinsic_ordinal": intrinsic_ordinal,
            "arg_bytes": arg_bytes,
            # A call-site specific hint wins over the ordinal-only hint.
            "intrinsic_name_hint": active_callsite_hints.get(
                (intrinsic_ordinal, arg_bytes),
                active_intrinsic_hints.get(intrinsic_ordinal),
            ),
        }
        mnemonic = "call_intrinsic"
    elif opcode == 0x10:
        operands = {"target_offset": reader.read_u16()}
        mnemonic = "call_near"
    elif opcode == 0x11:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        operands = {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
        mnemonic = "call_class_event"
    elif opcode in {0x18, 0x19, 0x1A, 0x1B}:
        element_size = reader.read_u8()
        operands = {"element_size": element_size}
        mnemonic = {
            0x18: "append_unique_inline",
            0x19: "append_unique_indirect",
            0x1A: "remove_matching_indirect",
            0x1B: "remove_matching_inline",
        }[opcode]
    elif opcode == 0x38:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {"element_size": element_size, "slist_flag": slist_flag}
        mnemonic = "in_list"
    elif opcode in {0x3E, 0x3F, 0x40, 0x41, 0x43, 0x46, 0x47, 0x48, 0x49, 0x4B, 0x62, 0x63, 0x64, 0x69}:
        bp_offset = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
        mnemonic = {
            0x3E: "push_local_byte",
            0x3F: "push_local_word",
            0x40: "push_local_dword",
            0x41: "push_local_string",
            0x43: "push_local_slist",
            0x46: "push_member_byte",
            0x47: "push_member_word",
            0x48: "push_member_dword",
            0x49: "push_member_huge",
            0x4B: "push_local_addr",
            0x62: "free_local_string",
            0x63: "free_local_slist",
            0x64: "free_local_list",
            0x69: "push_string_ptr",
        }[opcode]
    elif opcode == 0x42:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "element_size": element_size}
        mnemonic = "push_local_list"
    elif opcode == 0x44:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {"element_size": element_size, "slist_flag": slist_flag}
        mnemonic = "push_list_element"
    elif opcode == 0x45:
        byte0 = reader.read_u8()
        byte1 = reader.read_u8()
        operands = {"value_a": byte0, "value_b": byte1}
        mnemonic = "push_huge"
    elif opcode in {0x4C, 0x4D}:
        size = reader.read_u8()
        operands = {"size": size}
        mnemonic = {0x4C: "push_indirect", 0x4D: "pop_indirect"}[opcode]
    elif opcode in {0x4E, 0x4F}:
        global_id = reader.read_u16()
        size = reader.read_u8()
        operands = {"global_id": global_id, "size": size}
        mnemonic = {0x4E: "push_global", 0x4F: "pop_global"}[opcode]
    elif opcode in {0x51, 0x52}:
        relative = reader.read_u16()
        signed_relative = relative - 0x10000 if relative & 0x8000 else relative
        # The displacement is relative to the end of this instruction.
        target = reader.offset + signed_relative
        operands = {"relative_u16": relative, "relative_signed": signed_relative, "target_offset": target}
        mnemonic = {0x51: "jne", 0x52: "jmp"}[opcode]
    elif opcode == 0x54:
        arg0 = reader.read_u8()
        arg1 = reader.read_u8()
        operands = {"arg0": arg0, "arg1": arg1}
        mnemonic = "implies"
    elif opcode == 0x57:
        arg_bytes = reader.read_u8()
        this_size = reader.read_u8()
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        operands = {
            "arg_bytes": arg_bytes,
            "this_size": this_size,
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
        mnemonic = "spawn"
    elif opcode == 0x58:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        inline_offset = reader.read_u16()
        this_size = reader.read_u8()
        unknown = reader.read_u8()
        operands = {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
            "inline_offset": inline_offset,
            "this_size": this_size,
            "unknown": unknown,
        }
        mnemonic = "spawn_inline"
    elif opcode == 0x5A:
        operands = {"local_bytes": reader.read_u8()}
        mnemonic = "init"
    elif opcode == 0x5B:
        operands = {"line_number": reader.read_u16()}
        mnemonic = "line_number"
    elif opcode == 0x5C:
        relative = reader.read_u16()
        # Relative to the position right after the 16-bit displacement,
        # i.e. before the symbol name is read.
        symbol_offset = reader.offset + (relative - 0x10000 if relative & 0x8000 else relative)
        symbol = reader.read_fixed_string(8)
        trailing_zero = reader.read_u8()
        operands = {
            "symbol_offset": symbol_offset,
            "symbol": symbol,
            "trailing_zero": trailing_zero,
        }
        mnemonic = "symbol_info"
    elif opcode in {0x65, 0x66, 0x67, 0x6E, 0x6F, 0x74}:
        value = reader.read_u8()
        operands = {"value_u8": value}
        if opcode in {0x65, 0x66, 0x67}:
            operands["target"] = sp_repr(value)
        mnemonic = {
            0x65: "free_stack_string",
            0x66: "free_stack_list",
            0x67: "free_stack_slist",
            0x6E: "add_sp",
            0x6F: "push_stack_addr",
            0x74: "loopscr",
        }[opcode]
    elif opcode == 0x6C:
        bp_offset = reader.read_u8()
        copy_type = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "copy_type": copy_type}
        mnemonic = "param_pid_chg"
    elif opcode == 0x70:
        current_var = reader.read_u8()
        string_bytes = reader.read_u8()
        loop_type = reader.read_u8()
        operands = {"current_var": current_var, "string_bytes": string_bytes, "loop_type": loop_type}
        mnemonic = "loop"
    elif opcode in {0x75, 0x76}:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        branch = reader.read_u16()
        signed_branch = branch - 0x10000 if branch & 0x8000 else branch
        target = reader.offset + signed_branch
        operands = {
            "bp_offset": bp_offset,
            "target_var": bp_repr(bp_offset),
            "element_size": element_size,
            "relative_u16": branch,
            "relative_signed": signed_branch,
            "target_offset": target,
        }
        mnemonic = {0x75: "foreach_list", 0x76: "foreach_slist"}[opcode]
    elif opcode == 0x79:
        operands = {"global_id": reader.read_u16()}
        mnemonic = "global_address"
    elif mnemonic is None:
        # Neither an operand-carrying opcode nor a known no-operand one.
        return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:])

    raw = body[start:reader.offset]
    op = op_record(start, start, opcode, raw, mnemonic, operands)
    end_reason = "end_opcode" if opcode == 0x7A else None
    return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason)
def load_tsv_rows(path: Path) -> list[dict[str, str]]:
    """Read a tab-separated file with a header row into a list of row dicts.

    Args:
        path: Location of the TSV file; it is opened as UTF-8 text.

    Returns:
        One dict per data row, keyed by the header names, in file order.
    """
    rows: list[dict[str, str]] = []
    # newline="" leaves line-ending handling to the csv module.
    with path.open("r", encoding="utf-8", newline="") as stream:
        rows.extend(csv.DictReader(stream, delimiter="\t"))
    return rows
@lru_cache(maxsize=None)
def load_class_name_hints_by_id(extracted_root_key: str) -> dict[int, str]:
    """Build a ``class_id -> class_name_hint`` map from the class layout index.

    The result is memoised per ``extracted_root_key``, so repeated calls with
    the same key return the same dict object without re-reading the file.

    Args:
        extracted_root_key: Extracted-root directory as a string (a string is
            used so the argument is hashable for the cache).

    Returns:
        Mapping of parsed class ids to their stripped, non-empty name hints.
        Rows without a parsable ``class_id`` or without a name are ignored;
        a later row with the same id replaces an earlier one.
    """
    layout_index_path = extracted_root_paths(Path(extracted_root_key))[1]
    named_classes: dict[int, str] = {}
    for layout_row in load_tsv_rows(layout_index_path):
        parsed_id = try_parse_int(layout_row.get("class_id", ""))
        label = (layout_row.get("class_name_hint") or "").strip()
        if parsed_id is not None and label:
            named_classes[parsed_id] = label
    return named_classes
def class_name_hints_by_id(extracted_root: Path | str | None = None) -> dict[int, str]:
    """Return the cached ``class_id -> class_name_hint`` map for an extracted root.

    Args:
        extracted_root: Extracted-root directory, or ``None`` for the default
            chosen by ``resolve_extracted_root``.

    Returns:
        The dict produced by ``load_class_name_hints_by_id`` for that root.
    """
    # The cached loader is keyed by string, so normalise the root first.
    cache_key = str(resolve_extracted_root(extracted_root))
    return load_class_name_hints_by_id(cache_key)
2026-03-26 00:37:17 +01:00
def find_chunk_file(entry_index: int, extracted_root: Path | str | None = None) -> Path:
    """Locate the extracted chunk file for a flex entry.

    Args:
        entry_index: Entry index embedded in the chunk file name.
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns:
        The first match (in sorted order) inside the chunks directory.

    Raises:
        FileNotFoundError: If neither naming pattern matches any file.
    """
    chunks_dir = extracted_root_paths(extracted_root)[3]
    # The zero-padded three-digit name is tried first; the unpadded form is
    # only consulted when the padded pattern matches nothing.
    name_patterns = (
        f"chunk_{entry_index:03d}_*.bin",
        f"chunk_{entry_index}_*.bin",
    )
    for pattern in name_patterns:
        candidates = sorted(chunks_dir.glob(pattern))
        if candidates:
            return candidates[0]
    raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}")
2026-03-26 00:37:17 +01:00
def select_rows(class_name: str, slot: int, extracted_root: Path | str | None = None) -> tuple[dict[str, str], dict[str, str]]:
    """Find the event-index row and layout-index row for one class/slot pair.

    Args:
        class_name: Class name hint to look for (compared case-insensitively).
        slot: Event slot number the row must carry.
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns:
        ``(event_row, layout_row)`` where ``layout_row`` is the first layout
        row sharing the event row's ``entry_index``.

    Raises:
        KeyError: If no event row matches, or no layout row has the entry index.
        ValueError: If the matched event row has an empty derived body range.
    """
    event_index_path, layout_index_path, _, _ = extracted_root_paths(extracted_root)
    event_rows = load_tsv_rows(event_index_path)
    layout_rows = load_tsv_rows(layout_index_path)

    # First event row with the right class and slot and a parsable entry index.
    for candidate in event_rows:
        if candidate["class_name_hint"].upper() != class_name.upper():
            continue
        if try_parse_int(candidate.get("slot", "")) != slot:
            continue
        if try_parse_int(candidate.get("entry_index", "")) is None:
            continue
        event_row = candidate
        break
    else:
        raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}")

    has_body_range = bool(event_row["derived_body_start"]) and bool(event_row["derived_body_end"])
    if not has_body_range:
        raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}")

    entry_index = parse_int(event_row["entry_index"])
    for candidate in layout_rows:
        if try_parse_int(candidate.get("entry_index", "")) == entry_index:
            return event_row, candidate
    raise KeyError(f"No class_layout_index row found for entry_index={entry_index}")
2026-03-26 00:37:17 +01:00
def load_runtime_ir_rows(extracted_root: Path | str | None = None) -> list[dict[str, str]]:
    """Load every row of the runtime VM IR index for an extracted root.

    Args:
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns:
        The rows of the runtime VM IR TSV, in file order.
    """
    runtime_ir_path = extracted_root_paths(extracted_root)[2]
    return load_tsv_rows(runtime_ir_path)
2026-03-25 23:32:13 +01:00
2026-03-26 00:37:17 +01:00
def runtime_stage_hints(ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> list[dict[str, str]]:
    """Select the runtime VM IR rows that are relevant to a decoded body.

    A row whose ``opcode_or_lane`` column starts with ``opcode 0x``
    (case-insensitive) is kept only when that opcode value occurs among the
    ``opcode`` values of *ops*.  Any other row is kept only when its
    ``stage_address`` is one of a fixed set of stage addresses.

    Args:
        ops: Decoded op records; only their ``opcode`` key is read.
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns:
        The matching index rows, in file order.
    """
    always_relevant_stages = {"000d:177c", "000d:1acb", "000d:2104", "000d:21ed", "000d:22bc"}
    opcode_values = {op["opcode"] for op in ops}
    hints: list[dict[str, str]] = []
    for row in load_runtime_ir_rows(extracted_root):
        # csv.DictReader stores None for columns missing from a short row, so
        # a plain .get(key, "") default is not enough to guarantee a string.
        opcode_or_lane = row.get("opcode_or_lane") or ""
        if opcode_or_lane.lower().startswith("opcode 0x"):
            opcode_value = try_parse_int(opcode_or_lane.split()[1])
            if opcode_value is None or opcode_value not in opcode_values:
                continue
        elif row.get("stage_address") not in always_relevant_stages:
            continue
        hints.append(row)
    return hints
2026-03-26 00:37:17 +01:00
def annotation_hints(event_row: dict[str, str], payload_shape_hint: str, ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> dict[str, Any]:
    """Assemble the ``annotation_hints`` section of the IR document.

    Args:
        event_row: Event-index row; ``slot`` and ``event_name_hint`` are read.
        payload_shape_hint: Value stored verbatim under ``payload_shape_hint``.
        ops: Decoded ops, forwarded to ``runtime_stage_hints``.
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns:
        A dict with the runtime family tag, the payload shape hint, a fixed
        list of compiled anchor addresses with roles, the runtime stage hints
        for *ops*, and the slot taxonomy (slot number plus event name hint,
        falling back to ``EVENT_NAME_HINTS`` when the row's hint is empty).
    """
    slot = parse_int(event_row["slot"])
    anchor_table = (
        ("000d:46ec", "context_create_from_slot"),
        ("000d:0988", "referent_chain_mutator"),
        ("000d:177c", "push_frame_word_literal"),
        ("000d:1acb", "compare_stream_dword_and_push_bool"),
        ("000d:208b", "materialize_or_forward_value"),
        ("000d:21ed", "prepend_inline_payload"),
        ("000d:22bc", "matrix_pushback_stage"),
        ("000d:2104", "finalize_to_outptr"),
        ("000d:ebe3", "opcode_sequence_run"),
    )
    compiled_anchors = [{"address": address, "role": role} for address, role in anchor_table]
    stage_hints = runtime_stage_hints(ops, extracted_root)
    event_name = event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot)
    return {
        "runtime_family": "slot-backed-owner-loaded-body",
        "payload_shape_hint": payload_shape_hint,
        "compiled_anchors": compiled_anchors,
        "runtime_stage_hints": stage_hints,
        "slot_taxonomy": {"slot": slot, "event_name_hint": event_name},
    }
def infer_payload_shape(slot: int) -> str | None:
    """Map an event slot number to its payload shape label.

    Args:
        slot: Event slot number.

    Returns:
        ``"none"``, ``"word"`` or ``"signed_word"`` for the slots listed in
        the table below, otherwise ``None``.
    """
    shape_by_slot = {
        0x10: "none",
        0x12: "none",
        0x0A: "word",
        0x0B: "word",
        0x11: "word",
        0x14: "word",
        0x13: "signed_word",
    }
    return shape_by_slot.get(slot)
2026-03-25 23:32:13 +01:00
def parse_debug_symbols(body: bytes, start: int) -> DebugSymbolParseResult | None:
    """Try to read a debug-symbol table that begins at ``start``.

    The layout read here is a one-byte record count followed by that many
    records of four single bytes (``unknown1``, ``type_id``, ``bp_offset``,
    ``unknown3``) and a C string name, and finally a ``0x7A`` byte (the end
    opcode).  A ``0x7A`` byte directly at ``start`` is accepted as an empty
    table.

    Args:
        body: Event body bytes.
        start: Offset in *body* where the table is expected to begin.

    Returns:
        The parsed table, or ``None`` when ``start`` is at or past the end of
        *body*, a read raises ``IndexError``, or the records are not followed
        by ``0x7A``.
    """
    if start >= len(body):
        return None
    if body[start] == 0x7A:
        # Only the end opcode is present: an empty symbol table.
        return DebugSymbolParseResult(
            debug_symbols=[],
            end_offset=start + 1,
            has_end_opcode=True,
            trailing_bytes=body[start + 1:],
        )

    reader = BodyReader(body, start)
    record_count = reader.read_u8()
    records: list[DebugSymbolRecord] = []
    try:
        for record_index in range(record_count):
            unknown1 = reader.read_u8()
            type_id = reader.read_u8()
            bp_offset = reader.read_u8()
            unknown3 = reader.read_u8()
            name = reader.read_cstring()
            # Printable ASCII type ids are shown as-is; anything else as ".".
            is_printable = 0x20 <= type_id <= 0x7E
            records.append(
                DebugSymbolRecord(
                    index=record_index,
                    unknown1=unknown1,
                    type_id=type_id,
                    type_char=chr(type_id) if is_printable else ".",
                    bp_offset=bp_offset,
                    bp_repr=bp_repr(bp_offset),
                    unknown3=unknown3,
                    name=name,
                )
            )
    except IndexError:
        return None

    terminated = reader.offset < len(body) and body[reader.offset] == 0x7A
    if not terminated:
        return None
    end_offset = reader.offset + 1
    return DebugSymbolParseResult(
        debug_symbols=records,
        end_offset=end_offset,
        has_end_opcode=terminated,
        trailing_bytes=body[end_offset:],
    )
def parse_field_tags(body: bytes, start: int) -> FieldTagParseResult | None:
    """Try to read a field-tag table that begins at ``start``.

    Records are read until a ``0x7A`` byte (the end opcode) or the end of
    *body*.  Each record is three single bytes (``tag_id``, ``bp_offset``,
    ``value_kind``) followed by a non-empty C string name; a single ``0x00``
    byte after a record is skipped.

    Args:
        body: Event body bytes.
        start: Offset in *body* where the table is expected to begin.

    Returns:
        The parsed table, or ``None`` when ``start`` is at or past the end of
        *body*, a name is empty, a read raises ``IndexError``, no record was
        read, or the records are not followed by ``0x7A``.
    """
    total = len(body)
    if start >= total:
        return None

    reader = BodyReader(body, start)
    tags: list[FieldTagRecord] = []
    try:
        while reader.offset < total and body[reader.offset] != 0x7A:
            tag_id = reader.read_u8()
            bp_offset = reader.read_u8()
            value_kind = reader.read_u8()
            name = reader.read_cstring()
            if not name:
                return None
            tags.append(
                FieldTagRecord(tag_id=tag_id, bp_offset=bp_offset, value_kind=value_kind, name=name)
            )
            # Step over one zero byte between records when present.
            if reader.offset < total and body[reader.offset] == 0x00:
                reader.offset += 1
    except IndexError:
        return None

    terminated = reader.offset < total and body[reader.offset] == 0x7A
    if not (tags and terminated):
        return None
    end_offset = reader.offset + 1
    return FieldTagParseResult(field_tags=tags, end_offset=end_offset, trailing_bytes=body[end_offset:])
2026-04-07 17:16:44 +02:00
def classify_post_ret_metadata(body: bytes, ops: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Recognise a metadata table that fills the body after the last ``ret``.

    The bytes following the last ``ret`` op are tried first as a debug-symbol
    table and then as a field-tag table.  A table is accepted only when it
    ends exactly at the end of *body*.

    Args:
        body: Event body bytes.
        ops: Decoded op records (``mnemonic``, ``offset`` and ``raw_bytes``
            are read).

    Returns:
        ``None`` when there is no ``ret``, when at most one byte follows it,
        or when neither table fits.  Otherwise a dict with the ops truncated
        after the ``ret``, the new ``end_reason``, the remaining
        ``unknown_tail``, the debug symbol offset and serialised
        ``debug_symbols`` / ``field_tags`` lists, and the table ``end_offset``.
    """
    # Scan backwards so only the final ret is considered.
    for last_ret_index in reversed(range(len(ops))):
        if ops[last_ret_index]["mnemonic"] == "ret":
            break
    else:
        return None

    ret_op = ops[last_ret_index]
    # presumably raw_bytes is a hex string (two characters per byte) — confirm
    # against the op record builder.
    ret_end = ret_op["offset"] + (len(ret_op["raw_bytes"]) // 2)
    if len(body) - ret_end <= 1:
        return None

    symbol_table = parse_debug_symbols(body, ret_end)
    if symbol_table is not None and symbol_table.end_offset == len(body):
        serialised_symbols = []
        for symbol in symbol_table.debug_symbols:
            serialised_symbols.append(
                {
                    "index": symbol.index,
                    "unknown1": symbol.unknown1,
                    "type_id": symbol.type_id,
                    "type_char": symbol.type_char,
                    "bp_offset": symbol.bp_offset,
                    "bp_repr": symbol.bp_repr,
                    "unknown3": symbol.unknown3,
                    "name": symbol.name,
                }
            )
        return {
            "ops": ops[:last_ret_index + 1],
            "end_reason": "debug_symbols_then_end",
            "unknown_tail": symbol_table.trailing_bytes,
            "debug_symbol_offset": ret_end,
            "debug_symbols": serialised_symbols,
            "field_tags": [],
            "end_offset": symbol_table.end_offset,
        }

    tag_table = parse_field_tags(body, ret_end)
    if tag_table is not None and tag_table.end_offset == len(body):
        serialised_tags = []
        for tag in tag_table.field_tags:
            serialised_tags.append(
                {
                    "tag_id": tag.tag_id,
                    "bp_offset": tag.bp_offset,
                    "bp_repr": bp_repr(tag.bp_offset),
                    "value_kind": tag.value_kind,
                    "name": tag.name,
                    "tag_label": f"{tag.tag_id:02X}:{tag.bp_offset:02X}{tag.value_kind:02X}->{tag.name}",
                }
            )
        return {
            "ops": ops[:last_ret_index + 1],
            "end_reason": "field_tags_then_end",
            "unknown_tail": tag_table.trailing_bytes,
            "debug_symbol_offset": None,
            "debug_symbols": [],
            "field_tags": serialised_tags,
            "end_offset": tag_table.end_offset,
        }

    return None
2026-03-26 00:37:17 +01:00
def _debug_symbols_as_dicts(symbols: list[DebugSymbolRecord]) -> list[dict[str, Any]]:
    """Serialise parsed debug-symbol records into plain dicts for the IR."""
    return [
        {
            "index": symbol.index,
            "unknown1": symbol.unknown1,
            "type_id": symbol.type_id,
            "type_char": symbol.type_char,
            "bp_offset": symbol.bp_offset,
            "bp_repr": symbol.bp_repr,
            "unknown3": symbol.unknown3,
            "name": symbol.name,
        }
        for symbol in symbols
    ]


def _field_tags_as_dicts(tags: list[FieldTagRecord]) -> list[dict[str, Any]]:
    """Serialise parsed field-tag records into plain dicts for the IR."""
    return [
        {
            "tag_id": tag.tag_id,
            "bp_offset": tag.bp_offset,
            "bp_repr": bp_repr(tag.bp_offset),
            "value_kind": tag.value_kind,
            "name": tag.name,
            "tag_label": f"{tag.tag_id:02X}:{tag.bp_offset:02X}{tag.value_kind:02X}->{tag.name}",
        }
        for tag in tags
    ]


def parse_body_ir(
    event_row: dict[str, str],
    layout_row: dict[str, str],
    game_variant: str | None = None,
    extracted_root: Path | str | None = None,
) -> dict[str, Any]:
    """Decode one event body into the proof-of-concept IR document.

    The body is sliced out of the entry's chunk file using the row's derived
    body range and decoded op by op until the body is exhausted or an op
    reports an end reason.  Trailing metadata is then recognised in this
    order, each step only running if the previous ones did not succeed:

    1. ``classify_post_ret_metadata`` (a table that exactly fills the bytes
       after the last ``ret``);
    2. after an unknown opcode, a debug-symbol table right after the last
       ``ret``;
    3. after an unknown opcode, a debug-symbol table at the stop offset when
       a ``symbol_info`` op points at that offset;
    4. after an unknown opcode, a field-tag table right after the last ``ret``.

    Args:
        event_row: Row from the class event index for the body to decode.
        layout_row: Row from the class layout index for the same entry.
        game_variant: Variant name, or ``None`` to let ``resolve_game_variant``
            decide.
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns:
        A dict with ``schema_version``, ``source``, ``class``, ``event``,
        ``body``, ``ops``, ``debug_symbols``, ``field_tags`` and
        ``annotation_hints`` sections.
    """
    resolved_extracted_root = resolve_extracted_root(extracted_root)
    entry_index = parse_int(event_row["entry_index"])
    chunk_file = find_chunk_file(entry_index, resolved_extracted_root)
    chunk_bytes = chunk_file.read_bytes()
    resolved_game_variant = resolve_game_variant(game_variant, chunk_file)
    intrinsic_hints = get_intrinsic_hints(resolved_game_variant, chunk_file)
    intrinsic_callsite_hints = get_intrinsic_callsite_hints(resolved_game_variant, chunk_file)
    target_class_name_hints = class_name_hints_by_id(resolved_extracted_root)
    body_start = parse_int(event_row["derived_body_start"])
    body_end = parse_int(event_row["derived_body_end"])
    body = chunk_bytes[body_start:body_end]

    ops: list[dict[str, Any]] = []
    offset = 0
    end_reason = "body_exhausted"
    unknown_tail = b""
    debug_symbols: list[dict[str, Any]] = []
    debug_symbol_offset: int | None = None
    field_tags: list[dict[str, Any]] = []

    # --- Linear decode -----------------------------------------------------
    while offset < len(body):
        result = parse_one_op(body, offset, intrinsic_hints, intrinsic_callsite_hints)
        if result.op is not None:
            operands = result.op["operands"]
            if "target_class_id" in operands:
                class_id = operands["target_class_id"]
                operands["target_class_name_hint"] = target_class_name_hints.get(class_id)
            result.op["absolute_body_offset"] = body_start + result.op["offset"]
            ops.append(result.op)
        if result.end_reason is not None:
            end_reason = result.end_reason
            unknown_tail = result.unknown_tail or b""
            if result.end_reason == "end_opcode":
                # Everything after the end opcode is reported as the tail.
                unknown_tail = body[result.next_offset:]
            offset = result.next_offset
            break
        offset = result.next_offset
    if offset >= len(body) and end_reason == "body_exhausted":
        unknown_tail = b""

    # Offsets inside the body that a symbol_info op points at.
    candidate_debug_offsets = {
        op["operands"]["symbol_offset"]
        for op in ops
        if op["mnemonic"] == "symbol_info"
        and isinstance(op["operands"].get("symbol_offset"), int)
        and 0 <= op["operands"]["symbol_offset"] < len(body)
    }

    # --- Step 1: table that exactly fills the bytes after the last ret -----
    post_ret_metadata = classify_post_ret_metadata(body, ops)
    if post_ret_metadata is not None:
        ops = post_ret_metadata["ops"]
        debug_symbol_offset = post_ret_metadata["debug_symbol_offset"]
        debug_symbols = post_ret_metadata["debug_symbols"]
        field_tags = post_ret_metadata["field_tags"]
        end_reason = post_ret_metadata["end_reason"]
        unknown_tail = post_ret_metadata["unknown_tail"]
        offset = post_ret_metadata["end_offset"]

    last_ret_index = next(
        (index for index in range(len(ops) - 1, -1, -1) if ops[index]["mnemonic"] == "ret"),
        None,
    )

    # --- Step 2: debug symbols right after the last ret --------------------
    if end_reason == "unknown_opcode" and last_ret_index is not None:
        ret_end = ops[last_ret_index]["offset"] + (len(ops[last_ret_index]["raw_bytes"]) // 2)
        ret_debug_result = parse_debug_symbols(body, ret_end)
        if ret_debug_result is not None:
            ops = ops[:last_ret_index + 1]
            debug_symbol_offset = ret_end
            debug_symbols = _debug_symbols_as_dicts(ret_debug_result.debug_symbols)
            end_reason = "debug_symbols_then_end"
            unknown_tail = ret_debug_result.trailing_bytes
            offset = ret_debug_result.end_offset

    # --- Step 3: debug symbols at the stop offset named by symbol_info -----
    # Only the candidate equal to the offset where decoding stopped is ever
    # eligible, so a membership test replaces a scan over all candidates.
    if end_reason == "unknown_opcode" and offset in candidate_debug_offsets:
        debug_result = parse_debug_symbols(body, offset)
        if debug_result is not None:
            debug_symbol_offset = offset
            debug_symbols = _debug_symbols_as_dicts(debug_result.debug_symbols)
            end_reason = "debug_symbols_then_end"
            unknown_tail = debug_result.trailing_bytes
            offset = debug_result.end_offset

    # --- Step 4: field tags right after the last ret -----------------------
    if end_reason == "unknown_opcode" and last_ret_index is not None:
        ret_end = ops[last_ret_index]["offset"] + (len(ops[last_ret_index]["raw_bytes"]) // 2)
        field_tag_result = parse_field_tags(body, ret_end)
        if field_tag_result is not None:
            ops = ops[:last_ret_index + 1]
            field_tags = _field_tags_as_dicts(field_tag_result.field_tags)
            end_reason = "field_tags_then_end"
            unknown_tail = field_tag_result.trailing_bytes
            offset = field_tag_result.end_offset

    slot = parse_int(event_row["slot"])
    payload_shape = infer_payload_shape(slot)
    return {
        "schema_version": "crusader-usecode-ir-v1-poc",
        "source": {
            "game_variant": resolved_game_variant,
            "flex_path": infer_flex_path(resolved_extracted_root),
            "extracted_root": repo_relative_path(resolved_extracted_root),
            "chunk_file": repo_relative_path(chunk_file),
        },
        "class": {
            "entry_index": entry_index,
            "object_index": parse_int(layout_row["object_index"]),
            "class_id": parse_int(layout_row["class_id"]),
            "class_name": layout_row["class_name_hint"],
            "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]),
            "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]),
            "conservative_event_count": parse_int(layout_row["conservative_event_count"]),
        },
        "event": {
            "slot": slot,
            "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
            "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]),
            "raw_code_offset": parse_int(event_row["raw_code_offset"]),
            "derived_body_start": body_start,
            "derived_body_end": body_end,
            "derived_body_length": parse_int(event_row["derived_body_length"]),
            "repeated_template_status": event_row["repeated_template_status"],
        },
        "body": {
            "end_reason": end_reason,
            "raw_body_sha1": hashlib.sha1(body).hexdigest(),
            "unknown_trailing_bytes": unknown_tail.hex(),
            "decoded_op_count": len(ops),
            "debug_symbol_offset": debug_symbol_offset,
            "debug_symbol_count": len(debug_symbols),
            "field_tag_count": len(field_tags),
        },
        "ops": ops,
        "debug_symbols": debug_symbols,
        "field_tags": field_tags,
        "annotation_hints": annotation_hints(event_row, payload_shape, ops, resolved_extracted_root),
    }
# ---------------------------------------------------------------------------
# Family diff helpers
# ---------------------------------------------------------------------------
def _common_prefix_len(a: bytes, b: bytes) -> int:
    """Return how many leading bytes *a* and *b* have in common."""
    for position, (left, right) in enumerate(zip(a, b)):
        if left != right:
            return position
    # No mismatch inside the overlap: the shorter input is a full prefix.
    return min(len(a), len(b))
def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int:
    """Return how many trailing bytes *a* and *b* share outside the prefix.

    Args:
        a: First byte string.
        b: Second byte string.
        prefix_len: Number of leading bytes already counted as common; the
            suffix is not allowed to overlap them in either input.

    Returns:
        The shared suffix length, capped at the shorter input's length minus
        ``prefix_len`` (that cap itself is returned when nothing mismatches).
    """
    tail_budget = min(len(a), len(b)) - prefix_len
    for matched in range(tail_budget):
        # Compare the (matched + 1)-th byte from the end of each input.
        if a[-1 - matched] != b[-1 - matched]:
            return matched
    return tail_budget
2026-03-26 00:37:17 +01:00
def compute_family_diff(class_name: str, slot: int, extracted_root: Path | str | None = None) -> dict[str, Any]:
    """
    Compare one event body against the other bodies of its family.

    The reference row is the first event-index row matching *class_name*
    (case-insensitive) and *slot*.  Its family is every row sharing the same
    ``repeated_template_status`` value; when that value is empty or
    ``"unique"`` the family falls back to every row with the same slot.

    Args:
        class_name: Class name hint of the reference row.
        slot: Event slot number of the reference row.
        extracted_root: Extracted-root directory, or ``None`` for the default.

    Returns a dict with:
        reference_entry   entry_index of the reference row
        reference_class   the *class_name* argument as passed in
        slot              the *slot* argument as passed in
        family_tag        repeated_template_status value, or
                          ``slot_0x??_all`` when that value is empty
        member_count      number of rows in the family (reference included)
        sibling_count     member_count - 1
        members           per-member records (entry, class, slot, body
                          length, diff vs reference), reference first and
                          then ordered by entry_index

    Raises:
        KeyError: If no event-index row matches the class/slot pair.
        ValueError: If the reference body cannot be loaded.
    """
    # Only the event index is needed; the layout index is not consulted here.
    class_event_index, _, _, _ = extracted_root_paths(extracted_root)
    event_rows = load_tsv_rows(class_event_index)

    def _family_tag_of(row: dict[str, str]) -> str:
        # csv.DictReader stores None for columns missing from a short row.
        return (row.get("repeated_template_status") or "").strip()

    def _has_entry_index(row: dict[str, str]) -> bool:
        return try_parse_int(row.get("entry_index", "")) is not None

    # Locate the reference row
    ref_row = next(
        (
            row for row in event_rows
            if row["class_name_hint"].upper() == class_name.upper()
            and try_parse_int(row.get("slot", "")) == slot
            and _has_entry_index(row)
        ),
        None,
    )
    if ref_row is None:
        raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}")
    family_tag = _family_tag_of(ref_row)
    ref_entry = parse_int(ref_row["entry_index"])

    # Collect family members: same family_tag if meaningful, else same slot
    if family_tag and family_tag != "unique":
        family_rows = [
            row for row in event_rows
            if _family_tag_of(row) == family_tag and _has_entry_index(row)
        ]
    else:
        # Fall back: same slot across all classes
        family_rows = [
            row for row in event_rows
            if try_parse_int(row.get("slot", "")) == slot and _has_entry_index(row)
        ]

    def _load_body(row: dict[str, str]) -> bytes | None:
        """Return the row's body bytes, or None when they cannot be read."""
        body_start_str = row.get("derived_body_start", "")
        body_end_str = row.get("derived_body_end", "")
        if not body_start_str or not body_end_str:
            return None
        try:
            chunk = find_chunk_file(parse_int(row["entry_index"]), extracted_root)
            data = chunk.read_bytes()
            return data[parse_int(body_start_str):parse_int(body_end_str)]
        except (FileNotFoundError, ValueError):
            return None

    ref_body = _load_body(ref_row)
    if ref_body is None:
        raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}")

    members: list[dict[str, Any]] = []
    for row in family_rows:
        entry_idx = parse_int(row["entry_index"])
        body = _load_body(row)
        # Identify the reference by row identity: a tag-based family can hold
        # several rows of the same entry (other slots), and those are siblings
        # that must be diffed, not additional references.
        is_ref = row is ref_row
        member: dict[str, Any] = {
            "entry_index": entry_idx,
            "class_name": row["class_name_hint"],
            "slot": try_parse_int(row.get("slot", "")),
            "body_length": len(body) if body is not None else None,
            "is_reference": is_ref,
        }
        if is_ref:
            member["diff_vs_reference"] = {"identical": True, "note": "reference"}
        elif body is not None:
            prefix = _common_prefix_len(ref_body, body)
            suffix = _common_suffix_len(ref_body, body, prefix)
            # A zero suffix must not be turned into a [prefix:-0] slice.
            ref_diff_window = ref_body[prefix:len(ref_body) - suffix] if suffix else ref_body[prefix:]
            member_diff_window = body[prefix:len(body) - suffix] if suffix else body[prefix:]
            member["diff_vs_reference"] = {
                "common_prefix_bytes": prefix,
                "common_suffix_bytes": suffix,
                "ref_diff_window_hex": ref_diff_window.hex(),
                "member_diff_window_hex": member_diff_window.hex(),
                "diff_window_length_ref": len(ref_diff_window),
                "diff_window_length_member": len(member_diff_window),
                "identical": ref_body == body,
            }
        members.append(member)

    members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"]))
    return {
        "reference_entry": ref_entry,
        "reference_class": class_name,
        "slot": slot,
        "family_tag": family_tag or f"slot_0x{slot:02X}_all",
        "member_count": len(members),
        "sibling_count": len(members) - 1,
        "members": members,
    }
def render_family_diff_text(diff: dict[str, Any]) -> str:
    """Render a family diff (as built by ``compute_family_diff``) as text.

    Args:
        diff: Dict with ``family_tag``, ``reference_entry``,
            ``reference_class``, ``slot``, ``member_count``,
            ``sibling_count`` and ``members``.

    Returns:
        A newline-terminated report: three header lines, a blank line, then
        one line per member followed by its diff summary for non-reference
        members that carry a ``diff_vs_reference`` record.
    """
    lines = [
        f"Family diff: {diff['family_tag']}",
        f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}",
        f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}",
        "",
    ]
    for m in diff["members"]:
        tag = " [REF]" if m["is_reference"] else ""
        body_len = m["body_length"] if m["body_length"] is not None else "?"
        # A member's slot can be None when the index column did not parse;
        # show "?" (as for a missing body length) instead of failing to format.
        slot_text = f"0x{m['slot']:02X}" if m["slot"] is not None else "?"
        lines.append(f" entry={m['entry_index']} class={m['class_name']} slot={slot_text} body_len={body_len}{tag}")
        d = m.get("diff_vs_reference")
        if d and not m["is_reference"]:
            if d["identical"]:
                lines.append(" identical to reference")
            else:
                lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}")
                lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}")
                lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}")
    return "\n".join(lines) + "\n"
2026-03-25 23:32:13 +01:00
def build_listing_labels(ir: dict[str, Any]) -> dict[int, str]:
    """Create ``L_XXXX`` labels for every integer branch target in the IR.

    Args:
        ir: IR document; ``ops`` and ``event.derived_body_start`` are read.

    Returns:
        Mapping of absolute target offset (body start plus the op's
        ``target_offset``) to its ``L_`` label with a four-digit hex offset.
    """
    labels: dict[int, str] = {}
    for op in ir["ops"]:
        operands = op["operands"]
        if "target_offset" not in operands:
            continue
        relative_target = operands["target_offset"]
        if not isinstance(relative_target, int):
            continue
        absolute_target = relative_target + ir["event"]["derived_body_start"]
        labels[absolute_target] = f"L_{absolute_target:04X}"
    return labels
def build_script_blocks(ir: dict[str, Any]) -> tuple[dict[int, str], list[tuple[str, list[dict[str, Any]]]]]:
    """Split the decoded ops into labelled blocks.

    A block starts at the first op, at every integer ``target_offset``
    (made absolute with the body start), and at the op following a ``jne``,
    ``jmp``, ``foreach_list`` or ``foreach_slist``.

    Args:
        ir: IR document; ``ops`` and ``event.derived_body_start`` are read.

    Returns:
        ``(label_map, blocks)``.  ``label_map`` maps each block-start offset
        to a label (``entry`` for the lowest offset, ``block_XXXX`` for the
        rest).  ``blocks`` is the ordered list of ``(label, ops)`` pairs.
        Both are empty when there are no ops.
    """
    ops = ir["ops"]
    if not ops:
        return {}, []

    fallthrough_splitters = {"jne", "jmp", "foreach_list", "foreach_slist"}
    first_offset = ops[0]["absolute_body_offset"]
    leaders = {first_offset}
    last_position = len(ops) - 1
    for position, op in enumerate(ops):
        relative_target = op["operands"].get("target_offset")
        if isinstance(relative_target, int):
            leaders.add(ir["event"]["derived_body_start"] + relative_target)
        if op["mnemonic"] in fallthrough_splitters and position < last_position:
            leaders.add(ops[position + 1]["absolute_body_offset"])

    lowest, *others = sorted(leaders)
    label_map = {lowest: "entry"}
    label_map.update((leader, f"block_{leader:04X}") for leader in others)

    blocks: list[tuple[str, list[dict[str, Any]]]] = []
    active_label = label_map[first_offset]
    pending: list[dict[str, Any]] = []
    for op in ops:
        here = op["absolute_body_offset"]
        # Close the running block when this op carries a different label.
        if here in label_map and pending and label_map[here] != active_label:
            blocks.append((active_label, pending))
            active_label, pending = label_map[here], []
        pending.append(op)
    if pending:
        blocks.append((active_label, pending))
    return label_map, blocks
def format_script_string(value: str) -> str:
    """Quote *value* for script output, escaping backslashes and double quotes."""
    # Backslashes are doubled first so the quote escapes are not re-escaped.
    body = value.replace("\\", "\\\\")
    body = body.replace('"', '\\"')
    return '"' + body + '"'
def format_generic_operands(operands: dict[str, Any], label_map: dict[int, str], body_start: int) -> str:
    """Render an operand dict as space-separated ``key=value`` text.

    Args:
        operands: Operand values; entries that are ``None`` or ``""`` are
            left out.
        label_map: Absolute offset to label; used for ``target_offset``.
        body_start: Added to an integer ``target_offset`` to make it absolute.

    Returns:
        The rendered operands in dict order.  ``target_offset`` becomes
        ``target=<label or 0xXXXX>``, ``target_event_name_hint`` becomes
        ``event=...``, ``intrinsic_name_hint`` becomes ``hint=...``, integers
        are shown in hex unless the key ends in ``_signed``, and everything
        else is shown as-is.
    """
    renamed_keys = {"target_event_name_hint": "event", "intrinsic_name_hint": "hint"}

    def render(key: str, value: Any) -> str:
        if key == "target_offset" and isinstance(value, int):
            absolute = body_start + value
            return f"target={label_map.get(absolute, f'0x{absolute:04X}')}"
        if key in renamed_keys:
            return f"{renamed_keys[key]}={value}"
        if isinstance(value, int) and not key.endswith("_signed"):
            return f"{key}=0x{value:X}"
        return f"{key}={value}"

    return " ".join(
        render(key, value)
        for key, value in operands.items()
        if not (value is None or value == "")
    )
def format_script_statement(op: dict[str, Any], label_map: dict[int, str], body_start: int) -> str:
    """Render one decoded op as a line of pseudo-script.

    Specific mnemonics are matched before the generic ``push_local_*`` /
    ``push_member_*`` and ``pop_local_*`` / ``pop_member_*`` prefix rules, so
    the dedicated ``push_local_list``, ``push_local_addr`` and ``pop_*_blob``
    renderings are actually reachable.

    Args:
        op: Op record; ``mnemonic`` and ``operands`` are read.
        label_map: Absolute offset to block label, used for branch targets.
        body_start: Added to ``target_offset`` operands to make them absolute.

    Returns:
        The statement text.  Mnemonics without a dedicated rule fall back to
        the bare mnemonic (when listed in ``NO_ARG_MNEMONICS``) or to the
        mnemonic followed by its generically formatted operands.
    """
    mnemonic = op["mnemonic"]
    operands = op["operands"]

    def branch_target() -> str:
        # Label when the target is a known block start, hex offset otherwise.
        absolute = body_start + operands["target_offset"]
        return label_map.get(absolute, f"0x{absolute:04X}")

    if mnemonic == "init":
        return f"init locals 0x{operands['local_bytes']:02X}"

    # --- pushes ------------------------------------------------------------
    if mnemonic == "push_byte_immediate":
        return f"push 0x{operands['value_u8']:02X} ; signed {operands['value_signed']}"
    if mnemonic == "push_word_immediate":
        return f"push 0x{operands['value_u16']:04X}"
    if mnemonic == "push_dword_immediate":
        return f"push 0x{operands['value_u32']:08X}"
    if mnemonic == "push_string_immediate":
        return f"push {format_script_string(operands['string'])}"
    # These two share the "push_local_" prefix and must be tested before it.
    if mnemonic == "push_local_list":
        return f"push {operands['target']}<list:{operands['element_size']}>"
    if mnemonic == "push_local_addr":
        return f"push &{operands['target']}"
    if mnemonic.startswith(("push_local_", "push_member_")):
        return f"push {operands['target']}"
    if mnemonic == "push_list_element":
        return f"push list_element size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "push_huge":
        return f"push huge 0x{operands['value_a']:02X}:0x{operands['value_b']:02X}"
    if mnemonic == "push_global":
        return f"push global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}"
    if mnemonic == "push_string_ptr":
        return f"push string_ptr {operands['target']}"

    # --- pops --------------------------------------------------------------
    # Blob pops share the generic pop prefixes and must be tested before them.
    # The "size" guard keeps the previous generic rendering for records that
    # do not carry a size (the decoder producing them is outside this block).
    if mnemonic in {"pop_local_blob", "pop_member_blob"} and "size" in operands:
        return f"pop_blob -> {operands['target']} size=0x{operands['size']:X}"
    if mnemonic.startswith(("pop_local_", "pop_member_")):
        return f"pop -> {operands['target']}"
    if mnemonic == "pop_list_element":
        return f"pop_list_element -> {operands['target']} elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "pop_global":
        return f"pop -> global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}"

    # --- calls -------------------------------------------------------------
    if mnemonic == "call_intrinsic":
        hint = operands.get("intrinsic_name_hint") or "unknown_intrinsic"
        return f"call intrinsic 0x{operands['intrinsic_ordinal']:04X} {hint} args=0x{operands['arg_bytes']:02X}"
    if mnemonic == "call_near":
        return f"call {branch_target()}"
    if mnemonic == "call_class_event":
        return f"call {format_target_event_reference(operands)}"

    # --- list helpers --------------------------------------------------------
    if mnemonic in {"append_unique_inline", "append_unique_indirect", "remove_matching_indirect", "remove_matching_inline"}:
        return f"{mnemonic} size=0x{operands['element_size']:X}"
    if mnemonic == "create_list":
        return f"create_list elem_size=0x{operands['element_size']:X} count=0x{operands['count']:X}"
    if mnemonic == "in_list":
        return f"in_list elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"

    # --- control flow --------------------------------------------------------
    if mnemonic == "jne":
        return f"if pop() != 0 goto {branch_target()}"
    if mnemonic == "jmp":
        return f"goto {branch_target()}"
    if mnemonic in {"foreach_list", "foreach_slist"}:
        return f"{mnemonic} {operands['target_var']} elem_size=0x{operands['element_size']:X} -> {branch_target()}"

    # --- process spawning ----------------------------------------------------
    if mnemonic == "spawn":
        return (
            f"spawn {format_target_event_reference(operands)} "
            f"args=0x{operands['arg_bytes']:02X} this_size=0x{operands['this_size']:02X}"
        )
    if mnemonic == "spawn_inline":
        return (
            f"spawn_inline {format_target_event_reference(operands)} "
            f"inline=0x{operands['inline_offset']:04X} this_size=0x{operands['this_size']:02X} unk=0x{operands['unknown']:02X}"
        )

    # --- debug info and misc -------------------------------------------------
    if mnemonic == "line_number":
        return f"line {operands['line_number']}"
    if mnemonic == "symbol_info":
        return f"symbol_info {operands['symbol']} -> 0x{operands['symbol_offset']:04X}"
    if mnemonic == "global_address":
        return f"push &global[0x{operands['global_id']:04X}]"
    if mnemonic == "ret":
        return "return"
    if mnemonic in NO_ARG_MNEMONICS.values():
        return mnemonic

    rendered_operands = format_generic_operands(operands, label_map, body_start)
    return f"{mnemonic} {rendered_operands}".rstrip()
def render_script(ir: dict[str, Any]) -> str:
    """Render an IR document as a pseudo-script listing.

    Args:
        ir: IR document with ``class``, ``event``, ``body``, ``ops``,
            ``debug_symbols`` and ``field_tags`` sections.

    Returns:
        Newline-terminated text: a ``script`` header, body metadata lines, an
        optional ``locals`` section, the labelled blocks with one statement
        per op (each annotated with its offset and raw bytes), an optional
        ``field_tags`` section, and the unknown trailing bytes when present.
    """
    label_map, blocks = build_script_blocks(ir)
    event = ir["event"]
    owner = ir["class"]
    slot_name = event["event_name_hint"] or f"slot_{event['slot']:02X}"

    out: list[str] = []
    out.append(
        f"script {owner['class_name']}.{slot_name} "
        f"(entry={owner['entry_index']}, class_id=0x{owner['class_id']:04X}, slot=0x{event['slot']:02X})"
    )
    out.append("{")
    out.append(f" body_range 0x{event['derived_body_start']:04X}..0x{event['derived_body_end']:04X}")
    out.append(f" raw_event_word 0x{event['raw_event_entry_word']:04X}")
    out.append(f" raw_code_offset 0x{event['raw_code_offset']:08X}")
    out.append(f" end_reason {ir['body']['end_reason']}")

    local_symbols = ir["debug_symbols"]
    if local_symbols:
        out.append(" locals")
        out.append(" {")
        out.extend(
            f" {symbol['bp_repr']} {symbol['name']} ; type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') unk1=0x{symbol['unknown1']:02X} unk3=0x{symbol['unknown3']:02X}"
            for symbol in local_symbols
        )
        out.append(" }")
    # NOTE(review): the source's indentation was flattened; this blank line is
    # assumed to be unconditional (not part of the locals section) — confirm.
    out.append("")

    for label, block_ops in blocks:
        out.append(f" {label}:")
        for op in block_ops:
            statement = format_script_statement(op, label_map, event["derived_body_start"])
            out.append(f" {statement} ; {op['absolute_body_offset']:04X}: {op['raw_bytes']}")
        # NOTE(review): assumed to follow every block rather than only the
        # last one — confirm against the original indentation.
        out.append("")

    tags = ir["field_tags"]
    if tags:
        out.append(" field_tags")
        out.append(" {")
        out.extend(
            f" {tag['bp_repr']} {tag['name']} ; tag=0x{tag['tag_id']:02X} kind=0x{tag['value_kind']:02X}"
            for tag in tags
        )
        out.append(" }")

    trailing_hex = ir["body"]["unknown_trailing_bytes"]
    if trailing_hex:
        out.append(f" unknown_trailing_bytes {trailing_hex}")
    out.append("}")
    return "\n".join(out) + "\n"
def sanitize_identifier(name: str) -> str:
    """Turn an arbitrary name into an identifier-like token.

    Surrounding whitespace is stripped, every character that is neither
    alphanumeric nor ``_`` becomes ``_``, and leading/trailing underscores
    are removed.  An empty result becomes ``var``; a result starting with a
    digit is prefixed with ``v_``.
    """
    mapped = "".join(
        char if (char.isalnum() or char == "_") else "_"
        for char in name.strip()
    )
    trimmed = mapped.strip("_")
    if not trimmed:
        trimmed = "var"
    return f"v_{trimmed}" if trimmed[0].isdigit() else trimmed
def target_event_display_name(operands: dict[str, Any]) -> str:
    """Pick the best available name for a call/spawn target event.

    Preference order: the ``CLASS_EVENT_NAME_HINTS`` entry for
    ``(target_class_id, target_event_slot)``, then the operand's own
    ``target_event_name_hint``, then ``slot_XX`` built from the slot number.
    """
    class_id = operands["target_class_id"]
    slot = operands["target_event_slot"]
    class_specific = CLASS_EVENT_NAME_HINTS.get((class_id, slot))
    if class_specific:
        return class_specific
    generic = operands.get("target_event_name_hint")
    if generic:
        return generic
    return f"slot_{slot:02X}"
def format_target_event_reference(operands: dict[str, Any]) -> str:
    """Format a call/spawn target as ``Class.event`` or ``class_XXXX_event``.

    The dotted form is used when the operands carry a non-empty
    ``target_class_name_hint``; otherwise the class id is shown in hex.
    Both name parts are passed through ``sanitize_identifier``.
    """
    class_hint = operands.get("target_class_name_hint")
    event_part = sanitize_identifier(target_event_display_name(operands))
    if not class_hint:
        return f"class_{operands['target_class_id']:04X}_{event_part}"
    return f"{sanitize_identifier(class_hint)}.{event_part}"
2026-03-25 23:32:13 +01:00
def build_local_name_map(ir: dict[str, Any]) -> dict[int, str]:
    """Map each debug symbol's ``bp_offset`` to its sanitised name.

    When several symbols share a ``bp_offset`` the last one wins.
    """
    names_by_offset: dict[int, str] = {}
    for symbol in ir["debug_symbols"]:
        names_by_offset[symbol["bp_offset"]] = sanitize_identifier(symbol["name"])
    return names_by_offset
def format_bp_name(bp_offset: int, local_name_map: dict[int, str]) -> str:
    """Name a BP-relative slot for display.

    Args:
        bp_offset: Raw offset value used as the lookup key.
        local_name_map: Known names keyed by offset.

    Returns:
        The known name when there is one; otherwise ``arg_XX`` for a
        non-negative ``signed_byte(bp_offset)`` displacement and ``local_XX``
        (using the absolute displacement) for a negative one.
    """
    if bp_offset in local_name_map:
        return local_name_map[bp_offset]
    displacement = signed_byte(bp_offset)
    if displacement >= 0:
        prefix, magnitude = "arg", displacement
    else:
        prefix, magnitude = "local", abs(displacement)
    return f"{prefix}_{magnitude:02X}"
def intrinsic_display_name(name_hint: str | None, ordinal: int) -> str:
    """Produce a short display name for an intrinsic.

    Args:
        name_hint: Hinted intrinsic name, possibly with ``::`` separators,
            ``I_`` prefixes and a parenthesised suffix; may be empty/None.
        ordinal: Intrinsic ordinal, used only when there is no hint.

    Returns:
        ``intrinsic_XXXX`` without a hint.  Otherwise the hint with ``::``
        turned into ``.``, ``I_`` removed after each dot and at the start,
        and everything from the first ``(`` dropped.
    """
    if not name_hint:
        return f"intrinsic_{ordinal:04X}"
    dotted = name_hint.replace("::", ".")
    dotted = re.sub(r"(?<=\.)I_", "", dotted)
    if dotted[:2] == "I_":
        dotted = dotted[2:]
    # Keep only what precedes the first "(" (the whole text if none).
    return dotted.partition("(")[0]
def push_expr_from_op(op: dict[str, Any], local_name_map: dict[int, str]) -> tuple[str, int] | None:
    """Describe the value a push-style op puts on the stack.

    Args:
        op: Op record; ``mnemonic`` and ``operands`` are read.
        local_name_map: Known names keyed by BP offset.

    Returns:
        ``(expression_text, width)`` for the push mnemonics handled below
        (the width is the number paired with each expression on the symbolic
        stack), or ``None`` for every other mnemonic.
    """
    kind = op["mnemonic"]
    args = op["operands"]

    # Immediates
    if kind == "push_byte_immediate":
        return (str(args["value_signed"]), 1)
    if kind == "push_word_immediate":
        return (f"0x{args['value_u16']:04X}", 2)
    if kind == "push_dword_immediate":
        return (f"0x{args['value_u32']:08X}", 4)
    if kind == "push_string_immediate":
        return (format_script_string(args["string"]), max(2, args["declared_length"]))

    # BP-relative locals
    local_pushes = {
        "push_local_byte",
        "push_local_word",
        "push_local_dword",
        "push_local_string",
        "push_local_slist",
        "push_local_addr",
        "push_string_ptr",
    }
    if kind in local_pushes:
        local_name = format_bp_name(args["bp_offset"], local_name_map)
        is_wide = kind in {"push_local_dword", "push_local_addr", "push_string_ptr"}
        return (local_name, 4 if is_wide else 2)

    # Members
    member_pushes = {"push_member_byte", "push_member_word", "push_member_dword", "push_member_huge"}
    if kind in member_pushes:
        member_name = f"member.{format_bp_name(args['bp_offset'], local_name_map)}"
        is_wide = kind in {"push_member_dword", "push_member_huge"}
        return (member_name, 4 if is_wide else 2)

    # Lists, huge values and globals
    if kind == "push_local_list":
        return (format_bp_name(args["bp_offset"], local_name_map), max(2, args["element_size"]))
    if kind == "push_list_element":
        return (f"list_element(size=0x{args['element_size']:X})", max(1, args["element_size"]))
    if kind == "push_huge":
        return (f"0x{args['value_a']:02X}{args['value_b']:02X}", 4)
    if kind == "push_global":
        return (f"global[0x{args['global_id']:04X}]", max(1, args["size"]))

    # Fixed expressions; any other mnemonic yields None.
    fixed_pushes = {"push_pid": ("pid", 2), "push_process_result": ("process_result", 2)}
    return fixed_pushes.get(kind)
def pop_stack_bytes(stack: list[tuple[str, int]], byte_count: int) -> list[str]:
    """Pop entries off ``stack`` until at least ``byte_count`` bytes are consumed.

    Each entry counts for its recorded width (minimum 1). The popped
    expressions are returned in their original push order. ``stack`` is
    mutated in place; popping stops early if it runs empty.
    """
    popped: list[str] = []
    remaining = byte_count
    while remaining > 0 and stack:
        text, size = stack.pop()
        popped.append(text)
        remaining -= max(1, size)
    # Entries came off top-first; restore push order for the caller.
    return popped[::-1]
def combine_binary(stack: list[tuple[str, int]], operator: str, result_width: int = 2) -> None:
    """Replace the top two stack entries with ``(left operator right)``.

    The combined entry is given ``result_width``. The stack is left untouched
    when it holds fewer than two entries.
    """
    if len(stack) >= 2:
        (lhs, _), (rhs, _) = stack[-2:]
        stack[-2:] = [(f"({lhs} {operator} {rhs})", result_width)]
def evaluate_loop_setup_op(
    op: dict[str, Any],
    stack: list[tuple[str, int]],
    local_name_map: dict[int, str],
) -> bool:
    """Apply one loop-setup op to the symbolic ``stack``.

    Handles value pushes, ``push_indirect``, the four arithmetic operators
    (word and ``_dword`` forms) and ``line_number``. Returns True when the op
    was understood and False otherwise.
    """
    pushed = push_expr_from_op(op, local_name_map)
    if pushed is not None:
        stack.append(pushed)
        return True

    name = op["mnemonic"]
    args = op["operands"]
    if name == "push_indirect":
        # With nothing to dereference the op is still accepted as a no-op.
        if stack:
            inner, _ = stack.pop()
            stack.append((f"*({inner})", max(1, args["size"])))
        return True

    arithmetic = {"add": "+", "sub": "-", "mul": "*", "div": "/"}
    is_dword = name.endswith("_dword")
    symbol = arithmetic.get(name[: -len("_dword")] if is_dword else name)
    if symbol is not None:
        combine_binary(stack, symbol, 4 if is_dword else 2)
        return True

    return name == "line_number"
def normalize_loop_origin(expr: str) -> str:
    """Strip surrounding whitespace and one enclosing ``*( ... )`` wrapper."""
    trimmed = expr.strip()
    is_deref = trimmed[:2] == "*(" and trimmed[-1:] == ")"
    return trimmed[2:-1] if is_deref else trimmed
def try_decode_loop_selector(
    ops: list[dict[str, Any]],
    start_index: int,
    local_name_map: dict[int, str],
) -> tuple[str, int] | None:
    """Decode a ``loopscr ... loop`` sequence into loop-selector text.

    Scans from ``start_index``, collecting ``loopscr`` token bytes and
    symbolically evaluating the setup ops, until a ``loop`` op is reached.
    Returns ``(selector_text, index_after_loop)`` for the two recognised
    selector shapes, or None when the sequence is not understood.
    """
    tokens: list[int] = []
    setup_stack: list[tuple[str, int]] = []
    loop_index: int | None = None
    for position in range(start_index, len(ops)):
        current = ops[position]
        kind = current["mnemonic"]
        if kind == "loop":
            loop_index = position
            break
        if kind == "loopscr":
            tokens.append(current["operands"]["value_u8"])
        elif not evaluate_loop_setup_op(current, setup_stack, local_name_map):
            return None
    if loop_index is None:
        return None

    loop_args = ops[loop_index]["operands"]
    if loop_args.get("string_bytes") != 0x6 or loop_args.get("loop_type") != 0x2:
        return None
    resume_index = loop_index + 1

    # Two-token form: 0x24 0x42 with at least four setup values.
    if tokens == [0x24, 0x42]:
        if len(setup_stack) < 4:
            return None
        loop_var = format_bp_name(loop_args["current_var"], local_name_map)
        call_text = generic_loop_selector_call(
            "selector_0x42",
            [
                ("arg0", setup_stack[-4][0]),
                ("arg1", setup_stack[-3][0]),
                ("arg2", setup_stack[-2][0]),
                ("origin", normalize_loop_origin(setup_stack[-1][0])),
            ],
        )
        return (f"{loop_var} in {call_text}", resume_index)

    # Four-token form: 0x24 0x3D <field> 0x25.
    is_field_form = (
        len(tokens) == 4
        and tokens[0] == 0x24
        and tokens[1] == 0x3D
        and tokens[3] == 0x25
    )
    if not is_field_form:
        return None
    field_name = LOOP_SELECTOR_FIELD_HINTS.get(tokens[2])
    if field_name is None or len(setup_stack) < 3:
        return None
    loop_var = format_bp_name(loop_args["current_var"], local_name_map)
    field_value = setup_stack[-3][0]
    origin_text = normalize_loop_origin(setup_stack[-1][0])
    return (
        f"{loop_var} in nearby_items({field_name}={field_value}, origin={origin_text})",
        resume_index,
    )
def loop_selector_statement(selector_text: str) -> str:
    """Wrap loop-selector text in the comment marker the renderers look for."""
    return "/* loop_selector {} */".format(selector_text)
2026-03-25 23:32:13 +01:00
# Ops that carry no pseudocode meaning and are dropped silently.
_PSEUDOCODE_SKIPPED_MNEMONICS = frozenset({"line_number", "symbol_info", "add_sp", "init"})

# Binary operators: mnemonic -> (rendered symbol, result width in bytes).
_PSEUDOCODE_BINARY_OPERATORS = {
    "add": ("+", 2),
    "add_dword": ("+", 4),
    "sub": ("-", 2),
    "sub_dword": ("-", 4),
    "mul": ("*", 2),
    "mul_dword": ("*", 4),
    "div": ("/", 2),
    "div_dword": ("/", 4),
    "bit_and": ("&", 2),
    "bit_or": ("|", 2),
    "and": ("&&", 2),
    "or": ("||", 2),
    "cmp": ("!=", 2),
    "ne": ("!=", 2),
    "lt": ("<", 2),
    "le": ("<=", 2),
    "gt": (">", 2),
    "ge": (">=", 2),
}

# Width pushed by each "push the last call result" op.
_PSEUDOCODE_RETVAL_WIDTHS = {
    "push_retval_byte": 1,
    "push_retval_word": 2,
    "push_retval_dword": 4,
}


def _decompile_block_statements(
    ops: list[dict[str, Any]],
    label_map: dict[int, str],
    local_name_map: dict[int, str],
    body_start: int,
) -> list[str]:
    """Symbolically execute one block's ops and return its statement lines."""
    stack: list[tuple[str, int]] = []
    statements: list[str] = []
    pending_call: str | None = None

    def flush_pending_call() -> None:
        # An intrinsic call whose result was never pushed is still a call;
        # emit it as a statement instead of silently dropping it.
        nonlocal pending_call
        if pending_call is not None:
            statements.append(f"{pending_call};")
            pending_call = None

    def emit(statement: str) -> None:
        # Keep source order: an unconsumed call precedes the next statement.
        flush_pending_call()
        statements.append(statement)

    def drain_stack_arguments() -> str:
        text = ", ".join(expr for expr, _ in stack)
        stack.clear()
        return text

    def jump_label(operands: dict[str, Any]) -> str:
        absolute = body_start + operands["target_offset"]
        return label_map.get(absolute, f"block_{absolute:04X}")

    index = 0
    while index < len(ops):
        op = ops[index]
        mnemonic = op["mnemonic"]
        operands = op["operands"]

        if mnemonic == "loopscr":
            decoded_loop = try_decode_loop_selector(ops, index, local_name_map)
            if decoded_loop is not None:
                # The decoder consumed everything up to and including "loop".
                selector_text, index = decoded_loop
                emit(loop_selector_statement(selector_text))
                stack.clear()
                continue
            # Undecodable selector: fall through to the generic comment below.
        index += 1

        pushed = push_expr_from_op(op, local_name_map)
        if pushed is not None:
            stack.append(pushed)
        elif mnemonic in _PSEUDOCODE_SKIPPED_MNEMONICS:
            continue
        elif mnemonic == "push_indirect":
            if stack:
                expr, _ = stack.pop()
                stack.append((f"*({expr})", max(1, operands["size"])))
        elif mnemonic == "set_info":
            emit(f"set_info({drain_stack_arguments()});")
        elif mnemonic == "process_exclude":
            emit("process_exclude();")
        elif mnemonic == "call_intrinsic":
            # A previous call that nobody consumed must not be overwritten.
            flush_pending_call()
            arg_exprs = pop_stack_bytes(stack, operands["arg_bytes"])
            callee = intrinsic_display_name(operands.get("intrinsic_name_hint"), operands["intrinsic_ordinal"])
            pending_call = f"{callee}({', '.join(arg_exprs)})"
        elif mnemonic in _PSEUDOCODE_RETVAL_WIDTHS:
            stack.append((pending_call or "retval", _PSEUDOCODE_RETVAL_WIDTHS[mnemonic]))
            pending_call = None
        elif mnemonic == "call_class_event":
            emit(f"{format_target_event_reference(operands)}({drain_stack_arguments()});")
        elif mnemonic == "spawn":
            emit(f"spawn {format_target_event_reference(operands)}({drain_stack_arguments()});")
        elif mnemonic == "spawn_inline":
            emit(
                f"spawn_inline {format_target_event_reference(operands)}({drain_stack_arguments()}) /* inline=0x{operands['inline_offset']:04X} */;"
            )
        elif mnemonic in _PSEUDOCODE_BINARY_OPERATORS:
            symbol, width = _PSEUDOCODE_BINARY_OPERATORS[mnemonic]
            combine_binary(stack, symbol, width)
        elif mnemonic == "not":
            if stack:
                expr, width = stack.pop()
                stack.append((f"(!{expr})", width))
        elif mnemonic == "implies":
            expr = stack.pop()[0] if stack else "retval"
            stack.append((f"implies({expr}, 0x{operands['arg0']:X}, 0x{operands['arg1']:X})", 1))
        elif mnemonic == "pop_temp":
            if stack:
                stack.pop()
        elif mnemonic == "suspend":
            emit("suspend;")
            stack.clear()
        elif mnemonic == "jne":
            target = jump_label(operands)
            condition = stack.pop()[0] if stack else "condition"
            emit(f"if {condition} goto {target};")
        elif mnemonic == "jmp":
            emit(f"goto {jump_label(operands)};")
            stack.clear()
        elif mnemonic in {"foreach_list", "foreach_slist"}:
            target = jump_label(operands)
            emit(f"{mnemonic} {format_bp_name(operands['bp_offset'], local_name_map)} -> {target};")
        elif mnemonic == "ret":
            emit("return;")
            stack.clear()
            break  # Anything after "ret" in this block is ignored.
        elif mnemonic.startswith(("pop_local_", "pop_member_")):
            expr = stack.pop()[0] if stack else "value"
            target_name = format_bp_name(operands["bp_offset"], local_name_map)
            if mnemonic.startswith("pop_member_"):
                # Mirror push_member_*, which renders loads as "member.<name>".
                target_name = f"member.{target_name}"
            emit(f"{target_name} = {expr};")
        else:
            # Untranslated ops become comments. They do not flush a pending
            # call, so a later push_retval_* can still inline it.
            rendered_operands = format_generic_operands(operands, label_map, body_start)
            statements.append(f"/* {mnemonic} {rendered_operands} */")

    flush_pending_call()
    return statements


def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]]]:
    """Turn the event IR into labelled blocks of flat pseudocode statements.

    Each block from ``build_script_blocks`` is executed symbolically over an
    expression stack. Pushes and operators build expression text; stores,
    calls, jumps and returns become statement lines. Ops with no dedicated
    handling are emitted as ``/* mnemonic operands */`` comments.

    An intrinsic call is held back until a ``push_retval_*`` consumes it as an
    expression. If nothing consumes it before the next statement, the next
    intrinsic call, or the end of the block, it is emitted as its own
    ``name(args);`` statement.

    Returns:
        ``(label, statements)`` pairs in block order.
    """
    label_map, blocks = build_script_blocks(ir)
    local_name_map = build_local_name_map(ir)
    body_start = ir["event"]["derived_body_start"]
    return [
        (label, _decompile_block_statements(ops, label_map, local_name_map, body_start))
        for label, ops in blocks
    ]
@dataclass(frozen=True)
class TerminalStatement:
    """Parsed form of a block-ending pseudocode statement.

    ``parse_terminal_statement`` builds these for ``return;``,
    ``goto <label>;`` and ``if <condition> goto <label>;`` lines.
    """

    # Statement category: "return", "goto" or "if".
    kind: str
    # Condition text of an "if" statement; None for the other kinds.
    condition: str | None = None
    # Destination label of a "goto" or "if" statement; None for "return".
    target: str | None = None
def parse_terminal_statement(statement: str) -> TerminalStatement | None:
    """Classify ``statement`` as a return, goto or conditional goto.

    Returns None when the whole statement matches none of the three forms.
    """
    if statement == "return;":
        return TerminalStatement("return")
    if (jump := re.fullmatch(r"goto ([A-Za-z0-9_]+);", statement)) is not None:
        return TerminalStatement("goto", target=jump.group(1))
    if (branch := re.fullmatch(r"if (.+) goto ([A-Za-z0-9_]+);", statement)) is not None:
        condition_text, label = branch.groups()
        return TerminalStatement("if", condition=condition_text, target=label)
    return None
def strip_outer_parens(expr: str) -> str:
    """Remove whitespace and every pair of parentheses wrapping the whole text.

    A leading ``(`` is only stripped when its matching ``)`` is the final
    character, so ``(a) + (b)`` is returned unchanged.
    """
    text = expr.strip()
    while text.startswith("(") and text.endswith(")"):
        depth = 0
        wraps_whole = True
        for position, char in enumerate(text):
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0 and position != len(text) - 1:
                    # The opening parenthesis closed before the end.
                    wraps_whole = False
                    break
        if not wraps_whole or depth != 0:
            break
        text = text[1:-1].strip()
    return text


# Comparison operator -> its logical negation.
_CONDITION_INVERSIONS = {
    "!=": "==",
    "==": "!=",
    "<=": ">",
    ">=": "<",
    "<": ">=",
    ">": "<=",
}

# Every space-delimited binary operator the pseudocode renderer emits.
_CONDITION_BINARY_TOKENS = frozenset(_CONDITION_INVERSIONS) | {"&&", "||", "&", "|", "+", "-", "*", "/"}


def _top_level_operators(expr: str) -> list[tuple[int, str]]:
    """Return ``(start, token)`` for binary operators outside all parentheses."""
    found: list[tuple[int, str]] = []
    depth = 0
    for position, char in enumerate(expr):
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
        elif char == " " and depth == 0:
            end = expr.find(" ", position + 1)
            if end != -1 and expr[position + 1:end] in _CONDITION_BINARY_TOKENS:
                found.append((position + 1, expr[position + 1:end]))
    return found


def invert_condition_text(condition: str) -> str:
    """Return pseudocode text for the logical negation of ``condition``.

    A comparison is inverted by swapping its operator, but only when that
    comparison is the single top-level operator of the expression. Operators
    nested in parentheses or call arguments are never rewritten, because
    swapping one operand of ``&&``/``||`` does not negate the whole
    expression. A leading ``!`` is removed, a bare identifier or call gets a
    ``!`` prefix, and anything else is wrapped as ``!(...)``.
    """
    expr = strip_outer_parens(condition)
    operators = _top_level_operators(expr)
    if len(operators) == 1:
        start, token = operators[0]
        swapped = _CONDITION_INVERSIONS.get(token)
        if swapped is not None:
            return expr[:start] + swapped + expr[start + len(token):]
    if operators:
        # Compound top-level expression: negate it as a whole.
        return f"!({expr})"
    if expr.startswith("!"):
        return strip_outer_parens(expr[1:])
    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_:.]*(\(.*\))?", expr):
        return f"!{expr}"
    return f"!({expr})"
def indent_lines(lines: list[str], prefix: str = " ") -> list[str]:
    """Prefix every non-empty line with ``prefix``; empty lines stay empty."""
    indented: list[str] = []
    for text in lines:
        indented.append(prefix + text if text else "")
    return indented
def detect_noop_compare_chain(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    start_index: int,
    end_index: int,
) -> int | None:
    """Detect a run of compare/goto block pairs that all reach the same block.

    Starting at ``start_index`` the blocks must alternate between a single
    ``if ... goto`` statement and a single ``goto`` statement. Every ``goto``
    must name the same target, and every ``if`` must jump to the block right
    after its pair. When an ``if`` finally targets that shared label, the
    index of that block is returned so the caller can skip the chain.
    Returns None if the pattern does not hold.
    """
    shared_target: str | None = None
    cursor = start_index
    while cursor + 1 < end_index:
        compare_statements = blocks[cursor][1]
        goto_statements = blocks[cursor + 1][1]
        if len(compare_statements) != 1 or len(goto_statements) != 1:
            return None
        compare_terminal = parse_terminal_statement(compare_statements[0])
        goto_terminal = parse_terminal_statement(goto_statements[0])
        if compare_terminal is None or compare_terminal.kind != "if":
            return None
        if goto_terminal is None or goto_terminal.kind != "goto":
            return None
        if shared_target is None:
            shared_target = goto_terminal.target
        elif goto_terminal.target != shared_target:
            return None

        # Either way the "if" must land on the block directly after the pair.
        follow_index = label_to_index.get(compare_terminal.target or "")
        if follow_index is None or follow_index != cursor + 2 or follow_index >= end_index:
            return None
        if compare_terminal.target == shared_target:
            return follow_index
        cursor = follow_index
    return None
2026-03-26 00:37:17 +01:00
def last_nonempty_block_index(
    blocks: list[tuple[str, list[str]]],
    start_index: int,
    end_index: int,
) -> int | None:
    """Return the highest index in ``[start_index, end_index)`` whose block has statements.

    Returns None when every block in the range is empty or the range itself
    is empty.
    """
    candidates = reversed(range(start_index, end_index))
    return next((position for position in candidates if blocks[position][1]), None)
def parse_selector_condition(condition: str) -> tuple[str, str] | None:
    """Split a ``<left> != <right>`` condition into its two trimmed sides.

    Outer parentheses are removed first. Returns None when the text has no
    ``!=`` with something on both sides.
    """
    found = re.fullmatch(r"(.+?)\s*!=\s*(.+)", strip_outer_parens(condition))
    if found is None:
        return None
    left_text, right_text = (side.strip() for side in found.groups())
    return left_text, right_text
def parse_loop_selector_statement(statement: str) -> str | None:
    """Extract the selector text from a ``/* loop_selector ... */`` marker.

    Returns None when ``statement`` is not exactly such a marker.
    """
    found = re.fullmatch(r"/\* loop_selector (.+) \*/", statement)
    return found.group(1) if found is not None else None
def is_loop_selector_only_block(statements: list[str]) -> bool:
    """Return True when the block is exactly one loop-selector marker."""
    if len(statements) != 1:
        return False
    return parse_loop_selector_statement(statements[0]) is not None
2026-03-26 00:37:17 +01:00
def render_selector_chain(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    start_index: int,
    end_index: int,
    return_labels: set[str],
    active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
    render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], int] | None:
    """Render a chain of ``selector != value`` tests as ``if`` / ``else if``.

    Every link must end in an ``if <selector> != ... goto <next>`` terminal
    with the same left-hand side as the first link. Its body must end in a
    ``goto`` to one join label shared by all links. Returns the rendered
    lines and the index of the join block, or None when the blocks do not
    form such a chain.

    NOTE(review): only each link's terminal is used here. Statements that
    precede the terminal in the second and later links are not rendered.
    Confirm such blocks never carry other statements.
    """
    head_statements = blocks[start_index][1]
    if not head_statements:
        return None
    head_terminal = parse_terminal_statement(head_statements[-1])
    if head_terminal is None or head_terminal.kind != "if":
        return None
    head_selector = parse_selector_condition(head_terminal.condition or "")
    if head_selector is None:
        return None
    selector_expr = head_selector[0]

    join_label: str | None = None
    branches: list[tuple[str, list[str]]] = []
    cursor = start_index
    while cursor < end_index:
        statements = blocks[cursor][1]
        if not statements:
            return None
        terminal = parse_terminal_statement(statements[-1])
        if terminal is None or terminal.kind != "if":
            return None
        compared = parse_selector_condition(terminal.condition or "")
        if compared is None or compared[0] != selector_expr:
            return None

        # The branch body is the non-empty span between this test and its target.
        next_label = terminal.target or ""
        next_index = label_to_index.get(next_label)
        if next_index is None or not (cursor + 1 < next_index <= end_index):
            return None
        tail_index = last_nonempty_block_index(blocks, cursor + 1, next_index)
        if tail_index is None:
            return None
        tail_terminal = parse_terminal_statement(blocks[tail_index][1][-1])
        if tail_terminal is None or tail_terminal.kind != "goto":
            return None

        # Every body has to leave through the same join label.
        branch_join = tail_terminal.target or ""
        branch_join_index = label_to_index.get(branch_join)
        if branch_join_index is None or not (next_index <= branch_join_index <= end_index):
            return None
        if branch_join_index == next_index and next_label != branch_join:
            return None
        if join_label is None:
            join_label = branch_join
        elif branch_join != join_label:
            return None

        body = render_structured_region(
            blocks,
            label_to_index,
            cursor + 1,
            next_index,
            return_labels,
            {join_label},
            active_regions,
            render_cache,
        )
        if body is None:
            return None
        branches.append((invert_condition_text(terminal.condition or "condition"), body[0]))
        if next_label == join_label:
            break
        cursor = next_index

    if join_label is None:
        return None
    rendered: list[str] = []
    for position, (condition, body_lines) in enumerate(branches):
        keyword = "else if" if position else "if"
        rendered += [f"{keyword} ({condition}) {{", *indent_lines(body_lines), "}"]
    return rendered, label_to_index[join_label]
2026-03-26 23:12:38 +01:00
def render_loop_construct(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    index: int,
    end_index: int,
    return_labels: set[str],
    active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
    render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], int] | None:
    """Render a head-tested loop starting at ``blocks[index]``.

    The head block must end in ``if ... goto <exit>`` with the exit after the
    head, and the last non-empty block before the exit must jump back to the
    head label. The loop is rendered as ``for <selector>`` when the preceding
    block is a lone loop-selector marker, otherwise as ``while (<negated
    condition>)``. Returns the lines and the exit index, or None if the
    pattern does not match.
    """
    head_label, head_statements = blocks[index]
    if not head_statements:
        return None
    head_terminal = parse_terminal_statement(head_statements[-1])
    if head_terminal is None or head_terminal.kind != "if":
        return None
    exit_index = label_to_index.get(head_terminal.target or "")
    if exit_index is None or not (index < exit_index <= end_index):
        return None

    tail_index = last_nonempty_block_index(blocks, index + 1, exit_index)
    if tail_index is None:
        return None
    tail_terminal = parse_terminal_statement(blocks[tail_index][1][-1])
    jumps_back = (
        tail_terminal is not None
        and tail_terminal.kind == "goto"
        and tail_terminal.target == head_label
    )
    if not jumps_back:
        return None

    body = render_structured_region(
        blocks,
        label_to_index,
        index + 1,
        exit_index,
        return_labels,
        {head_label},
        active_regions,
        render_cache,
    )
    if body is None:
        return None

    selector_text = None
    if index > 0 and is_loop_selector_only_block(blocks[index - 1][1]):
        selector_text = parse_loop_selector_statement(blocks[index - 1][1][0])
    if selector_text is not None:
        header = f"for {selector_text} {{"
    else:
        header = f"while ({invert_condition_text(head_terminal.condition or 'condition')}) {{"
    return [header, *indent_lines(body[0]), "}"], exit_index
def render_infinite_loop_construct(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    index: int,
    end_index: int,
    return_labels: set[str],
    active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
    render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], int] | None:
    """Render ``while (true)`` around a region closed by a jump back to its head.

    The loop tail is the last block after ``index`` (and before
    ``end_index``) whose final statement is ``goto <head label>``. The region
    from the head through that tail becomes the loop body. Returns the lines
    and the index after the tail, or None when no back-jump exists or the
    body cannot be structured.
    """
    if index + 1 >= end_index:
        return None
    loop_label = blocks[index][0]

    def jumps_to_head(candidate: int) -> bool:
        statements = blocks[candidate][1]
        if not statements:
            return False
        terminal = parse_terminal_statement(statements[-1])
        return terminal is not None and terminal.kind == "goto" and terminal.target == loop_label

    # Search backwards so the widest possible loop body is chosen.
    tail_index = next(
        (candidate for candidate in range(end_index - 1, index, -1) if jumps_to_head(candidate)),
        None,
    )
    if tail_index is None:
        return None

    body = render_structured_region(
        blocks,
        label_to_index,
        index,
        tail_index + 1,
        return_labels,
        {loop_label},
        active_regions,
        render_cache,
    )
    if body is None:
        return None
    return ["while (true) {", *indent_lines(body[0]), "}"], tail_index + 1
2026-03-25 23:32:13 +01:00
def _is_single_if_chain(rendered: list[str]) -> bool:
    """Return True when ``rendered`` is exactly one ``if``/``else`` chain.

    Only then may it be spliced after ``else`` without braces. Any other
    top-level line means the branch holds more than the chain.
    """
    if not rendered or not rendered[0].startswith("if ") or rendered[-1] != "}":
        return False
    for line in rendered[1:]:
        if not line or line[0].isspace():
            continue  # Blank or indented line: belongs to a nested body.
        if line != "}" and not line.startswith("else "):
            return False
    return True


def render_structured_region(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    start_index: int,
    end_index: int,
    return_labels: set[str],
    exit_labels: set[str] | None = None,
    active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
    render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], bool] | None:
    """Render blocks ``[start_index, end_index)`` as structured pseudocode.

    Args:
        blocks: ``(label, statements)`` pairs for the whole event.
        label_to_index: Block index for every label.
        start_index: First block of the region.
        end_index: Index one past the last block of the region.
        return_labels: Labels of blocks consisting solely of ``return;``; a
            ``goto`` to one of them is rendered as ``return;``.
        exit_labels: Labels a ``goto`` may target to leave the region; such a
            jump ends the region without emitting a line.
        active_regions: Regions currently being rendered further up the call
            chain; re-entering one of them aborts with None.
        render_cache: Optional memo keyed by ``(start, end, exit labels)``.

    Returns:
        ``(lines, falls_through)``. ``falls_through`` is False when rendering
        stopped at a return or exit jump and True when it ran off the end of
        the region. None when the control flow cannot be structured.
    """
    region_key = (start_index, end_index, tuple(sorted(exit_labels or ())))
    if render_cache is not None and region_key in render_cache:
        return render_cache[region_key]
    if active_regions is None:
        active_regions = set()
    elif region_key in active_regions:
        return None
    # Copy before adding so sibling regions do not see this entry.
    active_regions = set(active_regions)
    active_regions.add(region_key)
    allowed_exit_labels = set(exit_labels or ())
    lines: list[str] = []
    index = start_index
    while index < end_index:
        skipped_index = detect_noop_compare_chain(blocks, label_to_index, index, end_index)
        if skipped_index is not None:
            index = skipped_index
            continue
        _, statements = blocks[index]
        if not statements:
            index += 1
            continue
        if is_loop_selector_only_block(statements):
            # The marker is picked up by render_loop_construct for the next block.
            index += 1
            continue
        terminal = parse_terminal_statement(statements[-1])
        if terminal is None:
            lines.extend(statements)
            index += 1
            continue
        lines.extend(statements[:-1])
        if terminal.kind == "return":
            lines.append("return;")
            return lines, False
        if terminal.kind == "goto":
            target_label = terminal.target or ""
            target_index = label_to_index.get(target_label)
            if target_label in return_labels:
                lines.append("return;")
                return lines, False
            if target_label in allowed_exit_labels:
                return lines, False
            if target_index is None:
                return None
            if target_index == index + 1:
                index += 1
                continue
            if index < target_index < end_index:
                index = target_index
                continue
            return None

        # Conditional jump: only forward targets inside the region are handled.
        target_label = terminal.target or ""
        target_index = label_to_index.get(target_label)
        if target_index is None or target_index <= index or target_index > end_index:
            return None
        if target_index == index + 1:
            index += 1
            continue
        selector_chain = render_selector_chain(
            blocks,
            label_to_index,
            index,
            end_index,
            return_labels,
            active_regions,
            render_cache,
        )
        if selector_chain is not None:
            selector_lines, selector_join_index = selector_chain
            lines.extend(selector_lines)
            index = selector_join_index
            continue
        loop_construct = render_loop_construct(
            blocks,
            label_to_index,
            index,
            end_index,
            return_labels,
            active_regions,
            render_cache,
        )
        if loop_construct is not None:
            loop_lines, loop_join_index = loop_construct
            lines.extend(loop_lines)
            index = loop_join_index
            continue

        # if/else: the true branch ends by jumping over the false branch.
        true_tail_index = last_nonempty_block_index(blocks, index + 1, target_index)
        if true_tail_index is not None:
            true_tail_terminal = parse_terminal_statement(blocks[true_tail_index][1][-1])
            if true_tail_terminal is not None and true_tail_terminal.kind == "goto":
                join_label = true_tail_terminal.target or ""
                join_index = label_to_index.get(join_label)
                if join_index is not None and join_index > target_index and join_index <= end_index:
                    true_result = render_structured_region(
                        blocks,
                        label_to_index,
                        index + 1,
                        target_index,
                        return_labels,
                        {join_label},
                        active_regions,
                        render_cache,
                    )
                    false_result = render_structured_region(
                        blocks,
                        label_to_index,
                        target_index,
                        join_index,
                        return_labels,
                        {join_label},
                        active_regions,
                        render_cache,
                    )
                    if true_result is not None and false_result is not None:
                        true_lines, _ = true_result
                        false_lines, _ = false_result
                        lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{")
                        lines.extend(indent_lines(true_lines))
                        lines.append("}")
                        if false_lines:
                            if _is_single_if_chain(false_lines):
                                lines.append(f"else {false_lines[0]}")
                                lines.extend(false_lines[1:])
                            else:
                                # Statements after a nested "if" must stay in
                                # the else branch, so keep the braces.
                                lines.append("else {")
                                lines.extend(indent_lines(false_lines))
                                lines.append("}")
                        index = join_index
                        continue

        # Plain if: the blocks up to the target form the body.
        inner_result = render_structured_region(
            blocks,
            label_to_index,
            index + 1,
            target_index,
            return_labels,
            None,
            active_regions,
            render_cache,
        )
        if inner_result is None:
            if render_cache is not None:
                render_cache[region_key] = None
            return None
        inner_lines, inner_falls_through = inner_result
        if inner_lines:
            lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{")
            lines.extend(indent_lines(inner_lines))
            lines.append("}")
        elif not inner_falls_through:
            lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{")
            lines.append("}")
        index = target_index
    result = (lines, True)
    if render_cache is not None:
        render_cache[region_key] = result
    return result
2026-03-25 23:32:13 +01:00
def render_structured_pseudocode(blocks: list[tuple[str, list[str]]]) -> list[str] | None:
    """Render all blocks as structured pseudocode lines.

    Returns an empty list for no blocks, and None when the control flow
    cannot be structured as a single region.
    """
    if not blocks:
        return []
    label_to_index: dict[str, int] = {}
    return_labels: set[str] = set()
    for position, (label, statements) in enumerate(blocks):
        label_to_index[label] = position
        # Blocks that only return let a goto to them render as "return;".
        if len(statements) == 1 and statements[0] == "return;":
            return_labels.add(label)
    render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] = {}
    outcome = render_structured_region(
        blocks,
        label_to_index,
        0,
        len(blocks),
        return_labels,
        None,
        None,
        render_cache,
    )
    return None if outcome is None else outcome[0]
2026-03-26 00:37:17 +01:00
def _match_selector_loop(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    index: int,
    return_labels: set[str],
) -> tuple[list[str], list[str], int] | None:
    """Match a loop headed by the block after the selector block at ``index``.

    Returns ``(leading statements of the head block, loop body lines, exit
    index)``, or None when the following blocks do not form a loop that jumps
    back to its head.
    """
    if index + 1 >= len(blocks):
        return None
    head_label, head_statements = blocks[index + 1]
    head_terminal = parse_terminal_statement(head_statements[-1]) if head_statements else None
    if head_terminal is None or head_terminal.kind != "if":
        return None
    exit_index = label_to_index.get(head_terminal.target or "")
    if exit_index is None or exit_index <= index + 1:
        return None
    tail_index = last_nonempty_block_index(blocks, index + 2, exit_index)
    if tail_index is None:
        return None
    tail_terminal = parse_terminal_statement(blocks[tail_index][1][-1])
    if tail_terminal is None or tail_terminal.kind != "goto" or tail_terminal.target != head_label:
        return None
    body = render_structured_region(
        blocks,
        label_to_index,
        index + 2,
        exit_index,
        return_labels,
        {head_label},
    )
    if body is None:
        return None
    return head_statements[:-1], body[0], exit_index


def render_partially_structured_blocks(blocks: list[tuple[str, list[str]]]) -> list[str]:
    """Render blocks with labels, structuring each construct that can be.

    This is the fallback used when the event as a whole cannot be structured.
    Every block is emitted under its label. A selector chain, a head-tested
    loop, a selector-driven ``for`` loop or an infinite loop starting at a
    block replaces that block and the blocks it covers. Anything else is
    emitted as its flat statements.

    Statements that precede the terminal ``if`` of a block heading a selector
    chain or loop are emitted before the construct. This matches
    ``render_structured_region``, which emits ``statements[:-1]`` first.
    """
    if not blocks:
        return []
    label_to_index = {label: index for index, (label, _) in enumerate(blocks)}
    return_labels = {
        label
        for label, statements in blocks
        if len(statements) == 1 and statements[0] == "return;"
    }
    lines: list[str] = []
    index = 0
    while index < len(blocks):
        label, statements = blocks[index]
        if is_loop_selector_only_block(statements):
            loop_selector = parse_loop_selector_statement(statements[0])
            selector_loop = None
            if loop_selector is not None:
                selector_loop = _match_selector_loop(blocks, label_to_index, index, return_labels)
            if selector_loop is not None:
                leading, loop_lines, exit_index = selector_loop
                lines.append(f" {label}:")
                # Keep what the loop head computes before its test.
                lines.extend(f" {statement}" for statement in leading)
                lines.append(f" for {loop_selector} {{")
                lines.extend(f" {line}" for line in indent_lines(loop_lines))
                lines.append(" }")
                lines.append("")
                index = exit_index
                continue
            lines.append(f" {label}:")
            lines.append(f" {statements[0]}")
            lines.append("")
            index += 1
            continue

        # Constructs driven by this block's terminal "if".
        headed_construct = render_selector_chain(blocks, label_to_index, index, len(blocks), return_labels)
        if headed_construct is None:
            headed_construct = render_loop_construct(
                blocks,
                label_to_index,
                index,
                len(blocks),
                return_labels,
            )
        if headed_construct is not None:
            construct_lines, next_index = headed_construct
            lines.append(f" {label}:")
            # The construct only consumes the terminal; emit the rest first.
            lines.extend(f" {statement}" for statement in statements[:-1])
            for statement in construct_lines:
                lines.append(f" {statement}" if statement else "")
            lines.append("")
            index = next_index
            continue

        # The infinite-loop renderer includes the head block in its body.
        infinite_loop_construct = render_infinite_loop_construct(
            blocks,
            label_to_index,
            index,
            len(blocks),
            return_labels,
        )
        if infinite_loop_construct is not None:
            loop_lines, loop_join_index = infinite_loop_construct
            lines.append(f" {label}:")
            for statement in loop_lines:
                lines.append(f" {statement}" if statement else "")
            lines.append("")
            index = loop_join_index
            continue

        lines.append(f" {label}:")
        for statement in statements:
            lines.append(f" {statement}")
        lines.append("")
        index += 1
    return lines
def render_pseudocode(ir: dict[str, Any], shape_catalog: ShapeCatalog | None = None) -> str:
    """Render the event IR as a pseudocode function.

    The output is a ``function <class>_<slot>()`` header, an optional ``var``
    list of debug symbols, and the body. The body is fully structured when
    possible and label-based otherwise. A trailing comment lists debug
    symbols and field tags. The finished text is passed through
    ``apply_shape_catalog_to_pseudocode`` with ``shape_catalog``.
    """
    event = ir["event"]
    owner = ir["class"]
    symbols = ir["debug_symbols"]

    slot_name = sanitize_identifier(event["event_name_hint"] or f"slot_{event['slot']:02X}")
    signature = f"function {sanitize_identifier(owner['class_name'].lower())}_{slot_name}() "
    origin_note = f"/* entry={owner['entry_index']} class_id=0x{owner['class_id']:04X} slot=0x{event['slot']:02X} */"
    lines = [signature + origin_note, "{"]

    if symbols:
        lines.append(" var")
        last_position = len(symbols) - 1
        for position, symbol in enumerate(symbols):
            # Names are comma-separated; the final one closes the list.
            separator = ";" if position == last_position else ","
            lines.append(f" {sanitize_identifier(symbol['name'])}{separator} /* {symbol['bp_repr']} type=0x{symbol['type_id']:02X} */")
        lines.append("")

    rendered_blocks = decompile_pseudocode_blocks(ir)
    structured_lines = render_structured_pseudocode(rendered_blocks)
    if structured_lines is None:
        lines.extend(render_partially_structured_blocks(rendered_blocks))
    else:
        lines.extend(f" {statement}" if statement else "" for statement in structured_lines)

    if symbols or ir["field_tags"]:
        lines.append("")
        lines.append(" /* post-return metadata (not executable):")
        lines.extend(
            f" debug_symbol {sanitize_identifier(symbol['name'])} {symbol['bp_repr']} type=0x{symbol['type_id']:02X} unk1=0x{symbol['unknown1']:02X} unk3=0x{symbol['unknown3']:02X}"
            for symbol in symbols
        )
        lines.extend(f" field_tag {tag['tag_label']} ({tag['bp_repr']})" for tag in ir["field_tags"])
        lines.append(" */")

    lines.append("}")
    return apply_shape_catalog_to_pseudocode("\n".join(lines) + "\n", shape_catalog)
2026-03-25 23:32:13 +01:00
2026-03-26 23:12:38 +01:00
def validate_pseudocode_text(text: str) -> list[str]:
    """Check rendered pseudocode for brace and label consistency.

    Reports, in this order: per-line problems (a closing brace with nothing
    open, a duplicate label), an unbalanced final brace depth, and every
    ``goto`` whose target label is never defined. Returns an empty list when
    nothing is wrong.
    """
    problems: list[str] = []
    first_definition: dict[str, int] = {}
    jumps: list[tuple[str, int]] = []
    depth = 0
    for line_number, raw_line in enumerate(text.splitlines(), start=1):
        content = raw_line.strip()
        if not content:
            continue
        if content.endswith("{"):
            depth += 1
        elif content == "}":
            depth -= 1
            if depth < 0:
                problems.append(f"line {line_number}: unexpected closing brace")
                depth = 0  # Recover so later lines are still checked.
        declared = re.fullmatch(r"([A-Za-z_][A-Za-z0-9_]*):", content)
        if declared is not None:
            name = declared.group(1)
            if name in first_definition:
                problems.append(
                    f"line {line_number}: duplicate label {name} (first at line {first_definition[name]})"
                )
            else:
                first_definition[name] = line_number
        jumps.extend(
            (found.group(1), line_number)
            for found in re.finditer(r"\bgoto ([A-Za-z_][A-Za-z0-9_]*)\s*;", content)
        )
    if depth != 0:
        problems.append(f"unbalanced braces: final depth {depth}")
    # Targets are checked last because labels may be defined after their use.
    problems.extend(
        f"line {line_number}: goto target {target} has no label"
        for target, line_number in jumps
        if target not in first_definition
    )
    return problems
def render_text(ir: dict[str, Any]) -> str:
    """Render the event IR as a plain-text listing.

    The listing has a four-line header (class, slot, body range, SHA1), one
    line per decoded op with a label line before labelled offsets, and
    optional sections for debug symbols, field tags and unknown trailing
    bytes.
    """
    labels = build_listing_labels(ir)
    owner = ir["class"]
    event = ir["event"]
    body = ir["body"]

    def format_operand(key: str, value: Any) -> str:
        # Empty values are omitted from the listing entirely.
        if value is None or value == "":
            return ""
        if value and key == "intrinsic_name_hint":
            return f"hint={value}"
        if value and key == "target_event_name_hint":
            return f"event={value}"
        if isinstance(value, int):
            if key == "target_offset":
                # Jump targets are shown as a label, or absolute hex if unlabelled.
                target_absolute = value + event["derived_body_start"]
                shown = labels.get(target_absolute) or f"0x{target_absolute:04X}"
                return f"->{shown}"
            if key.endswith("_signed"):
                return f"{key}={value}"
            return f"{key}=0x{value:X}"
        return f"{key}={value}"

    lines = [
        f"Class {owner['class_name']} entry={owner['entry_index']} class_id=0x{owner['class_id']:X}",
        f"Slot 0x{event['slot']:02X} hint={event['event_name_hint']} raw_word=0x{event['raw_event_entry_word']:04X} raw_code_off=0x{event['raw_code_offset']:08X}",
        f"Body 0x{event['derived_body_start']:04X}..0x{event['derived_body_end']:04X} len={event['derived_body_length']} end={body['end_reason']} ops={body['decoded_op_count']}",
        f"SHA1 {body['raw_body_sha1']}",
        "",
    ]
    for op in ir["ops"]:
        absolute_offset = op["absolute_body_offset"]
        label = labels.get(absolute_offset)
        if label is not None:
            lines.append("")
            lines.append(f"{label}:")
        operand_text = " ".join(
            filter(None, (format_operand(key, value) for key, value in op["operands"].items()))
        )
        lines.append(f"{absolute_offset:04X}: {op['opcode']:02X} {op['mnemonic']:<24} {operand_text} raw={op['raw_bytes']}")
    if ir["debug_symbols"]:
        lines.append("")
        lines.append(f"Debug symbols @ 0x{body['debug_symbol_offset']:04X}:")
        lines.extend(
            f" {symbol['index']:02X}: unk1=0x{symbol['unknown1']:02X} type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') {symbol['bp_repr']} unk3=0x{symbol['unknown3']:02X} name={symbol['name']}"
            for symbol in ir["debug_symbols"]
        )
    if ir["field_tags"]:
        lines.append("")
        lines.append("Field tags:")
        lines.extend(f" {tag['tag_label']} ({tag['bp_repr']})" for tag in ir["field_tags"])
    if body["unknown_trailing_bytes"]:
        lines.append("")
        lines.append(f"unknown_trailing_bytes={body['unknown_trailing_bytes']}")
    return "\n".join(lines) + "\n"
def main() -> None:
    """Command-line entry point: decode one class/slot body and emit its views.

    Parses the command line, looks up the event and layout rows for
    ``--class`` / ``--slot`` under ``--extracted-root``, builds the IR and
    always emits it as indented JSON.  The text, script, pseudocode and
    family-diff views are produced only when their ``--emit-*`` /
    ``--family-diff`` flag is set.  Each view is written to its
    ``--*-output`` path (UTF-8) when one is given and printed to stdout
    otherwise.
    """

    def deliver(rendered: str, destination: str | None, *, file_suffix: str = "") -> None:
        # A falsy destination (option missing or empty) means stdout.
        if not destination:
            print(rendered)
            return
        # ``file_suffix`` lets JSON payloads gain the trailing newline that
        # ``print`` would otherwise have supplied.
        Path(destination).write_text(rendered + file_suffix, encoding="utf-8")

    cli = argparse.ArgumentParser(
        description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts"
    )
    cli.add_argument(
        "--class",
        dest="class_name",
        required=True,
        help="Class name from class_event_index.tsv, for example NPCTRIG",
    )
    cli.add_argument("--slot", required=True, help="Event slot, for example 0x0A")
    cli.add_argument(
        "--extracted-root",
        default=str(EXTRACTED_ROOT),
        help="Extracted USECODE root containing class_event_index.tsv and chunks/",
    )
    cli.add_argument(
        "--variant",
        choices=["auto", "regret", "remorse"],
        default="auto",
        help="Crusader intrinsic numbering to apply (default: auto, fallback regret)",
    )
    cli.add_argument(
        "--shape-csv",
        help=(
            "Shape catalog CSV to apply to pseudocode output "
            "(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
            "Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
        ),
    )
    cli.add_argument("--output", help="Write IR JSON to this file instead of stdout")

    # Each optional view has an on/off switch followed by its output path;
    # registering them in pairs keeps the original option order in --help.
    view_options = (
        (
            "--emit-text",
            "Emit a readable text listing beside the JSON",
            "--text-output",
            "Write the text listing to this file",
        ),
        (
            "--emit-script",
            "Emit a decompiled script-style view beside the JSON",
            "--script-output",
            "Write the script-style decompilation to this file",
        ),
        (
            "--emit-pseudocode",
            "Emit a higher-level pseudocode view beside the JSON",
            "--pseudocode-output",
            "Write the pseudocode view to this file",
        ),
    )
    for emit_flag, emit_help, output_flag, output_help in view_options:
        cli.add_argument(emit_flag, action="store_true", help=emit_help)
        cli.add_argument(output_flag, help=output_help)

    cli.add_argument(
        "--family-diff",
        action="store_true",
        help="Emit repeated-body family diff report instead of (or alongside) the IR",
    )
    cli.add_argument("--family-diff-output", help="Write the family diff JSON to this file")
    cli.add_argument("--family-diff-text-output", help="Write the family diff text report to this file")
    args = cli.parse_args()

    slot = parse_int(args.slot)
    extracted_root = Path(args.extracted_root)
    if args.shape_csv:
        shape_csv = Path(args.shape_csv)
    else:
        shape_csv = default_shape_catalog_path(extracted_root, args.variant)
    shape_catalog = load_shape_catalog(shape_csv)

    event_row, layout_row = select_rows(args.class_name, slot, extracted_root)
    # "auto" is passed on as None rather than as a literal variant name.
    variant_override = args.variant if args.variant != "auto" else None
    ir = parse_body_ir(event_row, layout_row, variant_override, extracted_root)
    deliver(json.dumps(ir, indent=2), args.output, file_suffix="\n")

    if args.emit_text:
        deliver(render_text(ir), args.text_output)
    if args.emit_script:
        deliver(render_script(ir), args.script_output)
    if args.emit_pseudocode:
        deliver(render_pseudocode(ir, shape_catalog=shape_catalog), args.pseudocode_output)
    if args.family_diff:
        diff = compute_family_diff(args.class_name, slot, extracted_root)
        deliver(json.dumps(diff, indent=2), args.family_diff_output, file_suffix="\n")
        deliver(render_family_diff_text(diff), args.family_diff_text_output)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()