from __future__ import annotations import argparse import ast import csv import hashlib import json import re from functools import lru_cache from dataclasses import dataclass from pathlib import Path from typing import Any REPO_ROOT = Path(__file__).resolve().parents[1] EXTRACTED_ROOT = REPO_ROOT / "USECODE" / "EUSECODE_extracted" CLASS_EVENT_INDEX = EXTRACTED_ROOT / "class_event_index.tsv" CLASS_LAYOUT_INDEX = EXTRACTED_ROOT / "class_layout_index.tsv" RUNTIME_VM_IR_INDEX = EXTRACTED_ROOT / "runtime_vm_ir.tsv" CHUNKS_DIR = EXTRACTED_ROOT / "chunks" UNKCOFFS_DIR = REPO_ROOT / "tools" / "unkcoffs" DEFAULT_GAME_VARIANT = "regret" INTRINSIC_HINT_PATHS = { "regret": UNKCOFFS_DIR / "regret_ints.py", "remorse": UNKCOFFS_DIR / "remorse_ints.py", } def resolve_extracted_root(extracted_root: Path | str | None = None) -> Path: if extracted_root is None: return EXTRACTED_ROOT return Path(extracted_root) def extracted_root_paths(extracted_root: Path | str | None = None) -> tuple[Path, Path, Path, Path]: root = resolve_extracted_root(extracted_root) return ( root / "class_event_index.tsv", root / "class_layout_index.tsv", root / "runtime_vm_ir.tsv", root / "chunks", ) def repo_relative_path(path: Path) -> str: try: return str(path.relative_to(REPO_ROOT)).replace("\\", "/") except ValueError: return str(path).replace("\\", "/") def infer_flex_path(extracted_root: Path | str | None = None) -> str: root = resolve_extracted_root(extracted_root) parent = root.parent if parent == REPO_ROOT: return "EUSECODE.FLX" return f"{repo_relative_path(parent)}/EUSECODE.FLX" EVENT_NAME_HINTS = { 0x00: "look", 0x01: "use", 0x02: "anim", 0x03: "setActivity", 0x04: "cachein", 0x05: "hit", 0x06: "gotHit", 0x07: "hatch", 0x08: "schedule", 0x09: "release", 0x0A: "equip", 0x0B: "unequip", 0x0C: "combine", 0x0D: "func0D", 0x0E: "calledFromAnim", 0x0F: "enterFastArea", 0x10: "leaveFastArea", 0x11: "cast", 0x12: "justMoved", 0x13: "avatarStoleSomething", 0x14: "animGetHit", 0x15: "func15", 0x16: "func16", 0x17: "func17", 0x18: "func18", 0x19: "func19", 0x1A: "func1A", 0x1B: "func1B", 0x1C: "func1C", 0x1D: "func1D", 0x1E: "func1E", 0x1F: "func1F", } # Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h # Source note: "current discovered intrinsics are for regret1.21 only" # This is used as a hint only – ordinal mapping may differ between builds. BASE_INTRINSIC_HINTS: dict[int, str] = { 0x0000: "Intrinsic0000()", 0x0001: "Item::getFrame(void)", 0x0002: "Item::setFrame(uint16)", 0x0003: "Item::getMapNum(void)", 0x0004: "Item::getStatus(void)", 0x0005: "Item::orStatus(sint16)", 0x0006: "Item::callEvent0A(sint16)", 0x0007: "Intrinsic0007()", 0x0008: "Item::isNpc(void)", 0x0009: "Item::getZ(void)", 0x000A: "Intrinsic000A()", 0x000B: "Item::getQLo(void)", 0x000C: "Item::destroy(void)", 0x000D: "Intrinsic000D()", 0x000E: "Item::getX(void)", 0x000F: "Item::getY(void)", 0x0010: "Intrinsic0010()", 0x0011: "Item::getType(void)", 0x0012: "Intrinsic0012()", 0x0013: "Intrinsic0013()", 0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)", 0x0015: "Item::andStatus(void)", 0x0016: "Intrinsic0016()", 0x0017: "Intrinsic00C3()", 0x0018: "Intrinsic00DA()", 0x0019: "Intrinsic0019()", 0x001A: "Item::create(uint16,uint16)", 0x001B: "Item::pop(uint16,uint16,uint8)", 0x001C: "Intrinsic00FA()", 0x001D: "Item::push(void)", 0x001E: "Intrinsic001E()", 0x001F: "Item::getQLo(void)", 0x0020: "Item::setQLo(sint16)", 0x0021: "Item::getQHi(void)", 0x0022: "Item::setQHi(sint16)", 0x0023: "Intrinsic0023()", 0x0024: "Item::hurl(sint16,sint16,sint16,sint16)", 0x0025: "Item::getCY(void)", 0x0026: "Item::getCX(void)", 0x0027: "Intrinsic0027()", 0x0028: "Item::setNpcNum(sint16)", 0x0029: "Intrinsic0029()", 0x002A: "Intrinsic002A()", 0x002B: "Item::pop(void)", 0x002C: "Intrinsic002C()", 0x002D: "Item::isCompletelyOn(uint16)", 0x002E: "Item::pop(uint16)", 0x002F: "Intrinsic002F()", 0x0030: "Intrinsic0030()", 0x0031: "Item::getFamily(void)", 0x0032: "Item::destroyContents(void)", 0x0033: "Intrinsic0033()", 0x0034: "Item::getDirToItem(uint16)", 0x0035: "Intrinsic0035()", 0x0036: "Intrinsic0036()", 0x0037: "Intrinsic0037()", 0x0038: "Item::andStatus(void)", 0x0039: "Kernel::resetRef(uint16,ProcessType)", 0x003A: "Item::touch(void)", 0x003B: "Egg::getEggId(void)", 0x003C: "Intrinsic003C()", 0x003D: "Intrinsic003D()", 0x003E: "Item::callEvent11(sint16)", 0x003F: "Intrinsic003F()", 0x0040: "Intrinsic0040()", 0x0041: "Item::isOn(uint16)", 0x0042: "Item::getQHi(void)", 0x0043: "Item::isOn(uint16)", 0x0044: "Item::getQHi(void)", 0x0045: "Item::isOn(uint16)", 0x0046: "Item::getQHi(void)", 0x0047: "Item::isOn(uint16)", 0x0048: "Item::getQHi(void)", 0x0049: "Item::isOn(uint16)", 0x004A: "Item::getQHi(void)", 0x004B: "Item::isOn(uint16)", 0x004C: "Item::getQHi(void)", 0x004D: "Intrinsic004D()", 0x004E: "Npc::isDead(void)", 0x004F: "Intrinsic009C()", 0x0050: "Intrinsic0050()", 0x0051: "Intrinsic0051()", 0x0052: "Intrinsic0052()", 0x0053: "Intrinsic00BD()", 0x0054: "Intrinsic0054()", 0x0055: "Intrinsic0055()", 0x0056: "Intrinsic0056()", 0x0057: "Intrinsic0057()", 0x0058: "Item::use(void)", 0x0059: "Item::setQuantity(sint16)", 0x005A: "Intrinsic005A()", 0x005B: "Item::getSurfaceWeight(void)", 0x005C: "Intrinsic005C()", 0x005D: "Item::setFrame(uint16)", 0x005E: "Intrinsic00DA()", 0x005F: "Intrinsic005F()", 0x0060: "Intrinsic0060()", 0x0061: "Intrinsic0061()", 0x0062: "Intrinsic0062()", 0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)", 0x0064: "Item::getPoint(WorldPoint&)", 0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)", 0x0066: "Item::fall(void)", 0x0067: "Item::hurl(sint16,sint16,sint16,sint16)", 0x0068: "Kernel::getNumProcesses(uint16,ProcessType)", 0x0069: "Item::getCY(void)", 0x006A: "Intrinsic006A()", 0x006B: "Intrinsic006B()", 0x006C: "Intrinsic006C()", 0x006D: "Intrinsic006D()", 0x006E: "Intrinsic006E()", 0x006F: "Item::isInNpc(void)", 0x0070: "Intrinsic0070()", 0x0071: "Intrinsic0071()", 0x0072: "Intrinsic0072()", 0x0073: "Intrinsic0073()", 0x0074: "Npc::isDead(void)", 0x0075: "Item::getNpcNum(void)", 0x0076: "IntrinsicReturn0", 0x0077: "Intrinsic0077()", 0x0078: "Item::callEvent0B(sint16)", 0x0079: "Item::andStatus(void)", 0x007A: "Item::move(uint16,uint16,uint8)", 0x007B: "Intrinsic007B()", 0x007C: "Intrinsic007C()", 0x007D: "Intrinsic007D()", 0x007E: "Intrinsic007E()", 0x007F: "Intrinsic007F()", 0x0080: "Intrinsic0080()", 0x0081: "Intrinsic0081()", 0x0082: "Intrinsic0082()", 0x0083: "Intrinsic0083()", 0x0084: "Intrinsic0084()", 0x0085: "Intrinsic0085()", 0x0086: "teleportToEgg(sint16,int,uint8)", 0x0087: "Intrinsic0087()", 0x0088: "Intrinsic0088()", 0x0089: "Intrinsic00BD()", 0x008A: "Item::getQuality(void)", 0x008B: "Item::setQuality(sint16)", 0x008C: "Intrinsic008C()", 0x008D: "Intrinsic008D()", 0x008E: "Intrinsic008E()", 0x008F: "Camera::getX(void)", 0x0090: "Camera::getY(void)", 0x0091: "Item::setMapNum(sint16)", 0x0092: "Item::getNpcNum(void)", 0x0093: "Item::shoot(WorldPoint&,sint16,sint16)", 0x0094: "Intrinsic0094()", 0x0095: "Item::enterFastArea(void)", 0x0096: "Intrinsic00CA()", 0x0097: "Item::hurl(sint16,sint16,sint16,sint16)", 0x0098: "Item::getNpcNum(void)", 0x0099: "Intrinsic0099()", 0x009A: "teleportToEgg(sint16,uint8)", 0x009B: "Intrinsic009B()", 0x009C: "Intrinsic009C()", 0x009D: "Intrinsic009D()", 0x009E: "Intrinsic009E()", 0x009F: "Intrinsic009F()", 0x00A0: "Item::andStatus(void)", 0x00A1: "Item::getUnkEggType(void)", 0x00A2: "Egg::setEggXRange(uint16)", 0x00A3: "Item::setFrame(uint16)", 0x00A4: "Item::overlaps(uint16)", 0x00A5: "Item::isOn(uint16)", 0x00A6: "Item::getQHi(void)", 0x00A7: "Intrinsic00DA()", 0x00A8: "Item::getCY(void)", 0x00A9: "Intrinsic00A9()", 0x00AA: "Item::isOn(uint16)", 0x00AB: "Npc::isDead(void)", 0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00AD: "Intrinsic00AD()", 0x00AE: "Item::getQHi(void)", 0x00AF: "Item::andStatus(void)", 0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00B1: "Item::andStatus(void)", 0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00B3: "Item::andStatus(void)", 0x00B4: "Item::getDirToCoords(uint16,uint16)", 0x00B5: "Intrinsic00B5()", 0x00B6: "Intrinsic00B6()", 0x00B7: "Item::getNpcNum(void)", 0x00B8: "Item::getCY(void)", 0x00B9: "Item::isOn(uint16)", 0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)", 0x00BB: "Npc::isDead(void)", 0x00BC: "Intrinsic00BC()", 0x00BD: "Intrinsic00BD()", 0x00BE: "Intrinsic00BE()", 0x00BF: "Item::andStatus(void)", 0x00C0: "Intrinsic00C0()", 0x00C1: "Intrinsic00C1()", 0x00C2: "IntrinsicReturn0", 0x00C3: "Intrinsic00C3()", 0x00C4: "Item::getQHi(void)", 0x00C5: "Item::setQuality(sint16)", 0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00C7: "Intrinsic00C7()", 0x00C8: "Intrinsic00C8()", 0x00C9: "Item::callEvent0A(sint16)", 0x00CA: "Intrinsic00CA()", 0x00CB: "Item::isOn(uint16)", 0x00CC: "Intrinsic00CC()", 0x00CD: "Intrinsic00CD()", 0x00CE: "Item::getQHi(void)", 0x00CF: "Item::isOn(uint16)", 0x00D0: "Intrinsic00D0()", 0x00D1: "Intrinsic00D1()", 0x00D2: "Intrinsic00D2()", 0x00D3: "Intrinsic00FA()", 0x00D4: "Camera::getY(void)", 0x00D5: "Intrinsic00D5()", 0x00D6: "Intrinsic00D6()", 0x00D7: "Intrinsic00D7()", 0x00D8: "Intrinsic00D8()", 0x00D9: "Intrinsic00D9()", 0x00DA: "Intrinsic00DA()", 0x00DB: "Intrinsic00DB()", 0x00DC: "Item::getQLo(void)", 0x00DD: "Item::getQHi(void)", 0x00DE: "Item::getNpcNum(void)", 0x00DF: "Intrinsic00DF()", 0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00E1: "Intrinsic00FA()", 0x00E2: "Item::getQLo(void)", 0x00E3: "Item::getCY(void)", 0x00E4: "Item::getNpcNum(void)", 0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00E6: "Item::getNpcNum(void)", 0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00E8: "Item::getNpcNum(void)", 0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00EA: "Item::getNpcNum(void)", 0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00EC: "Item::getNpcNum(void)", 0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00EE: "Item::getNpcNum(void)", 0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00F0: "Item::getNpcNum(void)", 0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00F2: "Item::getNpcNum(void)", 0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00F4: "Item::getNpcNum(void)", 0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)", 0x00F6: "Item::getNpcNum(void)", 0x00F7: "Item::andStatus(void)", 0x00F8: "Intrinsic00FA()", 0x00F9: "Item::getQLo(void)", 0x00FA: "Intrinsic00FA()", 0x00FB: "Intrinsic00FB()", 0x00FC: "Intrinsic00FC()", 0x00FD: "Item::getQLo(void)", 0x00FE: "Intrinsic00FE()", 0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)", 0x0100: "Item::andStatus(void)", 0x0101: "Item::isOn(uint16)", 0x0102: "Npc::isDead(void)", 0x0103: "Intrinsic00BD()", 0x0104: "Item::getQHi(void)", 0x0105: "Intrinsic00DA()", 0x0106: "Intrinsic00FA()", 0x0107: "Item::getQLo(void)", 0x0108: "Item::isOn(uint16)", 0x0109: "Item::getQHi(void)", 0x010A: "Item::isOn(uint16)", 0x010B: "Item::getQHi(void)", 0x010C: "Item::hurl(sint16,sint16,sint16,sint16)", 0x010D: "Item::getNpcNum(void)", 0x010E: "Item::getCY(void)", 0x010F: "Item::hurl(sint16,sint16,sint16,sint16)", 0x0110: "Item::isOn(uint16)", 0x0111: "Intrinsic0111()", 0x0112: "IntrinsicReturn0", 0x0113: "Npc::isDead(void)", 0x0114: "Intrinsic0088()", 0x0115: "Intrinsic00C1()", 0x0116: "Item::getQHi(void)", 0x0117: "Intrinsic00BD()", 0x0118: "Item::andStatus(void)", 0x0119: "Item::getNpcNum(void)", 0x011A: "Item::andStatus(void)", 0x011B: "Item::getNpcNum(void)", 0x011C: "Intrinsic011C()", 0x011D: "Item::andStatus(void)", 0x011E: "Item::getNpcNum(void)", 0x011F: "Item::AvatarStoleSomehting(uint16)", 0x0120: "Item::andStatus(void)", 0x0121: "Item::getNpcNum(void)", 0x0122: "Item::getQ(void)", 0x0123: "Item::setQ(uint)", 0x0124: "Item::andStatus(void)", 0x0125: "Item::getNpcNum(void)", 0x0126: "Item::andStatus(void)", 0x0127: "Item::getNpcNum(void)", 0x0128: "Item::andStatus(void)", 0x0129: "Item::getNpcNum(void)", 0x012A: "Item::andStatus(void)", 0x012B: "Item::getNpcNum(void)", 0x012C: "Item::andStatus(void)", 0x012D: "Item::getNpcNum(void)", 0x012E: "Intrinsic00C3()", 0x012F: "Item::andStatus(void)", 0x0130: "Item::getNpcNum(void)", 0x0131: "Intrinsic0131()", 0x0132: "Item::andStatus(void)", 0x0133: "Item::hurl(sint16,sint16,sint16,sint16)", 0x0134: "Item::andStatus(void)", 0x0135: "Camera::getY(void)", 0x0136: "Camera::getZ(void)", 0x0137: "Intrinsic0137()", 0x0138: "Intrinsic009C()", 0x0139: "Item::getTypeFlagCrusader(sint16)", 0x013A: "Item::getNpcNum(void)", 0x013B: "Item::hurl(sint16,sint16,sint16,sint16)", 0x013C: "Item::getCY(void)", 0x013D: "Item::getCZ(void)", 0x013E: "Item::setFrame(uint16)", 0x013F: "Intrinsic013F()", 0x0140: "Intrinsic0140()", 0x0141: "Intrinsic0141()", 0x0142: "Intrinsic0142()", 0x0143: "Npc::isDead(void)", 0x0144: "Intrinsic00FA()", 0x0145: "Intrinsic0145()", 0x0146: "Intrinsic0146()", 0x0147: "Intrinsic0147()", 0x0148: "Item::getNpcNum(void)", 0x0149: "Item::getQLo(void)", 0x014A: "Item::andStatus(void)", 0x014B: "Intrinsic014B()", 0x014C: "Intrinsic014C()", 0x014D: "Intrinsic014D()", 0x014E: "Intrinsic003C()", 0x014F: "Egg::getEggXRange(void)", 0x0150: "Intrinsic009C()", 0x0151: "Intrinsic0072()", 0x0152: "Item::setFrame(uint16)", 0x0153: "Intrinsic00C1()", 0x0154: "Intrinsic00C3()", 0x0155: "Intrinsic00C1()", 0x0156: "Item::isOn(uint16)", 0x0157: "Intrinsic00C3()", 0x0158: "Intrinsic00FA()", 0x0159: "Item::getQHi(void)", 0x015A: "Item::getQLo(void)", 0x015B: "Intrinsic00C1()", 0x015C: "Intrinsic00C3()", 0x015D: "Intrinsic015D()", } VARIANT_INTRINSIC_CALLSITE_HINTS: dict[str, dict[tuple[int, int], str]] = { "regret": { (0x001E, 0x10): "Item::I_fireWeapon(Item *, x, y, z, byte, int, byte)", }, "remorse": {}, } CLASS_EVENT_NAME_HINTS: dict[tuple[int, int], str] = { (0x0A0C, 0x32): "waitNTimerTicks", } LOOP_SELECTOR_FIELD_HINTS = { 0x3A: "family", 0x40: "shape", } SHAPE_CATALOG_FILENAME = "usecode_shape_catalog.csv" SHAPE_CATALOG_FILENAMES = { "remorse": "usecode_shape_catalog_remorse.csv", "regret": "usecode_shape_catalog_regret.csv", } SHAPE_CATALOG_FIELDNAMES = ["shape_code", "human_readable_id", "description"] NUMERIC_SHAPE_LITERAL_PATTERN = r"(?:0x[0-9A-Fa-f]+|\d+)" SHAPE_REFERENCE_PATTERNS = ( re.compile(rf"(?P\bshape=)(?P{NUMERIC_SHAPE_LITERAL_PATTERN})\b"), re.compile( rf"(?P\bItem\.(?:getShape|getType)\([^\)\n]*\)\s*(?:==|!=|<=|>=|<|>)\s*)(?P{NUMERIC_SHAPE_LITERAL_PATTERN})\b" ), re.compile(rf"(?P\bItem\.create\(\s*[^,\n]+,\s*)(?P{NUMERIC_SHAPE_LITERAL_PATTERN})\b"), re.compile(rf"(?P\bItem\.legal_create\(\s*)(?P{NUMERIC_SHAPE_LITERAL_PATTERN})\b"), ) ShapeCatalog = dict[int, dict[str, str]] def infer_shape_catalog_variant(extracted_root: Path | str | None = None, game_variant: str | None = None) -> str | None: normalized = normalize_game_variant(game_variant) if normalized is not None: return normalized root = resolve_extracted_root(extracted_root) inferred = infer_game_variant_from_path(root) if inferred is not None: return inferred try: relative_root = root.resolve().relative_to(REPO_ROOT.resolve()) except ValueError: relative_root = None if relative_root is not None: relative_parts = tuple(part.lower() for part in relative_root.parts) if relative_parts[:2] == ("usecode", "eusecode_extracted"): return "remorse" if relative_parts[:3] == ("usecode", "regret", "regret_usecode_extracted"): return "regret" return None def default_shape_catalog_path( extracted_root: Path | str | None = None, game_variant: str | None = None, ) -> Path: root = resolve_extracted_root(extracted_root) variant = infer_shape_catalog_variant(root, game_variant) filename = SHAPE_CATALOG_FILENAMES.get(variant, SHAPE_CATALOG_FILENAME) return root / filename def format_shape_code(shape_code: int) -> str: return f"0x{shape_code:04X}" def load_shape_catalog(path: Path | str | None) -> ShapeCatalog: if path is None: return {} shape_path = Path(path) if not shape_path.exists(): return {} catalog: ShapeCatalog = {} with shape_path.open("r", encoding="utf-8", newline="") as handle: reader = csv.DictReader(handle) for row in reader: shape_code = try_parse_int((row.get("shape_code") or "").strip()) if shape_code is None: continue catalog[shape_code] = { "shape_code": format_shape_code(shape_code), "human_readable_id": (row.get("human_readable_id") or "").strip(), "description": row.get("description") or "", } return catalog def shape_catalog_identifier(shape_code: int, shape_catalog: ShapeCatalog | None = None) -> str | None: if not shape_catalog: return None row = shape_catalog.get(shape_code) if row is None: return None human_readable_id = (row.get("human_readable_id") or "").strip() if not human_readable_id: return None return sanitize_identifier(human_readable_id) def format_shape_reference( shape_code: int, shape_catalog: ShapeCatalog | None = None, frame_expr: str | None = None, ) -> str: base = shape_catalog_identifier(shape_code, shape_catalog) or format_shape_code(shape_code) if frame_expr is None or not frame_expr.strip(): return base return f"{base}[{frame_expr}]" def iter_shape_code_matches(text: str): for pattern in SHAPE_REFERENCE_PATTERNS: for match in pattern.finditer(text): shape_code = try_parse_int(match.group("value")) if shape_code is not None: yield shape_code def collect_shape_codes_from_pseudocode(text: str) -> set[int]: return set(iter_shape_code_matches(text)) def apply_shape_catalog_to_pseudocode(text: str, shape_catalog: ShapeCatalog | None = None) -> str: if not shape_catalog: return text def replace_match(match: re.Match[str]) -> str: shape_code = try_parse_int(match.group("value")) if shape_code is None: return match.group(0) shape_id = shape_catalog_identifier(shape_code, shape_catalog) if shape_id is None: return match.group(0) return f"{match.group('prefix')}{shape_id}" rendered = text for pattern in SHAPE_REFERENCE_PATTERNS: rendered = pattern.sub(replace_match, rendered) return rendered def generic_loop_selector_call(name: str, arguments: list[tuple[str, str]]) -> str: rendered_args = ", ".join(f"{label}={expr}" for label, expr in arguments) return f"{name}({rendered_args})" def normalize_game_variant(value: str | None) -> str | None: if value is None: return None normalized = value.strip().lower() if not normalized or normalized == "auto": return None if normalized not in INTRINSIC_HINT_PATHS: raise ValueError(f"Unsupported Crusader variant: {value}") return normalized def infer_game_variant_from_path(path: Path | None) -> str | None: if path is None: return None lowered_parts = [part.lower() for part in path.parts] if any("regret" in part for part in lowered_parts): return "regret" if any("remorse" in part for part in lowered_parts): return "remorse" return None def resolve_game_variant(game_variant: str | None = None, source_root: Path | None = None) -> str: normalized = normalize_game_variant(game_variant) if normalized is not None: return normalized inferred = infer_game_variant_from_path(source_root) if inferred is not None: return inferred return DEFAULT_GAME_VARIANT def load_intrinsic_hints_from_file(path: Path) -> dict[int, str]: if not path.exists(): return {} try: module = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) except (OSError, SyntaxError): return {} for node in module.body: if not isinstance(node, ast.Assign): continue if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): continue if node.targets[0].id != "intrinsics": continue try: values = ast.literal_eval(node.value) except (SyntaxError, ValueError): return {} if not isinstance(values, list): return {} return { index: str(value) for index, value in enumerate(values) if isinstance(value, str) and value.strip() } return {} def normalize_intrinsic_hint(name: str) -> str: normalized = name.strip() normalized = re.sub(r"^(?:unsigned|signed|void|byte|char|short|long|int\d+|uint\d+|sint\d+)\s+(?=[A-Za-z_])", "", normalized) normalized = re.sub(r"(? dict[int, str]: variant = resolve_game_variant(game_variant, source_root) hints = {index: normalize_intrinsic_hint(name) for index, name in BASE_INTRINSIC_HINTS.items()} for index, name in load_intrinsic_hints_from_file(INTRINSIC_HINT_PATHS[variant]).items(): normalized = normalize_intrinsic_hint(name) existing = hints.get(index) if existing is None or not normalized.startswith("Intrinsic") or existing.startswith("Intrinsic"): hints[index] = normalized return hints _INTRINSIC_HINTS_CACHE: dict[str, dict[int, str]] = {} def get_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]: variant = resolve_game_variant(game_variant, source_root) cached = _INTRINSIC_HINTS_CACHE.get(variant) if cached is None: cached = build_intrinsic_hints(variant) _INTRINSIC_HINTS_CACHE[variant] = cached return cached def get_intrinsic_callsite_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[tuple[int, int], str]: variant = resolve_game_variant(game_variant, source_root) return VARIANT_INTRINSIC_CALLSITE_HINTS.get(variant, {}) INTRINSIC_HINTS = get_intrinsic_hints(DEFAULT_GAME_VARIANT) NO_ARG_MNEMONICS = { 0x08: "pop_result", 0x12: "pop_temp", 0x13: "pop_temp_dword", 0x14: "add", 0x15: "add_dword", 0x16: "concat", 0x17: "append_list", 0x1C: "sub", 0x1D: "sub_dword", 0x1E: "mul", 0x1F: "mul_dword", 0x20: "div", 0x21: "div_dword", 0x22: "mod", 0x23: "mod_dword", 0x24: "cmp", 0x25: "cmp_dword", 0x26: "strcmp", 0x27: "cmp_huge", 0x28: "lt", 0x29: "lt_dword", 0x2A: "le", 0x2B: "le_dword", 0x2C: "gt", 0x2D: "gt_dword", 0x2E: "ge", 0x2F: "ge_dword", 0x30: "not", 0x31: "not_dword", 0x32: "and", 0x33: "and_dword", 0x34: "or", 0x35: "or_dword", 0x36: "ne", 0x37: "ne_dword", 0x39: "bit_and", 0x3A: "bit_or", 0x3B: "bit_not", 0x3C: "lsh", 0x3D: "rsh", 0x50: "ret", 0x53: "suspend", 0x59: "push_pid", 0x5D: "push_retval_byte", 0x5E: "push_retval_word", 0x5F: "push_retval_dword", 0x60: "word_to_dword", 0x61: "dword_to_word", 0x68: "copy_string", 0x6A: "ptr_to_string", 0x6B: "str_to_ptr", 0x6D: "push_process_result", 0x73: "loopnext", 0x77: "set_info", 0x78: "process_exclude", 0x7A: "end", } def parse_int(value: str) -> int: return int(value, 0) def try_parse_int(value: str) -> int | None: try: return parse_int(value) except (TypeError, ValueError): return None def signed_byte(value: int) -> int: return value - 0x100 if value & 0x80 else value def bp_repr(value: int) -> str: disp = signed_byte(value) sign = "+" if disp >= 0 else "-" return f"[BP{sign}{abs(disp):02X}h]" def sp_repr(value: int) -> str: disp = signed_byte(value) sign = "+" if disp >= 0 else "-" return f"[SP{sign}{abs(disp):02X}h]" @dataclass class ParseResult: op: dict[str, Any] | None next_offset: int end_reason: str | None = None unknown_tail: bytes | None = None @dataclass class DebugSymbolRecord: index: int unknown1: int type_id: int type_char: str bp_offset: int bp_repr: str unknown3: int name: str @dataclass class DebugSymbolParseResult: debug_symbols: list[DebugSymbolRecord] end_offset: int has_end_opcode: bool trailing_bytes: bytes @dataclass class FieldTagRecord: tag_id: int bp_offset: int value_kind: int name: str @dataclass class FieldTagParseResult: field_tags: list[FieldTagRecord] end_offset: int trailing_bytes: bytes class BodyReader: def __init__(self, data: bytes, offset: int = 0) -> None: self.data = data self.offset = offset def read_u8(self) -> int: value = self.data[self.offset] self.offset += 1 return value def read_u16(self) -> int: value = int.from_bytes(self.data[self.offset:self.offset + 2], "little") self.offset += 2 return value def read_u32(self) -> int: value = int.from_bytes(self.data[self.offset:self.offset + 4], "little") self.offset += 4 return value def read_cstring(self) -> str: chars: list[str] = [] while self.offset < len(self.data): byte = self.read_u8() if byte == 0: break chars.append(chr(byte)) return "".join(chars) def read_fixed_string(self, length: int) -> str: raw = self.data[self.offset:self.offset + length] self.offset += length return raw.decode("latin-1", errors="replace").rstrip("\x00") def op_record(start: int, absolute_start: int, opcode: int, raw_bytes: bytes, mnemonic: str, operands: dict[str, Any]) -> dict[str, Any]: return { "offset": start, "absolute_body_offset": absolute_start, "opcode": opcode, "mnemonic": mnemonic, "raw_bytes": raw_bytes.hex(), "operands": operands, } def parse_one_op( body: bytes, start: int, intrinsic_hints: dict[int, str] | None = None, intrinsic_callsite_hints: dict[tuple[int, int], str] | None = None, ) -> ParseResult: reader = BodyReader(body, start) opcode = reader.read_u8() operands: dict[str, Any] = {} mnemonic = NO_ARG_MNEMONICS.get(opcode) active_intrinsic_hints = intrinsic_hints or INTRINSIC_HINTS active_callsite_hints = intrinsic_callsite_hints or get_intrinsic_callsite_hints(DEFAULT_GAME_VARIANT) if opcode == 0x00: operands = {"bp_offset": reader.read_u8(), "target": bp_repr(body[start + 1])} mnemonic = "pop_local_byte" elif opcode == 0x01: operands = {"bp_offset": reader.read_u8(), "target": bp_repr(body[start + 1])} mnemonic = "pop_local_word" elif opcode == 0x02: operands = {"bp_offset": reader.read_u8(), "target": bp_repr(body[start + 1])} mnemonic = "pop_local_dword" elif opcode == 0x03: bp_offset = reader.read_u8() size = reader.read_u8() operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size} mnemonic = "pop_local_blob" elif opcode == 0x04: operands = {"bp_offset": reader.read_u8(), "target": bp_repr(body[start + 1])} mnemonic = "pop_member_byte" elif opcode == 0x05: operands = {"bp_offset": reader.read_u8(), "target": bp_repr(body[start + 1])} mnemonic = "pop_member_word" elif opcode == 0x06: operands = {"bp_offset": reader.read_u8(), "target": bp_repr(body[start + 1])} mnemonic = "pop_member_dword" elif opcode == 0x07: bp_offset = reader.read_u8() size = reader.read_u8() operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size} mnemonic = "pop_member_blob" elif opcode == 0x09: bp_offset = reader.read_u8() element_size = reader.read_u8() slist_flag = reader.read_u8() operands = { "bp_offset": bp_offset, "target": bp_repr(bp_offset), "element_size": element_size, "slist_flag": slist_flag, } mnemonic = "pop_list_element" elif opcode == 0x0A: value = reader.read_u8() operands = {"value_u8": value, "value_signed": signed_byte(value)} mnemonic = "push_byte_immediate" elif opcode == 0x0B: operands = {"value_u16": reader.read_u16()} mnemonic = "push_word_immediate" elif opcode == 0x0C: operands = {"value_u32": reader.read_u32()} mnemonic = "push_dword_immediate" elif opcode == 0x0D: declared_length = reader.read_u16() text = reader.read_cstring() operands = {"declared_length": declared_length, "string": text} mnemonic = "push_string_immediate" elif opcode == 0x0E: element_size = reader.read_u8() count = reader.read_u8() operands = {"element_size": element_size, "count": count} mnemonic = "create_list" elif opcode == 0x0F: arg_bytes = reader.read_u8() intrinsic_ordinal = reader.read_u16() operands = { "intrinsic_ordinal": intrinsic_ordinal, "arg_bytes": arg_bytes, "intrinsic_name_hint": active_callsite_hints.get((intrinsic_ordinal, arg_bytes), active_intrinsic_hints.get(intrinsic_ordinal)), } mnemonic = "call_intrinsic" elif opcode == 0x10: operands = {"target_offset": reader.read_u16()} mnemonic = "call_near" elif opcode == 0x11: target_class_id = reader.read_u16() target_slot = reader.read_u16() operands = { "target_class_id": target_class_id, "target_event_slot": target_slot, "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot), } mnemonic = "call_class_event" elif opcode in {0x18, 0x19, 0x1A, 0x1B}: element_size = reader.read_u8() operands = {"element_size": element_size} mnemonic = { 0x18: "append_unique_inline", 0x19: "append_unique_indirect", 0x1A: "remove_matching_indirect", 0x1B: "remove_matching_inline", }[opcode] elif opcode == 0x38: element_size = reader.read_u8() slist_flag = reader.read_u8() operands = {"element_size": element_size, "slist_flag": slist_flag} mnemonic = "in_list" elif opcode in {0x3E, 0x3F, 0x40, 0x41, 0x43, 0x46, 0x47, 0x48, 0x49, 0x4B, 0x62, 0x63, 0x64, 0x69}: bp_offset = reader.read_u8() operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)} mnemonic = { 0x3E: "push_local_byte", 0x3F: "push_local_word", 0x40: "push_local_dword", 0x41: "push_local_string", 0x43: "push_local_slist", 0x46: "push_member_byte", 0x47: "push_member_word", 0x48: "push_member_dword", 0x49: "push_member_huge", 0x4B: "push_local_addr", 0x62: "free_local_string", 0x63: "free_local_slist", 0x64: "free_local_list", 0x69: "push_string_ptr", }[opcode] elif opcode == 0x42: bp_offset = reader.read_u8() element_size = reader.read_u8() operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "element_size": element_size} mnemonic = "push_local_list" elif opcode == 0x44: element_size = reader.read_u8() slist_flag = reader.read_u8() operands = {"element_size": element_size, "slist_flag": slist_flag} mnemonic = "push_list_element" elif opcode == 0x45: byte0 = reader.read_u8() byte1 = reader.read_u8() operands = {"value_a": byte0, "value_b": byte1} mnemonic = "push_huge" elif opcode in {0x4C, 0x4D}: size = reader.read_u8() operands = {"size": size} mnemonic = {0x4C: "push_indirect", 0x4D: "pop_indirect"}[opcode] elif opcode in {0x4E, 0x4F}: global_id = reader.read_u16() size = reader.read_u8() operands = {"global_id": global_id, "size": size} mnemonic = {0x4E: "push_global", 0x4F: "pop_global"}[opcode] elif opcode in {0x51, 0x52}: relative = reader.read_u16() signed_relative = relative - 0x10000 if relative & 0x8000 else relative target = reader.offset + signed_relative operands = {"relative_u16": relative, "relative_signed": signed_relative, "target_offset": target} mnemonic = {0x51: "jne", 0x52: "jmp"}[opcode] elif opcode == 0x54: arg0 = reader.read_u8() arg1 = reader.read_u8() operands = {"arg0": arg0, "arg1": arg1} mnemonic = "implies" elif opcode == 0x57: arg_bytes = reader.read_u8() this_size = reader.read_u8() target_class_id = reader.read_u16() target_slot = reader.read_u16() operands = { "arg_bytes": arg_bytes, "this_size": this_size, "target_class_id": target_class_id, "target_event_slot": target_slot, "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot), } mnemonic = "spawn" elif opcode == 0x58: target_class_id = reader.read_u16() target_slot = reader.read_u16() inline_offset = reader.read_u16() this_size = reader.read_u8() unknown = reader.read_u8() operands = { "target_class_id": target_class_id, "target_event_slot": target_slot, "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot), "inline_offset": inline_offset, "this_size": this_size, "unknown": unknown, } mnemonic = "spawn_inline" elif opcode == 0x5A: operands = {"local_bytes": reader.read_u8()} mnemonic = "init" elif opcode == 0x5B: operands = {"line_number": reader.read_u16()} mnemonic = "line_number" elif opcode == 0x5C: relative = reader.read_u16() symbol_offset = reader.offset + (relative - 0x10000 if relative & 0x8000 else relative) symbol = reader.read_fixed_string(8) trailing_zero = reader.read_u8() operands = { "symbol_offset": symbol_offset, "symbol": symbol, "trailing_zero": trailing_zero, } mnemonic = "symbol_info" elif opcode in {0x65, 0x66, 0x67, 0x6E, 0x6F, 0x74}: value = reader.read_u8() operands = {"value_u8": value} if opcode in {0x65, 0x66, 0x67}: operands["target"] = sp_repr(value) mnemonic = { 0x65: "free_stack_string", 0x66: "free_stack_list", 0x67: "free_stack_slist", 0x6E: "add_sp", 0x6F: "push_stack_addr", 0x74: "loopscr", }[opcode] elif opcode == 0x6C: bp_offset = reader.read_u8() copy_type = reader.read_u8() operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "copy_type": copy_type} mnemonic = "param_pid_chg" elif opcode == 0x70: current_var = reader.read_u8() string_bytes = reader.read_u8() loop_type = reader.read_u8() operands = {"current_var": current_var, "string_bytes": string_bytes, "loop_type": loop_type} mnemonic = "loop" elif opcode in {0x75, 0x76}: bp_offset = reader.read_u8() element_size = reader.read_u8() branch = reader.read_u16() signed_branch = branch - 0x10000 if branch & 0x8000 else branch target = reader.offset + signed_branch operands = { "bp_offset": bp_offset, "target_var": bp_repr(bp_offset), "element_size": element_size, "relative_u16": branch, "relative_signed": signed_branch, "target_offset": target, } mnemonic = {0x75: "foreach_list", 0x76: "foreach_slist"}[opcode] elif opcode == 0x79: operands = {"global_id": reader.read_u16()} mnemonic = "global_address" elif mnemonic is None: return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:]) raw = body[start:reader.offset] op = op_record(start, start, opcode, raw, mnemonic, operands) end_reason = "end_opcode" if opcode == 0x7A else None return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason) def load_tsv_rows(path: Path) -> list[dict[str, str]]: with path.open("r", encoding="utf-8", newline="") as handle: return list(csv.DictReader(handle, delimiter="\t")) @lru_cache(maxsize=None) def load_class_name_hints_by_id(extracted_root_key: str) -> dict[int, str]: _, class_layout_index, _, _ = extracted_root_paths(Path(extracted_root_key)) hints: dict[int, str] = {} for row in load_tsv_rows(class_layout_index): class_id = try_parse_int(row.get("class_id", "")) class_name = (row.get("class_name_hint") or "").strip() if class_id is None or not class_name: continue hints[class_id] = class_name return hints def class_name_hints_by_id(extracted_root: Path | str | None = None) -> dict[int, str]: root = resolve_extracted_root(extracted_root) return load_class_name_hints_by_id(str(root)) def find_chunk_file(entry_index: int, extracted_root: Path | str | None = None) -> Path: _, _, _, chunks_dir = extracted_root_paths(extracted_root) matches = sorted(chunks_dir.glob(f"chunk_{entry_index:03d}_*.bin")) if not matches: matches = sorted(chunks_dir.glob(f"chunk_{entry_index}_*.bin")) if not matches: raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}") return matches[0] def select_rows(class_name: str, slot: int, extracted_root: Path | str | None = None) -> tuple[dict[str, str], dict[str, str]]: class_event_index, class_layout_index, _, _ = extracted_root_paths(extracted_root) event_rows = load_tsv_rows(class_event_index) layout_rows = load_tsv_rows(class_layout_index) event_row = next( ( row for row in event_rows if row["class_name_hint"].upper() == class_name.upper() and try_parse_int(row.get("slot", "")) == slot and try_parse_int(row.get("entry_index", "")) is not None ), None, ) if event_row is None: raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}") if not event_row["derived_body_start"] or not event_row["derived_body_end"]: raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}") entry_index = parse_int(event_row["entry_index"]) layout_row = next( (row for row in layout_rows if try_parse_int(row.get("entry_index", "")) == entry_index), None, ) if layout_row is None: raise KeyError(f"No class_layout_index row found for entry_index={entry_index}") return event_row, layout_row def load_runtime_ir_rows(extracted_root: Path | str | None = None) -> list[dict[str, str]]: _, _, runtime_vm_ir_index, _ = extracted_root_paths(extracted_root) return load_tsv_rows(runtime_vm_ir_index) def runtime_stage_hints(ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> list[dict[str, str]]: opcode_values = {op["opcode"] for op in ops} hints: list[dict[str, str]] = [] for row in load_runtime_ir_rows(extracted_root): opcode_or_lane = row.get("opcode_or_lane", "") if opcode_or_lane.lower().startswith("opcode 0x"): opcode_value = try_parse_int(opcode_or_lane.split()[1]) if opcode_value is None or opcode_value not in opcode_values: continue elif row.get("stage_address") not in {"000d:177c", "000d:1acb", "000d:2104", "000d:21ed", "000d:22bc"}: continue hints.append(row) return hints def annotation_hints(event_row: dict[str, str], payload_shape_hint: str, ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> dict[str, Any]: slot = parse_int(event_row["slot"]) return { "runtime_family": "slot-backed-owner-loaded-body", "payload_shape_hint": payload_shape_hint, "compiled_anchors": [ {"address": "000d:46ec", "role": "context_create_from_slot"}, {"address": "000d:0988", "role": "referent_chain_mutator"}, {"address": "000d:177c", "role": "push_frame_word_literal"}, {"address": "000d:1acb", "role": "compare_stream_dword_and_push_bool"}, {"address": "000d:208b", "role": "materialize_or_forward_value"}, {"address": "000d:21ed", "role": "prepend_inline_payload"}, {"address": "000d:22bc", "role": "matrix_pushback_stage"}, {"address": "000d:2104", "role": "finalize_to_outptr"}, {"address": "000d:ebe3", "role": "opcode_sequence_run"}, ], "runtime_stage_hints": runtime_stage_hints(ops, extracted_root), "slot_taxonomy": {"slot": slot, "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot)}, } def infer_payload_shape(slot: int) -> str | None: if slot in {0x10, 0x12}: return "none" if slot in {0x0A, 0x0B, 0x11, 0x14}: return "word" if slot == 0x13: return "signed_word" return None def parse_debug_symbols(body: bytes, start: int) -> DebugSymbolParseResult | None: if start >= len(body): return None if body[start] == 0x7A: return DebugSymbolParseResult(debug_symbols=[], end_offset=start + 1, has_end_opcode=True, trailing_bytes=body[start + 1:]) reader = BodyReader(body, start) count = reader.read_u8() debug_symbols: list[DebugSymbolRecord] = [] try: for index in range(count): unknown1 = reader.read_u8() type_id = reader.read_u8() bp_offset = reader.read_u8() unknown3 = reader.read_u8() name = reader.read_cstring() type_char = chr(type_id) if 0x20 <= type_id <= 0x7E else "." debug_symbols.append( DebugSymbolRecord( index=index, unknown1=unknown1, type_id=type_id, type_char=type_char, bp_offset=bp_offset, bp_repr=bp_repr(bp_offset), unknown3=unknown3, name=name, ) ) except IndexError: return None has_end_opcode = reader.offset < len(body) and body[reader.offset] == 0x7A end_offset = reader.offset + (1 if has_end_opcode else 0) trailing_bytes = body[end_offset:] if not has_end_opcode: return None return DebugSymbolParseResult( debug_symbols=debug_symbols, end_offset=end_offset, has_end_opcode=has_end_opcode, trailing_bytes=trailing_bytes, ) def parse_field_tags(body: bytes, start: int) -> FieldTagParseResult | None: if start >= len(body): return None reader = BodyReader(body, start) field_tags: list[FieldTagRecord] = [] try: while reader.offset < len(body) and body[reader.offset] != 0x7A: tag_id = reader.read_u8() bp_offset = reader.read_u8() value_kind = reader.read_u8() name = reader.read_cstring() if not name: return None field_tags.append(FieldTagRecord(tag_id=tag_id, bp_offset=bp_offset, value_kind=value_kind, name=name)) if reader.offset < len(body) and body[reader.offset] == 0x00: reader.offset += 1 except IndexError: return None if not field_tags or reader.offset >= len(body) or body[reader.offset] != 0x7A: return None end_offset = reader.offset + 1 return FieldTagParseResult(field_tags=field_tags, end_offset=end_offset, trailing_bytes=body[end_offset:]) def parse_body_ir( event_row: dict[str, str], layout_row: dict[str, str], game_variant: str | None = None, extracted_root: Path | str | None = None, ) -> dict[str, Any]: resolved_extracted_root = resolve_extracted_root(extracted_root) entry_index = parse_int(event_row["entry_index"]) chunk_file = find_chunk_file(entry_index, resolved_extracted_root) chunk_bytes = chunk_file.read_bytes() resolved_game_variant = resolve_game_variant(game_variant, chunk_file) intrinsic_hints = get_intrinsic_hints(resolved_game_variant, chunk_file) intrinsic_callsite_hints = get_intrinsic_callsite_hints(resolved_game_variant, chunk_file) target_class_name_hints = class_name_hints_by_id(resolved_extracted_root) body_start = parse_int(event_row["derived_body_start"]) body_end = parse_int(event_row["derived_body_end"]) body = chunk_bytes[body_start:body_end] ops: list[dict[str, Any]] = [] offset = 0 end_reason = "body_exhausted" unknown_tail = b"" debug_symbols: list[dict[str, Any]] = [] debug_symbol_offset: int | None = None field_tags: list[dict[str, Any]] = [] while offset < len(body): result = parse_one_op(body, offset, intrinsic_hints, intrinsic_callsite_hints) if result.op is not None: operands = result.op["operands"] if "target_class_id" in operands: class_id = operands["target_class_id"] operands["target_class_name_hint"] = target_class_name_hints.get(class_id) result.op["absolute_body_offset"] = body_start + result.op["offset"] ops.append(result.op) if result.end_reason is not None: end_reason = result.end_reason unknown_tail = result.unknown_tail or b"" if result.end_reason == "end_opcode": unknown_tail = body[result.next_offset:] offset = result.next_offset break offset = result.next_offset if offset >= len(body) and end_reason == "body_exhausted": unknown_tail = b"" candidate_debug_offsets = sorted( { operands["symbol_offset"] for op in ops for operands in [op["operands"]] if op["mnemonic"] == "symbol_info" and isinstance(operands.get("symbol_offset"), int) and 0 <= operands["symbol_offset"] < len(body) } ) last_ret_index = next((index for index in range(len(ops) - 1, -1, -1) if ops[index]["mnemonic"] == "ret"), None) if end_reason == "unknown_opcode" and last_ret_index is not None: ret_end = ops[last_ret_index]["offset"] + (len(ops[last_ret_index]["raw_bytes"]) // 2) ret_debug_result = parse_debug_symbols(body, ret_end) if ret_debug_result is not None: ops = ops[:last_ret_index + 1] debug_symbol_offset = ret_end debug_symbols = [ { "index": symbol.index, "unknown1": symbol.unknown1, "type_id": symbol.type_id, "type_char": symbol.type_char, "bp_offset": symbol.bp_offset, "bp_repr": symbol.bp_repr, "unknown3": symbol.unknown3, "name": symbol.name, } for symbol in ret_debug_result.debug_symbols ] end_reason = "debug_symbols_then_end" unknown_tail = ret_debug_result.trailing_bytes offset = ret_debug_result.end_offset if end_reason == "unknown_opcode" and candidate_debug_offsets: for candidate_offset in reversed(candidate_debug_offsets): if candidate_offset != offset: continue debug_result = parse_debug_symbols(body, candidate_offset) if debug_result is None: continue debug_symbol_offset = candidate_offset debug_symbols = [ { "index": symbol.index, "unknown1": symbol.unknown1, "type_id": symbol.type_id, "type_char": symbol.type_char, "bp_offset": symbol.bp_offset, "bp_repr": symbol.bp_repr, "unknown3": symbol.unknown3, "name": symbol.name, } for symbol in debug_result.debug_symbols ] end_reason = "debug_symbols_then_end" unknown_tail = debug_result.trailing_bytes offset = debug_result.end_offset break if end_reason == "unknown_opcode" and last_ret_index is not None: ret_end = ops[last_ret_index]["offset"] + (len(ops[last_ret_index]["raw_bytes"]) // 2) field_tag_result = parse_field_tags(body, ret_end) if field_tag_result is not None: ops = ops[:last_ret_index + 1] field_tags = [ { "tag_id": tag.tag_id, "bp_offset": tag.bp_offset, "bp_repr": bp_repr(tag.bp_offset), "value_kind": tag.value_kind, "name": tag.name, "tag_label": f"{tag.tag_id:02X}:{tag.bp_offset:02X}{tag.value_kind:02X}->{tag.name}", } for tag in field_tag_result.field_tags ] end_reason = "field_tags_then_end" unknown_tail = field_tag_result.trailing_bytes offset = field_tag_result.end_offset slot = parse_int(event_row["slot"]) payload_shape = infer_payload_shape(slot) return { "schema_version": "crusader-usecode-ir-v1-poc", "source": { "game_variant": resolved_game_variant, "flex_path": infer_flex_path(resolved_extracted_root), "extracted_root": repo_relative_path(resolved_extracted_root), "chunk_file": repo_relative_path(chunk_file), }, "class": { "entry_index": entry_index, "object_index": parse_int(layout_row["object_index"]), "class_id": parse_int(layout_row["class_id"]), "class_name": layout_row["class_name_hint"], "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]), "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]), "conservative_event_count": parse_int(layout_row["conservative_event_count"]), }, "event": { "slot": slot, "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot), "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]), "raw_code_offset": parse_int(event_row["raw_code_offset"]), "derived_body_start": body_start, "derived_body_end": body_end, "derived_body_length": parse_int(event_row["derived_body_length"]), "repeated_template_status": event_row["repeated_template_status"], }, "body": { "end_reason": end_reason, "raw_body_sha1": hashlib.sha1(body).hexdigest(), "unknown_trailing_bytes": unknown_tail.hex(), "decoded_op_count": len(ops), "debug_symbol_offset": debug_symbol_offset, "debug_symbol_count": len(debug_symbols), "field_tag_count": len(field_tags), }, "ops": ops, "debug_symbols": debug_symbols, "field_tags": field_tags, "annotation_hints": annotation_hints(event_row, payload_shape, ops, resolved_extracted_root), } # --------------------------------------------------------------------------- # Family diff helpers # --------------------------------------------------------------------------- def _common_prefix_len(a: bytes, b: bytes) -> int: limit = min(len(a), len(b)) for i in range(limit): if a[i] != b[i]: return i return limit def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int: la, lb = len(a), len(b) limit = min(la - prefix_len, lb - prefix_len) for i in range(1, limit + 1): if a[la - i] != b[lb - i]: return i - 1 return limit def compute_family_diff(class_name: str, slot: int, extracted_root: Path | str | None = None) -> dict[str, Any]: """ Find all event rows that share the same repeated_template_status family tag as the named class/slot row, then decode each body and compute pairwise diff statistics against the reference body. Returns a dict with: reference_entry – entry_index for the named class/slot family_tag – repeated_template_status value used for grouping sibling_count – number of additional rows in the same family members – list of per-member records (entry, class, body stats, diff vs ref) """ class_event_index, class_layout_index, _, _ = extracted_root_paths(extracted_root) event_rows = load_tsv_rows(class_event_index) layout_rows = load_tsv_rows(class_layout_index) layout_by_entry: dict[int, dict[str, str]] = {} for row in layout_rows: idx = try_parse_int(row.get("entry_index", "")) if idx is not None: layout_by_entry[idx] = row # Locate the reference row ref_row = next( ( row for row in event_rows if row["class_name_hint"].upper() == class_name.upper() and try_parse_int(row.get("slot", "")) == slot and try_parse_int(row.get("entry_index", "")) is not None ), None, ) if ref_row is None: raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}") family_tag = ref_row.get("repeated_template_status", "").strip() ref_entry = parse_int(ref_row["entry_index"]) # Collect family members – same family_tag if non-empty, else same slot if family_tag and family_tag not in {"", "unique"}: family_rows = [ row for row in event_rows if row.get("repeated_template_status", "").strip() == family_tag and try_parse_int(row.get("entry_index", "")) is not None ] else: # Fall back: same slot across all classes family_rows = [ row for row in event_rows if try_parse_int(row.get("slot", "")) == slot and try_parse_int(row.get("entry_index", "")) is not None ] # Load reference body bytes def _load_body(row: dict[str, str]) -> bytes | None: body_start_str = row.get("derived_body_start", "") body_end_str = row.get("derived_body_end", "") if not body_start_str or not body_end_str: return None try: chunk = find_chunk_file(parse_int(row["entry_index"]), extracted_root) data = chunk.read_bytes() return data[parse_int(body_start_str):parse_int(body_end_str)] except (FileNotFoundError, ValueError): return None ref_body = _load_body(ref_row) if ref_body is None: raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}") members: list[dict[str, Any]] = [] for row in family_rows: entry_idx = parse_int(row["entry_index"]) body = _load_body(row) is_ref = entry_idx == ref_entry member: dict[str, Any] = { "entry_index": entry_idx, "class_name": row["class_name_hint"], "slot": try_parse_int(row.get("slot", "")), "body_length": len(body) if body is not None else None, "is_reference": is_ref, } if body is not None and not is_ref: prefix = _common_prefix_len(ref_body, body) suffix = _common_suffix_len(ref_body, body, prefix) ref_diff_window = ref_body[prefix:len(ref_body) - suffix] if suffix else ref_body[prefix:] member_diff_window = body[prefix:len(body) - suffix] if suffix else body[prefix:] member["diff_vs_reference"] = { "common_prefix_bytes": prefix, "common_suffix_bytes": suffix, "ref_diff_window_hex": ref_diff_window.hex(), "member_diff_window_hex": member_diff_window.hex(), "diff_window_length_ref": len(ref_diff_window), "diff_window_length_member": len(member_diff_window), "identical": ref_body == body, } elif is_ref: member["diff_vs_reference"] = {"identical": True, "note": "reference"} members.append(member) members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"])) return { "reference_entry": ref_entry, "reference_class": class_name, "slot": slot, "family_tag": family_tag or f"slot_0x{slot:02X}_all", "member_count": len(members), "sibling_count": len(members) - 1, "members": members, } def render_family_diff_text(diff: dict[str, Any]) -> str: lines = [ f"Family diff: {diff['family_tag']}", f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}", f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}", "", ] for m in diff["members"]: tag = " [REF]" if m["is_reference"] else "" body_len = m["body_length"] if m["body_length"] is not None else "?" lines.append(f" entry={m['entry_index']} class={m['class_name']} slot=0x{m['slot']:02X} body_len={body_len}{tag}") d = m.get("diff_vs_reference") if d and not m["is_reference"]: if d["identical"]: lines.append(" identical to reference") else: lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}") lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}") lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}") return "\n".join(lines) + "\n" def build_listing_labels(ir: dict[str, Any]) -> dict[int, str]: return { op["operands"]["target_offset"] + ir["event"]["derived_body_start"]: f"L_{op['operands']['target_offset'] + ir['event']['derived_body_start']:04X}" for op in ir["ops"] if "target_offset" in op["operands"] and isinstance(op["operands"]["target_offset"], int) } def build_script_blocks(ir: dict[str, Any]) -> tuple[dict[int, str], list[tuple[str, list[dict[str, Any]]]]]: ops = ir["ops"] if not ops: return {}, [] branch_mnemonics = {"jne", "jmp", "foreach_list", "foreach_slist"} leaders = {ops[0]["absolute_body_offset"]} for index, op in enumerate(ops): target_offset = op["operands"].get("target_offset") if isinstance(target_offset, int): leaders.add(ir["event"]["derived_body_start"] + target_offset) if op["mnemonic"] in branch_mnemonics and index + 1 < len(ops): leaders.add(ops[index + 1]["absolute_body_offset"]) ordered_leaders = sorted(leaders) label_map = {ordered_leaders[0]: "entry"} for absolute_offset in ordered_leaders[1:]: label_map[absolute_offset] = f"block_{absolute_offset:04X}" blocks: list[tuple[str, list[dict[str, Any]]]] = [] current_label = label_map[ops[0]["absolute_body_offset"]] current_ops: list[dict[str, Any]] = [] for op in ops: absolute_offset = op["absolute_body_offset"] if absolute_offset in label_map and current_ops and label_map[absolute_offset] != current_label: blocks.append((current_label, current_ops)) current_label = label_map[absolute_offset] current_ops = [] current_ops.append(op) if current_ops: blocks.append((current_label, current_ops)) return label_map, blocks def format_script_string(value: str) -> str: escaped = value.replace("\\", "\\\\").replace('"', '\\"') return f'"{escaped}"' def format_generic_operands(operands: dict[str, Any], label_map: dict[int, str], body_start: int) -> str: parts: list[str] = [] for key, value in operands.items(): if value is None or value == "": continue if key == "target_offset" and isinstance(value, int): parts.append(f"target={label_map.get(body_start + value, f'0x{body_start + value:04X}')}") continue if key == "target_event_name_hint": parts.append(f"event={value}") continue if key == "intrinsic_name_hint": parts.append(f"hint={value}") continue if isinstance(value, int): if key.endswith("_signed"): parts.append(f"{key}={value}") else: parts.append(f"{key}=0x{value:X}") continue parts.append(f"{key}={value}") return " ".join(parts) def format_script_statement(op: dict[str, Any], label_map: dict[int, str], body_start: int) -> str: mnemonic = op["mnemonic"] operands = op["operands"] if mnemonic == "init": return f"init locals 0x{operands['local_bytes']:02X}" if mnemonic == "push_byte_immediate": return f"push 0x{operands['value_u8']:02X} ; signed {operands['value_signed']}" if mnemonic == "push_word_immediate": return f"push 0x{operands['value_u16']:04X}" if mnemonic == "push_dword_immediate": return f"push 0x{operands['value_u32']:08X}" if mnemonic == "push_string_immediate": return f"push {format_script_string(operands['string'])}" if mnemonic.startswith("push_local_") or mnemonic.startswith("push_member_"): return f"push {operands['target']}" if mnemonic == "push_local_list": return f"push {operands['target']}" if mnemonic == "push_list_element": return f"push list_element size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}" if mnemonic == "push_huge": return f"push huge 0x{operands['value_a']:02X}:0x{operands['value_b']:02X}" if mnemonic == "push_global": return f"push global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}" if mnemonic == "push_local_addr": return f"push &{operands['target']}" if mnemonic == "push_string_ptr": return f"push string_ptr {operands['target']}" if mnemonic.startswith("pop_local_") or mnemonic.startswith("pop_member_"): return f"pop -> {operands['target']}" if mnemonic == "pop_local_blob" or mnemonic == "pop_member_blob": return f"pop_blob -> {operands['target']} size=0x{operands['size']:X}" if mnemonic == "pop_list_element": return f"pop_list_element -> {operands['target']} elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}" if mnemonic == "pop_global": return f"pop -> global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}" if mnemonic == "call_intrinsic": hint = operands.get("intrinsic_name_hint") or "unknown_intrinsic" return f"call intrinsic 0x{operands['intrinsic_ordinal']:04X} {hint} args=0x{operands['arg_bytes']:02X}" if mnemonic == "call_near": target = label_map.get(body_start + operands["target_offset"], f"0x{body_start + operands['target_offset']:04X}") return f"call {target}" if mnemonic == "call_class_event": return f"call {format_target_event_reference(operands)}" if mnemonic in {"append_unique_inline", "append_unique_indirect", "remove_matching_indirect", "remove_matching_inline"}: return f"{mnemonic} size=0x{operands['element_size']:X}" if mnemonic == "create_list": return f"create_list elem_size=0x{operands['element_size']:X} count=0x{operands['count']:X}" if mnemonic == "in_list": return f"in_list elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}" if mnemonic == "jne": target = label_map.get(body_start + operands["target_offset"], f"0x{body_start + operands['target_offset']:04X}") return f"if pop() != 0 goto {target}" if mnemonic == "jmp": target = label_map.get(body_start + operands["target_offset"], f"0x{body_start + operands['target_offset']:04X}") return f"goto {target}" if mnemonic in {"foreach_list", "foreach_slist"}: target = label_map.get(body_start + operands["target_offset"], f"0x{body_start + operands['target_offset']:04X}") return f"{mnemonic} {operands['target_var']} elem_size=0x{operands['element_size']:X} -> {target}" if mnemonic == "spawn": return ( f"spawn {format_target_event_reference(operands)} " f"args=0x{operands['arg_bytes']:02X} this_size=0x{operands['this_size']:02X}" ) if mnemonic == "spawn_inline": return ( f"spawn_inline {format_target_event_reference(operands)} " f"inline=0x{operands['inline_offset']:04X} this_size=0x{operands['this_size']:02X} unk=0x{operands['unknown']:02X}" ) if mnemonic == "line_number": return f"line {operands['line_number']}" if mnemonic == "symbol_info": return f"symbol_info {operands['symbol']} -> 0x{operands['symbol_offset']:04X}" if mnemonic == "global_address": return f"push &global[0x{operands['global_id']:04X}]" if mnemonic == "ret": return "return" if mnemonic in NO_ARG_MNEMONICS.values(): return mnemonic rendered_operands = format_generic_operands(operands, label_map, body_start) return f"{mnemonic} {rendered_operands}".rstrip() def render_script(ir: dict[str, Any]) -> str: label_map, blocks = build_script_blocks(ir) slot_name = ir["event"]["event_name_hint"] or f"slot_{ir['event']['slot']:02X}" lines = [ ( f"script {ir['class']['class_name']}.{slot_name} " f"(entry={ir['class']['entry_index']}, class_id=0x{ir['class']['class_id']:04X}, slot=0x{ir['event']['slot']:02X})" ), "{", f" body_range 0x{ir['event']['derived_body_start']:04X}..0x{ir['event']['derived_body_end']:04X}", f" raw_event_word 0x{ir['event']['raw_event_entry_word']:04X}", f" raw_code_offset 0x{ir['event']['raw_code_offset']:08X}", f" end_reason {ir['body']['end_reason']}", ] if ir["debug_symbols"]: lines.append(" locals") lines.append(" {") for symbol in ir["debug_symbols"]: lines.append( f" {symbol['bp_repr']} {symbol['name']} ; type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') unk1=0x{symbol['unknown1']:02X} unk3=0x{symbol['unknown3']:02X}" ) lines.append(" }") lines.append("") for label, ops in blocks: lines.append(f" {label}:") for op in ops: statement = format_script_statement(op, label_map, ir["event"]["derived_body_start"]) lines.append(f" {statement} ; {op['absolute_body_offset']:04X}: {op['raw_bytes']}") lines.append("") if ir["field_tags"]: lines.append(" field_tags") lines.append(" {") for tag in ir["field_tags"]: lines.append( f" {tag['bp_repr']} {tag['name']} ; tag=0x{tag['tag_id']:02X} kind=0x{tag['value_kind']:02X}" ) lines.append(" }") if ir["body"]["unknown_trailing_bytes"]: lines.append(f" unknown_trailing_bytes {ir['body']['unknown_trailing_bytes']}") lines.append("}") return "\n".join(lines) + "\n" def sanitize_identifier(name: str) -> str: cleaned = [char if char.isalnum() or char == "_" else "_" for char in name.strip()] identifier = "".join(cleaned).strip("_") or "var" if identifier[0].isdigit(): identifier = f"v_{identifier}" return identifier def target_event_display_name(operands: dict[str, Any]) -> str: class_id = operands["target_class_id"] slot = operands["target_event_slot"] return CLASS_EVENT_NAME_HINTS.get((class_id, slot)) or operands.get("target_event_name_hint") or f"slot_{slot:02X}" def format_target_event_reference(operands: dict[str, Any]) -> str: class_name_hint = operands.get("target_class_name_hint") event_name = sanitize_identifier(target_event_display_name(operands)) if class_name_hint: return f"{sanitize_identifier(class_name_hint)}.{event_name}" return f"class_{operands['target_class_id']:04X}_{event_name}" def build_local_name_map(ir: dict[str, Any]) -> dict[int, str]: return { symbol["bp_offset"]: sanitize_identifier(symbol["name"]) for symbol in ir["debug_symbols"] } def format_bp_name(bp_offset: int, local_name_map: dict[int, str]) -> str: if bp_offset in local_name_map: return local_name_map[bp_offset] disp = signed_byte(bp_offset) if disp >= 0: return f"arg_{disp:02X}" return f"local_{abs(disp):02X}" def intrinsic_display_name(name_hint: str | None, ordinal: int) -> str: if not name_hint: return f"intrinsic_{ordinal:04X}" display = name_hint.replace("::", ".") display = re.sub(r"(?<=\.)I_", "", display) if display.startswith("I_"): display = display[2:] paren = display.find("(") if paren != -1: display = display[:paren] return display def push_expr_from_op(op: dict[str, Any], local_name_map: dict[int, str]) -> tuple[str, int] | None: mnemonic = op["mnemonic"] operands = op["operands"] if mnemonic == "push_byte_immediate": return (str(operands["value_signed"]), 1) if mnemonic == "push_word_immediate": return (f"0x{operands['value_u16']:04X}", 2) if mnemonic == "push_dword_immediate": return (f"0x{operands['value_u32']:08X}", 4) if mnemonic == "push_string_immediate": return (format_script_string(operands["string"]), max(2, operands["declared_length"])) if mnemonic in {"push_local_byte", "push_local_word", "push_local_dword", "push_local_string", "push_local_slist", "push_local_addr", "push_string_ptr"}: return (format_bp_name(operands["bp_offset"], local_name_map), 4 if mnemonic in {"push_local_dword", "push_local_addr", "push_string_ptr"} else 2) if mnemonic in {"push_member_byte", "push_member_word", "push_member_dword", "push_member_huge"}: return (f"member.{format_bp_name(operands['bp_offset'], local_name_map)}", 4 if mnemonic in {"push_member_dword", "push_member_huge"} else 2) if mnemonic == "push_local_list": return (format_bp_name(operands["bp_offset"], local_name_map), max(2, operands["element_size"])) if mnemonic == "push_list_element": return (f"list_element(size=0x{operands['element_size']:X})", max(1, operands["element_size"])) if mnemonic == "push_huge": return (f"0x{operands['value_a']:02X}{operands['value_b']:02X}", 4) if mnemonic == "push_global": return (f"global[0x{operands['global_id']:04X}]", max(1, operands["size"])) if mnemonic == "push_pid": return ("pid", 2) if mnemonic == "push_process_result": return ("process_result", 2) return None def pop_stack_bytes(stack: list[tuple[str, int]], byte_count: int) -> list[str]: if byte_count <= 0: return [] parts: list[str] = [] consumed = 0 while stack and consumed < byte_count: expr, width = stack.pop() parts.append(expr) consumed += max(1, width) parts.reverse() return parts def combine_binary(stack: list[tuple[str, int]], operator: str, result_width: int = 2) -> None: if len(stack) < 2: return right_expr, _ = stack.pop() left_expr, _ = stack.pop() stack.append((f"({left_expr} {operator} {right_expr})", result_width)) def evaluate_loop_setup_op( op: dict[str, Any], stack: list[tuple[str, int]], local_name_map: dict[int, str], ) -> bool: pushed = push_expr_from_op(op, local_name_map) if pushed is not None: stack.append(pushed) return True mnemonic = op["mnemonic"] operands = op["operands"] if mnemonic == "push_indirect": if stack: expr, _ = stack.pop() stack.append((f"*({expr})", max(1, operands["size"]))) return True if mnemonic in {"add", "add_dword"}: combine_binary(stack, "+", 4 if mnemonic.endswith("dword") else 2) return True if mnemonic in {"sub", "sub_dword"}: combine_binary(stack, "-", 4 if mnemonic.endswith("dword") else 2) return True if mnemonic in {"mul", "mul_dword"}: combine_binary(stack, "*", 4 if mnemonic.endswith("dword") else 2) return True if mnemonic in {"div", "div_dword"}: combine_binary(stack, "/", 4 if mnemonic.endswith("dword") else 2) return True if mnemonic == "line_number": return True return False def normalize_loop_origin(expr: str) -> str: normalized = expr.strip() if normalized.startswith("*(") and normalized.endswith(")"): return normalized[2:-1] return normalized def try_decode_loop_selector( ops: list[dict[str, Any]], start_index: int, local_name_map: dict[int, str], ) -> tuple[str, int] | None: selector_tokens: list[int] = [] selector_stack: list[tuple[str, int]] = [] index = start_index while index < len(ops): op = ops[index] mnemonic = op["mnemonic"] if mnemonic == "loopscr": selector_tokens.append(op["operands"]["value_u8"]) index += 1 continue if mnemonic == "loop": break if not evaluate_loop_setup_op(op, selector_stack, local_name_map): return None index += 1 if index >= len(ops) or ops[index]["mnemonic"] != "loop": return None loop_operands = ops[index]["operands"] if loop_operands.get("string_bytes") != 0x6 or loop_operands.get("loop_type") != 0x2: return None if len(selector_tokens) != 4 or selector_tokens[0] != 0x24 or selector_tokens[1] != 0x3D or selector_tokens[3] != 0x25: if selector_tokens == [0x24, 0x42] and len(selector_stack) >= 4: current_var = format_bp_name(loop_operands["current_var"], local_name_map) return ( f"{current_var} in {generic_loop_selector_call('selector_0x42', [ ('arg0', selector_stack[-4][0]), ('arg1', selector_stack[-3][0]), ('arg2', selector_stack[-2][0]), ('origin', normalize_loop_origin(selector_stack[-1][0])), ])}", index + 1, ) return None selector_field = LOOP_SELECTOR_FIELD_HINTS.get(selector_tokens[2]) if selector_field is None or len(selector_stack) < 3: return None current_var = format_bp_name(loop_operands["current_var"], local_name_map) selector_value = selector_stack[-3][0] origin_expr = normalize_loop_origin(selector_stack[-1][0]) return ( f"{current_var} in nearby_items({selector_field}={selector_value}, origin={origin_expr})", index + 1, ) def loop_selector_statement(selector_text: str) -> str: return f"/* loop_selector {selector_text} */" def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]]]: label_map, blocks = build_script_blocks(ir) local_name_map = build_local_name_map(ir) skip_mnemonics = {"line_number", "symbol_info", "add_sp", "init"} pending_result: str | None rendered_blocks: list[tuple[str, list[str]]] = [] for label, ops in blocks: stack: list[tuple[str, int]] = [] pending_result = None block_lines: list[str] = [] index = 0 while index < len(ops): op = ops[index] mnemonic = op["mnemonic"] operands = op["operands"] if mnemonic == "loopscr": decoded_loop = try_decode_loop_selector(ops, index, local_name_map) if decoded_loop is not None: selector_text, next_index = decoded_loop block_lines.append(loop_selector_statement(selector_text)) stack.clear() pending_result = None index = next_index continue pushed = push_expr_from_op(op, local_name_map) if pushed is not None: stack.append(pushed) index += 1 continue if mnemonic in skip_mnemonics: index += 1 continue if mnemonic == "push_indirect": if stack: expr, _ = stack.pop() stack.append((f"*({expr})", max(1, operands["size"]))) index += 1 continue if mnemonic == "set_info": args = ", ".join(expr for expr, _ in stack) if stack else "" stack.clear() block_lines.append(f"set_info({args});") index += 1 continue if mnemonic == "process_exclude": block_lines.append("process_exclude();") index += 1 continue if mnemonic == "call_intrinsic": arg_exprs = pop_stack_bytes(stack, operands["arg_bytes"]) pending_result = f"{intrinsic_display_name(operands.get('intrinsic_name_hint'), operands['intrinsic_ordinal'])}({', '.join(arg_exprs)})" index += 1 continue if mnemonic == "push_retval_byte": stack.append((pending_result or "retval", 1)) pending_result = None index += 1 continue if mnemonic == "push_retval_word": stack.append((pending_result or "retval", 2)) pending_result = None index += 1 continue if mnemonic == "push_retval_dword": stack.append((pending_result or "retval", 4)) pending_result = None index += 1 continue if mnemonic == "call_class_event": arg_text = ", ".join(expr for expr, _ in stack) stack.clear() block_lines.append(f"{format_target_event_reference(operands)}({arg_text});") pending_result = None index += 1 continue if mnemonic == "spawn": arg_text = ", ".join(expr for expr, _ in stack) stack.clear() block_lines.append(f"spawn {format_target_event_reference(operands)}({arg_text});") pending_result = None index += 1 continue if mnemonic == "spawn_inline": arg_text = ", ".join(expr for expr, _ in stack) stack.clear() block_lines.append( f"spawn_inline {format_target_event_reference(operands)}({arg_text}) /* inline=0x{operands['inline_offset']:04X} */;" ) pending_result = None index += 1 continue if mnemonic in {"add", "add_dword"}: combine_binary(stack, "+", 4 if mnemonic.endswith("dword") else 2) index += 1 continue if mnemonic in {"sub", "sub_dword"}: combine_binary(stack, "-", 4 if mnemonic.endswith("dword") else 2) index += 1 continue if mnemonic in {"mul", "mul_dword"}: combine_binary(stack, "*", 4 if mnemonic.endswith("dword") else 2) index += 1 continue if mnemonic in {"div", "div_dword"}: combine_binary(stack, "/", 4 if mnemonic.endswith("dword") else 2) index += 1 continue if mnemonic == "bit_and": combine_binary(stack, "&") index += 1 continue if mnemonic == "bit_or": combine_binary(stack, "|") index += 1 continue if mnemonic == "and": combine_binary(stack, "&&") index += 1 continue if mnemonic == "or": combine_binary(stack, "||") index += 1 continue if mnemonic == "cmp": combine_binary(stack, "!=") index += 1 continue if mnemonic == "ne": combine_binary(stack, "!=") index += 1 continue if mnemonic == "lt": combine_binary(stack, "<") index += 1 continue if mnemonic == "le": combine_binary(stack, "<=") index += 1 continue if mnemonic == "gt": combine_binary(stack, ">") index += 1 continue if mnemonic == "ge": combine_binary(stack, ">=") index += 1 continue if mnemonic == "not": if stack: expr, width = stack.pop() stack.append((f"(!{expr})", width)) index += 1 continue if mnemonic == "implies": expr = stack.pop()[0] if stack else "retval" stack.append((f"implies({expr}, 0x{operands['arg0']:X}, 0x{operands['arg1']:X})", 1)) index += 1 continue if mnemonic == "pop_temp": if stack: stack.pop() index += 1 continue if mnemonic == "suspend": block_lines.append("suspend;") stack.clear() index += 1 continue if mnemonic == "jne": target = label_map.get(ir["event"]["derived_body_start"] + operands["target_offset"], f"block_{ir['event']['derived_body_start'] + operands['target_offset']:04X}") condition = stack.pop()[0] if stack else "condition" block_lines.append(f"if {condition} goto {target};") index += 1 continue if mnemonic == "jmp": target = label_map.get(ir["event"]["derived_body_start"] + operands["target_offset"], f"block_{ir['event']['derived_body_start'] + operands['target_offset']:04X}") block_lines.append(f"goto {target};") stack.clear() index += 1 continue if mnemonic in {"foreach_list", "foreach_slist"}: target = label_map.get(ir["event"]["derived_body_start"] + operands["target_offset"], f"block_{ir['event']['derived_body_start'] + operands['target_offset']:04X}") block_lines.append( f"{mnemonic} {format_bp_name(operands['bp_offset'], local_name_map)} -> {target};" ) index += 1 continue if mnemonic == "ret": block_lines.append("return;") stack.clear() break if mnemonic.startswith("pop_local_") or mnemonic.startswith("pop_member_"): if stack: expr, _ = stack.pop() else: expr = "value" target_name = format_bp_name(operands["bp_offset"], local_name_map) block_lines.append(f"{target_name} = {expr};") index += 1 continue rendered_operands = format_generic_operands(operands, label_map, ir["event"]["derived_body_start"]) block_lines.append(f"/* {mnemonic} {rendered_operands} */") index += 1 rendered_blocks.append((label, block_lines)) return rendered_blocks @dataclass(frozen=True) class TerminalStatement: kind: str condition: str | None = None target: str | None = None def parse_terminal_statement(statement: str) -> TerminalStatement | None: if statement == "return;": return TerminalStatement("return") goto_match = re.fullmatch(r"goto ([A-Za-z0-9_]+);", statement) if goto_match is not None: return TerminalStatement("goto", target=goto_match.group(1)) if_match = re.fullmatch(r"if (.+) goto ([A-Za-z0-9_]+);", statement) if if_match is not None: return TerminalStatement("if", condition=if_match.group(1), target=if_match.group(2)) return None def strip_outer_parens(expr: str) -> str: text = expr.strip() while text.startswith("(") and text.endswith(")"): depth = 0 balanced = True for index, char in enumerate(text): if char == "(": depth += 1 elif char == ")": depth -= 1 if depth == 0 and index != len(text) - 1: balanced = False break if depth < 0: balanced = False break if not balanced or depth != 0: break text = text[1:-1].strip() return text def invert_condition_text(condition: str) -> str: expr = strip_outer_parens(condition) comparisons = { " != ": " == ", " == ": " != ", " <= ": " > ", " >= ": " < ", " < ": " >= ", " > ": " <= ", } for source, replacement in comparisons.items(): if source in expr: return expr.replace(source, replacement, 1) if expr.startswith("!"): return strip_outer_parens(expr[1:]) if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_:.]*(\(.*\))?", expr): return f"!{expr}" return f"!({expr})" def indent_lines(lines: list[str], prefix: str = " ") -> list[str]: return [f"{prefix}{line}" if line else "" for line in lines] def detect_noop_compare_chain( blocks: list[tuple[str, list[str]]], label_to_index: dict[str, int], start_index: int, end_index: int, ) -> int | None: cursor = start_index common_target: str | None = None while cursor + 1 < end_index: _, compare_statements = blocks[cursor] _, goto_statements = blocks[cursor + 1] if len(compare_statements) != 1 or len(goto_statements) != 1: return None compare_terminal = parse_terminal_statement(compare_statements[0]) goto_terminal = parse_terminal_statement(goto_statements[0]) if compare_terminal is None or compare_terminal.kind != "if": return None if goto_terminal is None or goto_terminal.kind != "goto": return None if common_target is None: common_target = goto_terminal.target elif goto_terminal.target != common_target: return None if compare_terminal.target == common_target: body_index = label_to_index.get(common_target or "") if body_index is None or body_index != cursor + 2 or body_index >= end_index: return None return body_index next_index = label_to_index.get(compare_terminal.target or "") if next_index is None or next_index != cursor + 2 or next_index >= end_index: return None cursor += 2 return None def last_nonempty_block_index( blocks: list[tuple[str, list[str]]], start_index: int, end_index: int, ) -> int | None: for index in range(end_index - 1, start_index - 1, -1): if blocks[index][1]: return index return None def parse_selector_condition(condition: str) -> tuple[str, str] | None: expr = strip_outer_parens(condition) match = re.fullmatch(r"(.+?)\s*!=\s*(.+)", expr) if match is None: return None return match.group(1).strip(), match.group(2).strip() def parse_loop_selector_statement(statement: str) -> str | None: match = re.fullmatch(r"/\* loop_selector (.+) \*/", statement) if match is None: return None return match.group(1) def is_loop_selector_only_block(statements: list[str]) -> bool: return len(statements) == 1 and parse_loop_selector_statement(statements[0]) is not None def render_selector_chain( blocks: list[tuple[str, list[str]]], label_to_index: dict[str, int], start_index: int, end_index: int, return_labels: set[str], active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None, render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None, ) -> tuple[list[str], int] | None: if not blocks[start_index][1]: return None base_terminal = parse_terminal_statement(blocks[start_index][1][-1]) if base_terminal is None or base_terminal.kind != "if": return None selector = parse_selector_condition(base_terminal.condition or "") if selector is None: return None selector_expr, _ = selector cursor = start_index join_label: str | None = None branches: list[tuple[str, list[str]]] = [] while cursor < end_index: _, statements = blocks[cursor] if not statements: return None terminal = parse_terminal_statement(statements[-1]) if terminal is None or terminal.kind != "if": return None parsed = parse_selector_condition(terminal.condition or "") if parsed is None or parsed[0] != selector_expr: return None target_label = terminal.target or "" target_index = label_to_index.get(target_label) if target_index is None or target_index <= cursor + 1 or target_index > end_index: return None body_tail_index = last_nonempty_block_index(blocks, cursor + 1, target_index) if body_tail_index is None: return None body_tail_terminal = parse_terminal_statement(blocks[body_tail_index][1][-1]) if body_tail_terminal is None or body_tail_terminal.kind != "goto": return None current_join = body_tail_terminal.target or "" current_join_index = label_to_index.get(current_join) if current_join_index is None or current_join_index > end_index: return None if current_join_index < target_index: return None if current_join_index == target_index and target_label != current_join: return None if join_label is None: join_label = current_join elif current_join != join_label: return None body_result = render_structured_region( blocks, label_to_index, cursor + 1, target_index, return_labels, {join_label}, active_regions, render_cache, ) if body_result is None: return None body_lines, _ = body_result branches.append((invert_condition_text(terminal.condition or "condition"), body_lines)) if target_label == join_label: break cursor = target_index if join_label is None: return None rendered: list[str] = [] for index, (condition, body_lines) in enumerate(branches): branch_head = "if" if index == 0 else "else if" rendered.append(f"{branch_head} ({condition}) {{") rendered.extend(indent_lines(body_lines)) rendered.append("}") return rendered, label_to_index[join_label] def render_loop_construct( blocks: list[tuple[str, list[str]]], label_to_index: dict[str, int], index: int, end_index: int, return_labels: set[str], active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None, render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None, ) -> tuple[list[str], int] | None: _, statements = blocks[index] if not statements: return None terminal = parse_terminal_statement(statements[-1]) if terminal is None or terminal.kind != "if": return None target_label = terminal.target or "" target_index = label_to_index.get(target_label) if target_index is None or target_index <= index or target_index > end_index: return None loop_tail_index = last_nonempty_block_index(blocks, index + 1, target_index) if loop_tail_index is None: return None loop_tail_terminal = parse_terminal_statement(blocks[loop_tail_index][1][-1]) if loop_tail_terminal is None or loop_tail_terminal.kind != "goto" or loop_tail_terminal.target != blocks[index][0]: return None loop_body = render_structured_region( blocks, label_to_index, index + 1, target_index, return_labels, {blocks[index][0]}, active_regions, render_cache, ) if loop_body is None: return None loop_lines, _ = loop_body loop_selector = None if index > 0 and is_loop_selector_only_block(blocks[index - 1][1]): loop_selector = parse_loop_selector_statement(blocks[index - 1][1][0]) rendered: list[str] = [] if loop_selector is not None: rendered.append(f"for {loop_selector} {{") else: rendered.append(f"while ({invert_condition_text(terminal.condition or 'condition')}) {{") rendered.extend(indent_lines(loop_lines)) rendered.append("}") return rendered, target_index def render_infinite_loop_construct( blocks: list[tuple[str, list[str]]], label_to_index: dict[str, int], index: int, end_index: int, return_labels: set[str], active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None, render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None, ) -> tuple[list[str], int] | None: if index + 1 >= end_index: return None loop_label = blocks[index][0] loop_tail_index: int | None = None for candidate in range(end_index - 1, index, -1): statements = blocks[candidate][1] if not statements: continue terminal = parse_terminal_statement(statements[-1]) if terminal is not None and terminal.kind == "goto" and terminal.target == loop_label: loop_tail_index = candidate break if loop_tail_index is None: return None loop_body = render_structured_region( blocks, label_to_index, index, loop_tail_index + 1, return_labels, {loop_label}, active_regions, render_cache, ) if loop_body is None: return None loop_lines, _ = loop_body rendered = ["while (true) {"] rendered.extend(indent_lines(loop_lines)) rendered.append("}") return rendered, loop_tail_index + 1 def render_structured_region( blocks: list[tuple[str, list[str]]], label_to_index: dict[str, int], start_index: int, end_index: int, return_labels: set[str], exit_labels: set[str] | None = None, active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None, render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None, ) -> tuple[list[str], bool] | None: region_key = (start_index, end_index, tuple(sorted(exit_labels or ()))) if render_cache is not None and region_key in render_cache: return render_cache[region_key] if active_regions is None: active_regions = set() elif region_key in active_regions: return None active_regions = set(active_regions) active_regions.add(region_key) allowed_exit_labels = set(exit_labels or ()) lines: list[str] = [] index = start_index while index < end_index: skipped_index = detect_noop_compare_chain(blocks, label_to_index, index, end_index) if skipped_index is not None: index = skipped_index continue _, statements = blocks[index] if not statements: index += 1 continue if is_loop_selector_only_block(statements): index += 1 continue terminal = parse_terminal_statement(statements[-1]) if terminal is None: lines.extend(statements) index += 1 continue lines.extend(statements[:-1]) if terminal.kind == "return": lines.append("return;") return lines, False if terminal.kind == "goto": target_label = terminal.target or "" target_index = label_to_index.get(target_label) if target_label in return_labels: lines.append("return;") return lines, False if target_label in allowed_exit_labels: return lines, False if target_index is None: return None if target_index == index + 1: index += 1 continue if index < target_index < end_index: index = target_index continue return None target_label = terminal.target or "" target_index = label_to_index.get(target_label) if target_index is None or target_index <= index or target_index > end_index: return None if target_index == index + 1: index += 1 continue selector_chain = render_selector_chain( blocks, label_to_index, index, end_index, return_labels, active_regions, render_cache, ) if selector_chain is not None: selector_lines, selector_join_index = selector_chain lines.extend(selector_lines) index = selector_join_index continue loop_construct = render_loop_construct( blocks, label_to_index, index, end_index, return_labels, active_regions, render_cache, ) if loop_construct is not None: loop_lines, loop_join_index = loop_construct lines.extend(loop_lines) index = loop_join_index continue true_tail_index = last_nonempty_block_index(blocks, index + 1, target_index) if true_tail_index is not None: true_tail_terminal = parse_terminal_statement(blocks[true_tail_index][1][-1]) if true_tail_terminal is not None and true_tail_terminal.kind == "goto": join_label = true_tail_terminal.target or "" join_index = label_to_index.get(join_label) if join_index is not None and join_index > target_index and join_index <= end_index: true_result = render_structured_region( blocks, label_to_index, index + 1, target_index, return_labels, {join_label}, active_regions, render_cache, ) false_result = render_structured_region( blocks, label_to_index, target_index, join_index, return_labels, {join_label}, active_regions, render_cache, ) if true_result is not None and false_result is not None: true_lines, _ = true_result false_lines, _ = false_result lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{") lines.extend(indent_lines(true_lines)) lines.append("}") if false_lines: if false_lines[0].startswith("if "): lines.append(f"else {false_lines[0]}") lines.extend(false_lines[1:]) else: lines.append("else {") lines.extend(indent_lines(false_lines)) lines.append("}") index = join_index continue inner_result = render_structured_region( blocks, label_to_index, index + 1, target_index, return_labels, None, active_regions, render_cache, ) if inner_result is None: if render_cache is not None: render_cache[region_key] = None return None inner_lines, inner_falls_through = inner_result if inner_lines: lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{") lines.extend(indent_lines(inner_lines)) lines.append("}") elif not inner_falls_through: lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{") lines.append("}") index = target_index result = (lines, True) if render_cache is not None: render_cache[region_key] = result return result def render_structured_pseudocode(blocks: list[tuple[str, list[str]]]) -> list[str] | None: if not blocks: return [] label_to_index = {label: index for index, (label, _) in enumerate(blocks)} return_labels = { label for label, statements in blocks if len(statements) == 1 and statements[0] == "return;" } render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] = {} structured = render_structured_region(blocks, label_to_index, 0, len(blocks), return_labels, None, None, render_cache) if structured is None: return None return structured[0] def render_partially_structured_blocks(blocks: list[tuple[str, list[str]]]) -> list[str]: if not blocks: return [] label_to_index = {label: index for index, (label, _) in enumerate(blocks)} return_labels = { label for label, statements in blocks if len(statements) == 1 and statements[0] == "return;" } lines: list[str] = [] index = 0 while index < len(blocks): label, statements = blocks[index] if is_loop_selector_only_block(statements): loop_selector = parse_loop_selector_statement(statements[0]) if loop_selector is not None and index + 1 < len(blocks): next_label, next_statements = blocks[index + 1] next_terminal = parse_terminal_statement(next_statements[-1]) if next_statements else None if next_terminal is not None and next_terminal.kind == "if": target_index = label_to_index.get(next_terminal.target or "") if target_index is not None and target_index > index + 1: loop_tail_index = last_nonempty_block_index(blocks, index + 2, target_index) if loop_tail_index is not None: loop_tail_terminal = parse_terminal_statement(blocks[loop_tail_index][1][-1]) if loop_tail_terminal is not None and loop_tail_terminal.kind == "goto" and loop_tail_terminal.target == next_label: loop_body = render_structured_region( blocks, label_to_index, index + 2, target_index, return_labels, {next_label}, ) if loop_body is not None: loop_lines, _ = loop_body lines.append(f" {label}:") lines.append(f" for {loop_selector} {{") lines.extend(f" {line}" for line in indent_lines(loop_lines)) lines.append(" }") lines.append("") index = target_index continue lines.append(f" {label}:") lines.append(f" {statements[0]}") lines.append("") index += 1 continue selector_chain = render_selector_chain(blocks, label_to_index, index, len(blocks), return_labels) if selector_chain is not None: selector_lines, selector_join_index = selector_chain lines.append(f" {label}:") for statement in selector_lines: lines.append(f" {statement}" if statement else "") lines.append("") index = selector_join_index continue loop_construct = render_loop_construct( blocks, label_to_index, index, len(blocks), return_labels, ) if loop_construct is not None: loop_lines, loop_join_index = loop_construct lines.append(f" {label}:") for statement in loop_lines: lines.append(f" {statement}" if statement else "") lines.append("") index = loop_join_index continue infinite_loop_construct = render_infinite_loop_construct( blocks, label_to_index, index, len(blocks), return_labels, ) if infinite_loop_construct is not None: loop_lines, loop_join_index = infinite_loop_construct lines.append(f" {label}:") for statement in loop_lines: lines.append(f" {statement}" if statement else "") lines.append("") index = loop_join_index continue lines.append(f" {label}:") for statement in statements: lines.append(f" {statement}") lines.append("") index += 1 return lines def render_pseudocode(ir: dict[str, Any], shape_catalog: ShapeCatalog | None = None) -> str: slot_name = sanitize_identifier(ir["event"]["event_name_hint"] or f"slot_{ir['event']['slot']:02X}") lines = [ ( f"function {sanitize_identifier(ir['class']['class_name'].lower())}_{slot_name}() " f"/* entry={ir['class']['entry_index']} class_id=0x{ir['class']['class_id']:04X} slot=0x{ir['event']['slot']:02X} */" ), "{", ] if ir["debug_symbols"]: lines.append(" var") for index, symbol in enumerate(ir["debug_symbols"]): separator = "," if index + 1 < len(ir["debug_symbols"]) else ";" lines.append(f" {sanitize_identifier(symbol['name'])}{separator} /* {symbol['bp_repr']} type=0x{symbol['type_id']:02X} */") lines.append("") rendered_blocks = decompile_pseudocode_blocks(ir) structured_lines = render_structured_pseudocode(rendered_blocks) if structured_lines is not None: for statement in structured_lines: lines.append(f" {statement}" if statement else "") else: lines.extend(render_partially_structured_blocks(rendered_blocks)) lines.append("}") return apply_shape_catalog_to_pseudocode("\n".join(lines) + "\n", shape_catalog) def validate_pseudocode_text(text: str) -> list[str]: errors: list[str] = [] label_lines: dict[str, int] = {} goto_targets: list[tuple[str, int]] = [] brace_depth = 0 for line_number, raw_line in enumerate(text.splitlines(), start=1): stripped = raw_line.strip() if not stripped: continue if stripped.endswith("{"): brace_depth += 1 if stripped == "}": brace_depth -= 1 if brace_depth < 0: errors.append(f"line {line_number}: unexpected closing brace") brace_depth = 0 label_match = re.fullmatch(r"([A-Za-z_][A-Za-z0-9_]*):", stripped) if label_match is not None: label = label_match.group(1) previous_line = label_lines.get(label) if previous_line is not None: errors.append(f"line {line_number}: duplicate label {label} (first at line {previous_line})") else: label_lines[label] = line_number for match in re.finditer(r"\bgoto ([A-Za-z_][A-Za-z0-9_]*)\s*;", stripped): goto_targets.append((match.group(1), line_number)) if brace_depth != 0: errors.append(f"unbalanced braces: final depth {brace_depth}") for target, line_number in goto_targets: if target not in label_lines: errors.append(f"line {line_number}: goto target {target} has no label") return errors def render_text(ir: dict[str, Any]) -> str: labels = build_listing_labels(ir) def format_operand(key: str, value: Any) -> str: if value is None or value == "": return "" if key == "intrinsic_name_hint" and value: return f"hint={value}" if key == "target_event_name_hint" and value: return f"event={value}" if key == "target_offset" and isinstance(value, int): label = labels.get(value + ir["event"]["derived_body_start"]) return f"->{label or f'0x{value + ir['event']['derived_body_start']:04X}'}" if isinstance(value, int): if key.endswith("_signed"): return f"{key}={value}" return f"{key}=0x{value:X}" return f"{key}={value}" lines = [ f"Class {ir['class']['class_name']} entry={ir['class']['entry_index']} class_id=0x{ir['class']['class_id']:X}", f"Slot 0x{ir['event']['slot']:02X} hint={ir['event']['event_name_hint']} raw_word=0x{ir['event']['raw_event_entry_word']:04X} raw_code_off=0x{ir['event']['raw_code_offset']:08X}", f"Body 0x{ir['event']['derived_body_start']:04X}..0x{ir['event']['derived_body_end']:04X} len={ir['event']['derived_body_length']} end={ir['body']['end_reason']} ops={ir['body']['decoded_op_count']}", f"SHA1 {ir['body']['raw_body_sha1']}", "", ] for op in ir["ops"]: absolute_offset = op["absolute_body_offset"] label = labels.get(absolute_offset) if label is not None: lines.extend(["", f"{label}:"]) operand_items = [formatted for key, value in op["operands"].items() if (formatted := format_operand(key, value))] lines.append(f"{absolute_offset:04X}: {op['opcode']:02X} {op['mnemonic']:<24} {' '.join(operand_items)} raw={op['raw_bytes']}") if ir["debug_symbols"]: lines.extend(["", f"Debug symbols @ 0x{ir['body']['debug_symbol_offset']:04X}:"]) for symbol in ir["debug_symbols"]: lines.append( f" {symbol['index']:02X}: unk1=0x{symbol['unknown1']:02X} type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') {symbol['bp_repr']} unk3=0x{symbol['unknown3']:02X} name={symbol['name']}" ) if ir["field_tags"]: lines.extend(["", "Field tags:"]) for tag in ir["field_tags"]: lines.append( f" {tag['tag_label']} ({tag['bp_repr']})" ) if ir["body"]["unknown_trailing_bytes"]: lines.extend(["", f"unknown_trailing_bytes={ir['body']['unknown_trailing_bytes']}"]) return "\n".join(lines) + "\n" def main() -> None: parser = argparse.ArgumentParser(description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts") parser.add_argument("--class", dest="class_name", required=True, help="Class name from class_event_index.tsv, for example NPCTRIG") parser.add_argument("--slot", required=True, help="Event slot, for example 0x0A") parser.add_argument("--extracted-root", default=str(EXTRACTED_ROOT), help="Extracted USECODE root containing class_event_index.tsv and chunks/") parser.add_argument("--variant", choices=["auto", "regret", "remorse"], default="auto", help="Crusader intrinsic numbering to apply (default: auto, fallback regret)") parser.add_argument( "--shape-csv", help=( "Shape catalog CSV to apply to pseudocode output " "(default: Remorse uses /usecode_shape_catalog_remorse.csv; " "Regret uses /usecode_shape_catalog_regret.csv)" ), ) parser.add_argument("--output", help="Write IR JSON to this file instead of stdout") parser.add_argument("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON") parser.add_argument("--text-output", help="Write the text listing to this file") parser.add_argument("--emit-script", action="store_true", help="Emit a decompiled script-style view beside the JSON") parser.add_argument("--script-output", help="Write the script-style decompilation to this file") parser.add_argument("--emit-pseudocode", action="store_true", help="Emit a higher-level pseudocode view beside the JSON") parser.add_argument("--pseudocode-output", help="Write the pseudocode view to this file") parser.add_argument("--family-diff", action="store_true", help="Emit repeated-body family diff report instead of (or alongside) the IR") parser.add_argument("--family-diff-output", help="Write the family diff JSON to this file") parser.add_argument("--family-diff-text-output", help="Write the family diff text report to this file") args = parser.parse_args() slot = parse_int(args.slot) extracted_root = Path(args.extracted_root) shape_csv = Path(args.shape_csv) if args.shape_csv else default_shape_catalog_path(extracted_root, args.variant) shape_catalog = load_shape_catalog(shape_csv) event_row, layout_row = select_rows(args.class_name, slot, extracted_root) ir = parse_body_ir(event_row, layout_row, None if args.variant == "auto" else args.variant, extracted_root) rendered_json = json.dumps(ir, indent=2) if args.output: Path(args.output).write_text(rendered_json + "\n", encoding="utf-8") else: print(rendered_json) if args.emit_text: rendered_text = render_text(ir) if args.text_output: Path(args.text_output).write_text(rendered_text, encoding="utf-8") else: print(rendered_text) if args.emit_script: rendered_script = render_script(ir) if args.script_output: Path(args.script_output).write_text(rendered_script, encoding="utf-8") else: print(rendered_script) if args.emit_pseudocode: rendered_pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog) if args.pseudocode_output: Path(args.pseudocode_output).write_text(rendered_pseudocode, encoding="utf-8") else: print(rendered_pseudocode) if args.family_diff: diff = compute_family_diff(args.class_name, slot, extracted_root) diff_json = json.dumps(diff, indent=2) if args.family_diff_output: Path(args.family_diff_output).write_text(diff_json + "\n", encoding="utf-8") else: print(diff_json) diff_text = render_family_diff_text(diff) if args.family_diff_text_output: Path(args.family_diff_text_output).write_text(diff_text, encoding="utf-8") else: print(diff_text) if __name__ == "__main__": main()