Crusader_Decomp/tools/poc_crusader_usecode_parser.py
2026-03-26 00:37:17 +01:00

2587 lines
No EOL
97 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import argparse
import ast
import csv
import hashlib
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# Repository root: the parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Default locations of the extracted usecode artifacts below the repository root.
EXTRACTED_ROOT = REPO_ROOT / "USECODE" / "EUSECODE_extracted"
CLASS_EVENT_INDEX = EXTRACTED_ROOT / "class_event_index.tsv"
CLASS_LAYOUT_INDEX = EXTRACTED_ROOT / "class_layout_index.tsv"
RUNTIME_VM_IR_INDEX = EXTRACTED_ROOT / "runtime_vm_ir.tsv"
CHUNKS_DIR = EXTRACTED_ROOT / "chunks"
# Directory holding the per-variant intrinsic name files listed below.
UNKCOFFS_DIR = REPO_ROOT / "tools" / "unkcoffs"
# Variant used when none is requested and none can be inferred from a path.
DEFAULT_GAME_VARIANT = "regret"
# Supported variants mapped to the Python file their intrinsic names are read from.
# Membership in this dict is also what normalize_game_variant() accepts as valid.
INTRINSIC_HINT_PATHS = {
"regret": UNKCOFFS_DIR / "regret_ints.py",
"remorse": UNKCOFFS_DIR / "remorse_ints.py",
}
def resolve_extracted_root(extracted_root: Path | str | None = None) -> Path:
    """Return the extraction root to work with, always as a ``Path``.

    ``None`` selects the module-level ``EXTRACTED_ROOT`` default; any other
    value is passed through ``Path``.
    """
    return EXTRACTED_ROOT if extracted_root is None else Path(extracted_root)
def extracted_root_paths(extracted_root: Path | str | None = None) -> tuple[Path, Path, Path, Path]:
    """Return the four artifact paths that live beneath the extraction root.

    The tuple is ordered as: class event index, class layout index,
    runtime VM IR index, chunks directory.
    """
    base = resolve_extracted_root(extracted_root)
    event_index = base / "class_event_index.tsv"
    layout_index = base / "class_layout_index.tsv"
    runtime_ir_index = base / "runtime_vm_ir.tsv"
    chunks_dir = base / "chunks"
    return event_index, layout_index, runtime_ir_index, chunks_dir
def repo_relative_path(path: Path) -> str:
    """Render *path* with forward slashes, relative to ``REPO_ROOT`` when possible.

    A path that does not lie under ``REPO_ROOT`` is rendered as given.
    """
    try:
        shown = path.relative_to(REPO_ROOT)
    except ValueError:
        # Not inside the repository: fall back to the path itself.
        shown = path
    return str(shown).replace("\\", "/")
def infer_flex_path(extracted_root: Path | str | None = None) -> str:
    """Return the ``EUSECODE.FLX`` path string implied by an extraction root.

    The FLX file name is placed in the parent directory of the extraction
    root. When that parent is the repository root the bare file name is
    returned; otherwise it is prefixed with the repo-relative parent path.
    """
    container = resolve_extracted_root(extracted_root).parent
    if container != REPO_ROOT:
        return f"{repo_relative_path(container)}/EUSECODE.FLX"
    return "EUSECODE.FLX"
# Event slot number -> event name hint, covering slots 0x00 through 0x1F.
# Slots without a descriptive name carry a placeholder of the form "funcXX".
# Used to fill the "target_event_name_hint" operand and the slot taxonomy.
EVENT_NAME_HINTS = {
0x00: "look",
0x01: "use",
0x02: "anim",
0x03: "setActivity",
0x04: "cachein",
0x05: "hit",
0x06: "gotHit",
0x07: "hatch",
0x08: "schedule",
0x09: "release",
0x0A: "equip",
0x0B: "unequip",
0x0C: "combine",
0x0D: "func0D",
0x0E: "calledFromAnim",
0x0F: "enterFastArea",
0x10: "leaveFastArea",
0x11: "cast",
0x12: "justMoved",
0x13: "avatarStoleSomething",
0x14: "animGetHit",
0x15: "func15",
0x16: "func16",
0x17: "func17",
0x18: "func18",
0x19: "func19",
0x1A: "func1A",
0x1B: "func1B",
0x1C: "func1C",
0x1D: "func1D",
0x1E: "func1E",
0x1F: "func1F",
}
# Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h
# Source note: "current discovered intrinsics are for regret1.21 only"
# This is used as a hint only; the ordinal mapping may differ between builds.
# Keys are intrinsic ordinals, values are name hints. Values that start with
# "Intrinsic" are placeholders: build_intrinsic_hints() does not let a
# placeholder loaded from a variant file replace a descriptive name here.
BASE_INTRINSIC_HINTS: dict[int, str] = {
0x0000: "Intrinsic0000()",
0x0001: "Item::getFrame(void)",
0x0002: "Item::setFrame(uint16)",
0x0003: "Item::getMapNum(void)",
0x0004: "Item::getStatus(void)",
0x0005: "Item::orStatus(sint16)",
0x0006: "Item::callEvent0A(sint16)",
0x0007: "Intrinsic0007()",
0x0008: "Item::isNpc(void)",
0x0009: "Item::getZ(void)",
0x000A: "Intrinsic000A()",
0x000B: "Item::getQLo(void)",
0x000C: "Item::destroy(void)",
0x000D: "Intrinsic000D()",
0x000E: "Item::getX(void)",
0x000F: "Item::getY(void)",
0x0010: "Intrinsic0010()",
0x0011: "Item::getType(void)",
0x0012: "Intrinsic0012()",
0x0013: "Intrinsic0013()",
0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)",
0x0015: "Item::andStatus(void)",
0x0016: "Intrinsic0016()",
0x0017: "Intrinsic00C3()",
0x0018: "Intrinsic00DA()",
0x0019: "Intrinsic0019()",
0x001A: "Item::create(uint16,uint16)",
0x001B: "Item::pop(uint16,uint16,uint8)",
0x001C: "Intrinsic00FA()",
0x001D: "Item::push(void)",
0x001E: "Intrinsic001E()",
0x001F: "Item::getQLo(void)",
0x0020: "Item::setQLo(sint16)",
0x0021: "Item::getQHi(void)",
0x0022: "Item::setQHi(sint16)",
0x0023: "Intrinsic0023()",
0x0024: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0025: "Item::getCY(void)",
0x0026: "Item::getCX(void)",
0x0027: "Intrinsic0027()",
0x0028: "Item::setNpcNum(sint16)",
0x0029: "Intrinsic0029()",
0x002A: "Intrinsic002A()",
0x002B: "Item::pop(void)",
0x002C: "Intrinsic002C()",
0x002D: "Item::isCompletelyOn(uint16)",
0x002E: "Item::pop(uint16)",
0x002F: "Intrinsic002F()",
0x0030: "Intrinsic0030()",
0x0031: "Item::getFamily(void)",
0x0032: "Item::destroyContents(void)",
0x0033: "Intrinsic0033()",
0x0034: "Item::getDirToItem(uint16)",
0x0035: "Intrinsic0035()",
0x0036: "Intrinsic0036()",
0x0037: "Intrinsic0037()",
0x0038: "Item::andStatus(void)",
0x0039: "Kernel::resetRef(uint16,ProcessType)",
0x003A: "Item::touch(void)",
0x003B: "Egg::getEggId(void)",
0x003C: "Intrinsic003C()",
0x003D: "Intrinsic003D()",
0x003E: "Item::callEvent11(sint16)",
0x003F: "Intrinsic003F()",
0x0040: "Intrinsic0040()",
0x0041: "Item::isOn(uint16)",
0x0042: "Item::getQHi(void)",
0x0043: "Item::isOn(uint16)",
0x0044: "Item::getQHi(void)",
0x0045: "Item::isOn(uint16)",
0x0046: "Item::getQHi(void)",
0x0047: "Item::isOn(uint16)",
0x0048: "Item::getQHi(void)",
0x0049: "Item::isOn(uint16)",
0x004A: "Item::getQHi(void)",
0x004B: "Item::isOn(uint16)",
0x004C: "Item::getQHi(void)",
0x004D: "Intrinsic004D()",
0x004E: "Npc::isDead(void)",
0x004F: "Intrinsic009C()",
0x0050: "Intrinsic0050()",
0x0051: "Intrinsic0051()",
0x0052: "Intrinsic0052()",
0x0053: "Intrinsic00BD()",
0x0054: "Intrinsic0054()",
0x0055: "Intrinsic0055()",
0x0056: "Intrinsic0056()",
0x0057: "Intrinsic0057()",
0x0058: "Item::use(void)",
0x0059: "Item::setQuantity(sint16)",
0x005A: "Intrinsic005A()",
0x005B: "Item::getSurfaceWeight(void)",
0x005C: "Intrinsic005C()",
0x005D: "Item::setFrame(uint16)",
0x005E: "Intrinsic00DA()",
0x005F: "Intrinsic005F()",
0x0060: "Intrinsic0060()",
0x0061: "Intrinsic0061()",
0x0062: "Intrinsic0062()",
0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)",
0x0064: "Item::getPoint(WorldPoint&)",
0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)",
0x0066: "Item::fall(void)",
0x0067: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0068: "Kernel::getNumProcesses(uint16,ProcessType)",
0x0069: "Item::getCY(void)",
0x006A: "Intrinsic006A()",
0x006B: "Intrinsic006B()",
0x006C: "Intrinsic006C()",
0x006D: "Intrinsic006D()",
0x006E: "Intrinsic006E()",
0x006F: "Item::isInNpc(void)",
0x0070: "Intrinsic0070()",
0x0071: "Intrinsic0071()",
0x0072: "Intrinsic0072()",
0x0073: "Intrinsic0073()",
0x0074: "Npc::isDead(void)",
0x0075: "Item::getNpcNum(void)",
0x0076: "IntrinsicReturn0",
0x0077: "Intrinsic0077()",
0x0078: "Item::callEvent0B(sint16)",
0x0079: "Item::andStatus(void)",
0x007A: "Item::move(uint16,uint16,uint8)",
0x007B: "Intrinsic007B()",
0x007C: "Intrinsic007C()",
0x007D: "Intrinsic007D()",
0x007E: "Intrinsic007E()",
0x007F: "Intrinsic007F()",
0x0080: "Intrinsic0080()",
0x0081: "Intrinsic0081()",
0x0082: "Intrinsic0082()",
0x0083: "Intrinsic0083()",
0x0084: "Intrinsic0084()",
0x0085: "Intrinsic0085()",
0x0086: "teleportToEgg(sint16,int,uint8)",
0x0087: "Intrinsic0087()",
0x0088: "Intrinsic0088()",
0x0089: "Intrinsic00BD()",
0x008A: "Item::getQuality(void)",
0x008B: "Item::setQuality(sint16)",
0x008C: "Intrinsic008C()",
0x008D: "Intrinsic008D()",
0x008E: "Intrinsic008E()",
0x008F: "Camera::getX(void)",
0x0090: "Camera::getY(void)",
0x0091: "Item::setMapNum(sint16)",
0x0092: "Item::getNpcNum(void)",
0x0093: "Item::shoot(WorldPoint&,sint16,sint16)",
0x0094: "Intrinsic0094()",
0x0095: "Item::enterFastArea(void)",
0x0096: "Intrinsic00CA()",
0x0097: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0098: "Item::getNpcNum(void)",
0x0099: "Intrinsic0099()",
0x009A: "teleportToEgg(sint16,uint8)",
0x009B: "Intrinsic009B()",
0x009C: "Intrinsic009C()",
0x009D: "Intrinsic009D()",
0x009E: "Intrinsic009E()",
0x009F: "Intrinsic009F()",
0x00A0: "Item::andStatus(void)",
0x00A1: "Item::getUnkEggType(void)",
0x00A2: "Egg::setEggXRange(uint16)",
0x00A3: "Item::setFrame(uint16)",
0x00A4: "Item::overlaps(uint16)",
0x00A5: "Item::isOn(uint16)",
0x00A6: "Item::getQHi(void)",
0x00A7: "Intrinsic00DA()",
0x00A8: "Item::getCY(void)",
0x00A9: "Intrinsic00A9()",
0x00AA: "Item::isOn(uint16)",
0x00AB: "Npc::isDead(void)",
0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00AD: "Intrinsic00AD()",
0x00AE: "Item::getQHi(void)",
0x00AF: "Item::andStatus(void)",
0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00B1: "Item::andStatus(void)",
0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00B3: "Item::andStatus(void)",
0x00B4: "Item::getDirToCoords(uint16,uint16)",
0x00B5: "Intrinsic00B5()",
0x00B6: "Intrinsic00B6()",
0x00B7: "Item::getNpcNum(void)",
0x00B8: "Item::getCY(void)",
0x00B9: "Item::isOn(uint16)",
0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)",
0x00BB: "Npc::isDead(void)",
0x00BC: "Intrinsic00BC()",
0x00BD: "Intrinsic00BD()",
0x00BE: "Intrinsic00BE()",
0x00BF: "Item::andStatus(void)",
0x00C0: "Intrinsic00C0()",
0x00C1: "Intrinsic00C1()",
0x00C2: "IntrinsicReturn0",
0x00C3: "Intrinsic00C3()",
0x00C4: "Item::getQHi(void)",
0x00C5: "Item::setQuality(sint16)",
0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00C7: "Intrinsic00C7()",
0x00C8: "Intrinsic00C8()",
0x00C9: "Item::callEvent0A(sint16)",
0x00CA: "Intrinsic00CA()",
0x00CB: "Item::isOn(uint16)",
0x00CC: "Intrinsic00CC()",
0x00CD: "Intrinsic00CD()",
0x00CE: "Item::getQHi(void)",
0x00CF: "Item::isOn(uint16)",
0x00D0: "Intrinsic00D0()",
0x00D1: "Intrinsic00D1()",
0x00D2: "Intrinsic00D2()",
0x00D3: "Intrinsic00FA()",
0x00D4: "Camera::getY(void)",
0x00D5: "Intrinsic00D5()",
0x00D6: "Intrinsic00D6()",
0x00D7: "Intrinsic00D7()",
0x00D8: "Intrinsic00D8()",
0x00D9: "Intrinsic00D9()",
0x00DA: "Intrinsic00DA()",
0x00DB: "Intrinsic00DB()",
0x00DC: "Item::getQLo(void)",
0x00DD: "Item::getQHi(void)",
0x00DE: "Item::getNpcNum(void)",
0x00DF: "Intrinsic00DF()",
0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E1: "Intrinsic00FA()",
0x00E2: "Item::getQLo(void)",
0x00E3: "Item::getCY(void)",
0x00E4: "Item::getNpcNum(void)",
0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E6: "Item::getNpcNum(void)",
0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E8: "Item::getNpcNum(void)",
0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EA: "Item::getNpcNum(void)",
0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EC: "Item::getNpcNum(void)",
0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EE: "Item::getNpcNum(void)",
0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F0: "Item::getNpcNum(void)",
0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F2: "Item::getNpcNum(void)",
0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F4: "Item::getNpcNum(void)",
0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F6: "Item::getNpcNum(void)",
0x00F7: "Item::andStatus(void)",
0x00F8: "Intrinsic00FA()",
0x00F9: "Item::getQLo(void)",
0x00FA: "Intrinsic00FA()",
0x00FB: "Intrinsic00FB()",
0x00FC: "Intrinsic00FC()",
0x00FD: "Item::getQLo(void)",
0x00FE: "Intrinsic00FE()",
0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0100: "Item::andStatus(void)",
0x0101: "Item::isOn(uint16)",
0x0102: "Npc::isDead(void)",
0x0103: "Intrinsic00BD()",
0x0104: "Item::getQHi(void)",
0x0105: "Intrinsic00DA()",
0x0106: "Intrinsic00FA()",
0x0107: "Item::getQLo(void)",
0x0108: "Item::isOn(uint16)",
0x0109: "Item::getQHi(void)",
0x010A: "Item::isOn(uint16)",
0x010B: "Item::getQHi(void)",
0x010C: "Item::hurl(sint16,sint16,sint16,sint16)",
0x010D: "Item::getNpcNum(void)",
0x010E: "Item::getCY(void)",
0x010F: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0110: "Item::isOn(uint16)",
0x0111: "Intrinsic0111()",
0x0112: "IntrinsicReturn0",
0x0113: "Npc::isDead(void)",
0x0114: "Intrinsic0088()",
0x0115: "Intrinsic00C1()",
0x0116: "Item::getQHi(void)",
0x0117: "Intrinsic00BD()",
0x0118: "Item::andStatus(void)",
0x0119: "Item::getNpcNum(void)",
0x011A: "Item::andStatus(void)",
0x011B: "Item::getNpcNum(void)",
0x011C: "Intrinsic011C()",
0x011D: "Item::andStatus(void)",
0x011E: "Item::getNpcNum(void)",
0x011F: "Item::AvatarStoleSomehting(uint16)",
0x0120: "Item::andStatus(void)",
0x0121: "Item::getNpcNum(void)",
0x0122: "Item::getQ(void)",
0x0123: "Item::setQ(uint)",
0x0124: "Item::andStatus(void)",
0x0125: "Item::getNpcNum(void)",
0x0126: "Item::andStatus(void)",
0x0127: "Item::getNpcNum(void)",
0x0128: "Item::andStatus(void)",
0x0129: "Item::getNpcNum(void)",
0x012A: "Item::andStatus(void)",
0x012B: "Item::getNpcNum(void)",
0x012C: "Item::andStatus(void)",
0x012D: "Item::getNpcNum(void)",
0x012E: "Intrinsic00C3()",
0x012F: "Item::andStatus(void)",
0x0130: "Item::getNpcNum(void)",
0x0131: "Intrinsic0131()",
0x0132: "Item::andStatus(void)",
0x0133: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0134: "Item::andStatus(void)",
0x0135: "Camera::getY(void)",
0x0136: "Camera::getZ(void)",
0x0137: "Intrinsic0137()",
0x0138: "Intrinsic009C()",
0x0139: "Item::getTypeFlagCrusader(sint16)",
0x013A: "Item::getNpcNum(void)",
0x013B: "Item::hurl(sint16,sint16,sint16,sint16)",
0x013C: "Item::getCY(void)",
0x013D: "Item::getCZ(void)",
0x013E: "Item::setFrame(uint16)",
0x013F: "Intrinsic013F()",
0x0140: "Intrinsic0140()",
0x0141: "Intrinsic0141()",
0x0142: "Intrinsic0142()",
0x0143: "Npc::isDead(void)",
0x0144: "Intrinsic00FA()",
0x0145: "Intrinsic0145()",
0x0146: "Intrinsic0146()",
0x0147: "Intrinsic0147()",
0x0148: "Item::getNpcNum(void)",
0x0149: "Item::getQLo(void)",
0x014A: "Item::andStatus(void)",
0x014B: "Intrinsic014B()",
0x014C: "Intrinsic014C()",
0x014D: "Intrinsic014D()",
0x014E: "Intrinsic003C()",
0x014F: "Egg::getEggXRange(void)",
0x0150: "Intrinsic009C()",
0x0151: "Intrinsic0072()",
0x0152: "Item::setFrame(uint16)",
0x0153: "Intrinsic00C1()",
0x0154: "Intrinsic00C3()",
0x0155: "Intrinsic00C1()",
0x0156: "Item::isOn(uint16)",
0x0157: "Intrinsic00C3()",
0x0158: "Intrinsic00FA()",
0x0159: "Item::getQHi(void)",
0x015A: "Item::getQLo(void)",
0x015B: "Intrinsic00C1()",
0x015C: "Intrinsic00C3()",
0x015D: "Intrinsic015D()",
}
# Per-variant name overrides keyed by (intrinsic ordinal, arg byte count).
# parse_one_op() consults these before the per-ordinal hint table, so one
# ordinal can be labelled differently depending on the call's argument size.
VARIANT_INTRINSIC_CALLSITE_HINTS: dict[str, dict[tuple[int, int], str]] = {
"regret": {
(0x001E, 0x10): "Item::I_fireWeapon(Item *, x, y, z, byte, int, byte)",
},
"remorse": {},
}
def normalize_game_variant(value: str | None) -> str | None:
    """Canonicalise a user-supplied variant name.

    Returns ``None`` for ``None``, blank input, or ``"auto"`` (any case,
    surrounding whitespace ignored). Otherwise returns the trimmed,
    lower-cased name.

    Raises:
        ValueError: the name is not a key of ``INTRINSIC_HINT_PATHS``.
    """
    if value is None:
        return None
    candidate = value.strip().lower()
    if candidate in ("", "auto"):
        return None
    if candidate in INTRINSIC_HINT_PATHS:
        return candidate
    raise ValueError(f"Unsupported Crusader variant: {value}")
def infer_game_variant_from_path(path: Path | None) -> str | None:
    """Guess the game variant from the components of *path*.

    Matching is case-insensitive and substring-based on each path component.
    ``"regret"`` is checked across all components before ``"remorse"``, so a
    path mentioning both resolves to ``"regret"``. Returns ``None`` when
    *path* is ``None`` or nothing matches.
    """
    if path is None:
        return None
    components = [component.lower() for component in path.parts]
    for variant in ("regret", "remorse"):
        for component in components:
            if variant in component:
                return variant
    return None
def resolve_game_variant(game_variant: str | None = None, source_root: Path | None = None) -> str:
    """Pick the game variant to use.

    Preference order: the explicit *game_variant* (after normalisation), then
    a variant inferred from *source_root*, then ``DEFAULT_GAME_VARIANT``.
    The path is only inspected when no explicit variant was given.
    """
    chosen = normalize_game_variant(game_variant)
    if chosen is None:
        chosen = infer_game_variant_from_path(source_root)
    if chosen is None:
        chosen = DEFAULT_GAME_VARIANT
    return chosen
def load_intrinsic_hints_from_file(path: Path) -> dict[int, str]:
    """Load ordinal -> name hints from the ``intrinsics`` list in a Python file.

    The file is parsed with ``ast`` and never executed. Only the first
    top-level assignment to the plain name ``intrinsics`` is considered,
    either ``intrinsics = [...]`` or the annotated ``intrinsics: T = [...]``.
    Its value must be a literal list; list positions become the ordinals.
    Entries that are not strings, or are blank, are skipped, so the ordinals
    of the remaining entries are preserved.

    Loading is best-effort: an empty dict is returned when the file is
    missing, unreadable, not valid UTF-8, not parseable, has no such
    assignment, or the assigned value is not a literal list.
    """
    try:
        source = path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; UnicodeDecodeError is
        # a ValueError and is raised for files that are not valid UTF-8.
        return {}
    try:
        module = ast.parse(source, filename=str(path))
    except (SyntaxError, ValueError, RecursionError):
        return {}
    for node in module.body:
        if isinstance(node, ast.Assign):
            if len(node.targets) != 1:
                continue
            target = node.targets[0]
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            target = node.target
        else:
            continue
        if not isinstance(target, ast.Name) or target.id != "intrinsics":
            continue
        try:
            values = ast.literal_eval(node.value)
        except (SyntaxError, ValueError, TypeError, RecursionError):
            # literal_eval rejects non-literal expressions with ValueError and
            # can raise TypeError for e.g. unhashable dict keys or set items.
            return {}
        if not isinstance(values, list):
            return {}
        return {
            index: str(value)
            for index, value in enumerate(values)
            if isinstance(value, str) and value.strip()
        }
    return {}
def normalize_intrinsic_hint(name: str) -> str:
    """Clean up a raw intrinsic name hint.

    Trims whitespace, drops one leading return-type word (e.g. ``void``,
    ``int16``) when it is followed by an identifier, and repairs a handful of
    known typos found in the hint sources.
    """
    cleaned = re.sub(
        r"^(?:unsigned|signed|void|byte|char|short|long|int\d+|uint\d+|sint\d+)\s+(?=[A-Za-z_])",
        "",
        name.strip(),
    )
    # "udioProcess::" with its leading "A" missing; the look-behind leaves
    # correctly spelled "AudioProcess::" untouched.
    cleaned = re.sub(r"(?<![A-Za-z])udioProcess::", "AudioProcess::", cleaned)
    literal_fixes = (
        ("MusicProcess:I_", "MusicProcess::I_"),
        ("Somehting", "Something"),
        ("Actor::I_setDead())", "Actor::I_setDead()"),
    )
    for broken, fixed in literal_fixes:
        cleaned = cleaned.replace(broken, fixed)
    return cleaned
def build_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]:
    """Build the ordinal -> name hint table for one game variant.

    Starts from the normalised ``BASE_INTRINSIC_HINTS`` and overlays the
    names loaded from the variant's hint file. A file entry replaces the base
    entry unless the file entry is an ``Intrinsic...`` placeholder while the
    base entry is a descriptive name.
    """
    variant = resolve_game_variant(game_variant, source_root)
    merged: dict[int, str] = {}
    for ordinal, base_name in BASE_INTRINSIC_HINTS.items():
        merged[ordinal] = normalize_intrinsic_hint(base_name)

    file_hints = load_intrinsic_hints_from_file(INTRINSIC_HINT_PATHS[variant])
    for ordinal, raw_name in file_hints.items():
        candidate = normalize_intrinsic_hint(raw_name)
        current = merged.get(ordinal)
        placeholder_would_hide_name = (
            current is not None
            and candidate.startswith("Intrinsic")
            and not current.startswith("Intrinsic")
        )
        if placeholder_would_hide_name:
            continue
        merged[ordinal] = candidate
    return merged
# Hint tables already built, keyed by resolved variant name.
_INTRINSIC_HINTS_CACHE: dict[str, dict[int, str]] = {}


def get_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]:
    """Return the hint table for the resolved variant, building it on first use.

    The returned dict is the cached object itself, so every caller asking for
    the same variant receives the same instance.
    """
    variant = resolve_game_variant(game_variant, source_root)
    if _INTRINSIC_HINTS_CACHE.get(variant) is None:
        _INTRINSIC_HINTS_CACHE[variant] = build_intrinsic_hints(variant)
    return _INTRINSIC_HINTS_CACHE[variant]
def get_intrinsic_callsite_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[tuple[int, int], str]:
    """Return the (ordinal, arg_bytes) -> name overrides for the resolved variant.

    A variant with no entry in ``VARIANT_INTRINSIC_CALLSITE_HINTS`` yields an
    empty dict.
    """
    variant = resolve_game_variant(game_variant, source_root)
    try:
        return VARIANT_INTRINSIC_CALLSITE_HINTS[variant]
    except KeyError:
        return {}
# Hint table for DEFAULT_GAME_VARIANT, built once at import time.
# parse_one_op() falls back to it when the caller supplies no table.
INTRINSIC_HINTS = get_intrinsic_hints(DEFAULT_GAME_VARIANT)
# Opcode -> mnemonic for instructions that parse_one_op() decodes without
# reading any operand bytes (their "operands" dict stays empty).
# 0x7A ("end") additionally makes parse_one_op() report end_reason "end_opcode".
NO_ARG_MNEMONICS = {
0x08: "pop_result",
0x12: "pop_temp",
0x13: "pop_temp_dword",
0x14: "add",
0x15: "add_dword",
0x16: "concat",
0x17: "append_list",
0x1C: "sub",
0x1D: "sub_dword",
0x1E: "mul",
0x1F: "mul_dword",
0x20: "div",
0x21: "div_dword",
0x22: "mod",
0x23: "mod_dword",
0x24: "cmp",
0x25: "cmp_dword",
0x26: "strcmp",
0x27: "cmp_huge",
0x28: "lt",
0x29: "lt_dword",
0x2A: "le",
0x2B: "le_dword",
0x2C: "gt",
0x2D: "gt_dword",
0x2E: "ge",
0x2F: "ge_dword",
0x30: "not",
0x31: "not_dword",
0x32: "and",
0x33: "and_dword",
0x34: "or",
0x35: "or_dword",
0x36: "ne",
0x37: "ne_dword",
0x39: "bit_and",
0x3A: "bit_or",
0x3B: "bit_not",
0x3C: "lsh",
0x3D: "rsh",
0x50: "ret",
0x53: "suspend",
0x59: "push_pid",
0x5D: "push_retval_byte",
0x5E: "push_retval_word",
0x5F: "push_retval_dword",
0x60: "word_to_dword",
0x61: "dword_to_word",
0x68: "copy_string",
0x6A: "ptr_to_string",
0x6B: "str_to_ptr",
0x6D: "push_process_result",
0x73: "loopnext",
0x77: "set_info",
0x78: "process_exclude",
0x7A: "end",
}
def parse_int(value: str) -> int:
    """Parse an integer literal, letting its prefix choose the base.

    Base 0 means ``0x``, ``0o`` and ``0b`` prefixes are honoured and
    unprefixed text is read as decimal.
    """
    auto_base = 0
    return int(value, auto_base)
def try_parse_int(value: str) -> int | None:
    """Parse an integer literal like ``parse_int``, returning ``None`` on failure.

    Both malformed text (``ValueError``) and non-string input such as
    ``None`` (``TypeError``) yield ``None``.
    """
    try:
        # Base 0: the literal's own prefix (0x, 0o, 0b) selects the base.
        parsed = int(value, 0)
    except (TypeError, ValueError):
        return None
    return parsed
def signed_byte(value: int) -> int:
    """Reinterpret a byte value as signed: subtract 0x100 when bit 7 is set."""
    if value & 0x80:
        return value - 0x100
    return value
def bp_repr(value: int) -> str:
    """Format a byte as a signed BP-relative operand, e.g. ``[BP-02h]``.

    The byte is read as a signed displacement and shown as an explicit sign
    followed by at least two upper-case hex digits.
    """
    displacement = value - 0x100 if value & 0x80 else value
    # "+03X": always emit the sign, zero-pad the magnitude to two hex digits.
    return f"[BP{displacement:+03X}h]"
def sp_repr(value: int) -> str:
    """Format a byte as a signed SP-relative operand, e.g. ``[SP+04h]``.

    The byte is read as a signed displacement and shown as an explicit sign
    followed by at least two upper-case hex digits.
    """
    displacement = value - 0x100 if value & 0x80 else value
    # "+03X": always emit the sign, zero-pad the magnitude to two hex digits.
    return f"[SP{displacement:+03X}h]"
@dataclass
class ParseResult:
    """Outcome of decoding one instruction with ``parse_one_op``."""

    # Decoded instruction record, or None when the opcode was not recognised.
    op: dict[str, Any] | None
    # Offset just past the decoded instruction; the start offset when unknown.
    next_offset: int
    # "end_opcode", "unknown_opcode", or None when decoding may continue.
    end_reason: str | None = None
    # Remaining bytes from the unrecognised opcode onward, when applicable.
    unknown_tail: bytes | None = None
@dataclass
class DebugSymbolRecord:
    """One entry of a debug symbol table as read by ``parse_debug_symbols``."""

    # Position of the entry within the table.
    index: int
    # First byte of the entry; meaning not yet identified.
    unknown1: int
    # Type byte of the entry.
    type_id: int
    # type_id as a character when it is printable ASCII, otherwise ".".
    type_char: str
    # Raw BP displacement byte.
    bp_offset: int
    # bp_offset formatted as a signed BP-relative operand string.
    bp_repr: str
    # Fourth byte of the entry; meaning not yet identified.
    unknown3: int
    # Symbol name, stored NUL-terminated in the body.
    name: str
@dataclass
class DebugSymbolParseResult:
    """Result of parsing a debug symbol table out of a body."""

    # Symbol entries in the order they were read.
    debug_symbols: list[DebugSymbolRecord]
    # Offset in the body at which parsing stopped.
    end_offset: int
    # Whether an end opcode (0x7A) was seen.
    has_end_opcode: bool
    # Bytes of the body that follow end_offset.
    trailing_bytes: bytes
@dataclass
class FieldTagRecord:
    """One named field tag entry.

    NOTE(review): the producer of these records is outside this part of the
    file; the field comments below restate the names only.
    """

    # Identifier of the tag.
    tag_id: int
    # BP displacement associated with the tag.
    bp_offset: int
    # Numeric code for the kind of value.
    value_kind: int
    # Name attached to the tag.
    name: str
@dataclass
class FieldTagParseResult:
    """Result of parsing a run of field tags.

    NOTE(review): the parser that fills this in is outside this part of the
    file; the field comments below restate the names only.
    """

    # Tag entries that were parsed.
    field_tags: list[FieldTagRecord]
    # Offset at which parsing stopped.
    end_offset: int
    # Bytes left over after the parsed tags.
    trailing_bytes: bytes
class BodyReader:
    """Sequential little-endian reader over a bytes buffer.

    ``offset`` is the cursor and advances with every read. Only ``read_u8``
    raises ``IndexError`` at the end of the data; the multi-byte and
    fixed-length reads slice the buffer, so near the end they decode whatever
    bytes remain while still advancing the cursor by the full width.
    """

    def __init__(self, data: bytes, offset: int = 0) -> None:
        self.data = data
        self.offset = offset

    def _read_le(self, width: int) -> int:
        """Decode *width* bytes at the cursor as an unsigned little-endian int."""
        window = self.data[self.offset:self.offset + width]
        decoded = int.from_bytes(window, "little")
        self.offset += width
        return decoded

    def read_u8(self) -> int:
        """Read one byte; raises ``IndexError`` when the cursor is past the end."""
        current = self.data[self.offset]
        self.offset += 1
        return current

    def read_u16(self) -> int:
        """Read a 16-bit unsigned little-endian value."""
        return self._read_le(2)

    def read_u32(self) -> int:
        """Read a 32-bit unsigned little-endian value."""
        return self._read_le(4)

    def read_cstring(self) -> str:
        """Read bytes up to a NUL terminator or the end of the data.

        The terminator is consumed but not returned. Each byte maps to the
        code point of the same value.
        """
        collected = bytearray()
        while self.offset < len(self.data):
            current = self.read_u8()
            if current == 0:
                break
            collected.append(current)
        # Latin-1 maps every byte to the code point of equal value.
        return collected.decode("latin-1")

    def read_fixed_string(self, length: int) -> str:
        """Read *length* bytes as Latin-1 text with trailing NULs stripped."""
        window = self.data[self.offset:self.offset + length]
        self.offset += length
        text = window.decode("latin-1", errors="replace")
        return text.rstrip("\x00")
def op_record(start: int, absolute_start: int, opcode: int, raw_bytes: bytes, mnemonic: str, operands: dict[str, Any]) -> dict[str, Any]:
    """Assemble the dict that describes one decoded instruction.

    ``raw_bytes`` is stored as a lower-case hex string; every other argument
    is stored unchanged under its corresponding key.
    """
    return dict(
        offset=start,
        absolute_body_offset=absolute_start,
        opcode=opcode,
        mnemonic=mnemonic,
        raw_bytes=raw_bytes.hex(),
        operands=operands,
    )
def parse_one_op(
    body: bytes,
    start: int,
    intrinsic_hints: dict[int, str] | None = None,
    intrinsic_callsite_hints: dict[tuple[int, int], str] | None = None,
) -> ParseResult:
    """Decode the single usecode instruction that begins at ``body[start]``.

    Args:
        body: Raw bytes of the routine being decoded.
        start: Offset of the opcode byte within *body*.
        intrinsic_hints: Ordinal -> name table for ``call_intrinsic``.
            ``None`` selects the module default ``INTRINSIC_HINTS``.
        intrinsic_callsite_hints: (ordinal, arg_bytes) -> name overrides that
            take precedence over *intrinsic_hints*. ``None`` selects the
            overrides of ``DEFAULT_GAME_VARIANT``. An explicitly passed empty
            dict is honoured and means "no overrides".

    Returns:
        A ``ParseResult``. For a recognised opcode ``op`` holds the record
        built by ``op_record`` and ``next_offset`` points just past the
        instruction; ``end_reason`` is ``"end_opcode"`` for opcode 0x7A. For
        an unrecognised opcode ``op`` is ``None``, ``next_offset`` equals
        *start*, ``end_reason`` is ``"unknown_opcode"`` and ``unknown_tail``
        holds ``body[start:]``.

    Raises:
        IndexError: a single-byte read ran past the end of *body*. Multi-byte
            reads near the end do not raise; they decode the bytes that remain.
    """
    reader = BodyReader(body, start)
    opcode = reader.read_u8()
    operands: dict[str, Any] = {}
    mnemonic = NO_ARG_MNEMONICS.get(opcode)
    # Test against None rather than truthiness: a variant without overrides
    # passes an empty dict, which must not be swapped for the default
    # variant's tables.
    active_intrinsic_hints = INTRINSIC_HINTS if intrinsic_hints is None else intrinsic_hints
    active_callsite_hints = (
        get_intrinsic_callsite_hints(DEFAULT_GAME_VARIANT)
        if intrinsic_callsite_hints is None
        else intrinsic_callsite_hints
    )
    if opcode in {0x00, 0x01, 0x02, 0x04, 0x05, 0x06}:
        bp_offset = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
        mnemonic = {
            0x00: "pop_local_byte",
            0x01: "pop_local_word",
            0x02: "pop_local_dword",
            0x04: "pop_member_byte",
            0x05: "pop_member_word",
            0x06: "pop_member_dword",
        }[opcode]
    elif opcode in {0x03, 0x07}:
        bp_offset = reader.read_u8()
        size = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size}
        mnemonic = {0x03: "pop_local_blob", 0x07: "pop_member_blob"}[opcode]
    elif opcode == 0x09:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
            "slist_flag": slist_flag,
        }
        mnemonic = "pop_list_element"
    elif opcode == 0x0A:
        value = reader.read_u8()
        operands = {"value_u8": value, "value_signed": signed_byte(value)}
        mnemonic = "push_byte_immediate"
    elif opcode == 0x0B:
        operands = {"value_u16": reader.read_u16()}
        mnemonic = "push_word_immediate"
    elif opcode == 0x0C:
        operands = {"value_u32": reader.read_u32()}
        mnemonic = "push_dword_immediate"
    elif opcode == 0x0D:
        # The string is read up to its NUL terminator; the declared length is
        # recorded as-is and is not used to bound the read.
        declared_length = reader.read_u16()
        text = reader.read_cstring()
        operands = {"declared_length": declared_length, "string": text}
        mnemonic = "push_string_immediate"
    elif opcode == 0x0E:
        element_size = reader.read_u8()
        count = reader.read_u8()
        operands = {"element_size": element_size, "count": count}
        mnemonic = "create_list"
    elif opcode == 0x0F:
        arg_bytes = reader.read_u8()
        intrinsic_ordinal = reader.read_u16()
        operands = {
            "intrinsic_ordinal": intrinsic_ordinal,
            "arg_bytes": arg_bytes,
            # A call-site override wins over the per-ordinal hint.
            "intrinsic_name_hint": active_callsite_hints.get(
                (intrinsic_ordinal, arg_bytes),
                active_intrinsic_hints.get(intrinsic_ordinal),
            ),
        }
        mnemonic = "call_intrinsic"
    elif opcode == 0x10:
        operands = {"target_offset": reader.read_u16()}
        mnemonic = "call_near"
    elif opcode == 0x11:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        operands = {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
        mnemonic = "call_class_event"
    elif opcode in {0x18, 0x19, 0x1A, 0x1B}:
        element_size = reader.read_u8()
        operands = {"element_size": element_size}
        mnemonic = {
            0x18: "append_unique_inline",
            0x19: "append_unique_indirect",
            0x1A: "remove_matching_indirect",
            0x1B: "remove_matching_inline",
        }[opcode]
    elif opcode == 0x38:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {"element_size": element_size, "slist_flag": slist_flag}
        mnemonic = "in_list"
    elif opcode in {0x3E, 0x3F, 0x40, 0x41, 0x43, 0x46, 0x47, 0x48, 0x49, 0x4B, 0x62, 0x63, 0x64, 0x69}:
        bp_offset = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
        mnemonic = {
            0x3E: "push_local_byte",
            0x3F: "push_local_word",
            0x40: "push_local_dword",
            0x41: "push_local_string",
            0x43: "push_local_slist",
            0x46: "push_member_byte",
            0x47: "push_member_word",
            0x48: "push_member_dword",
            0x49: "push_member_huge",
            0x4B: "push_local_addr",
            0x62: "free_local_string",
            0x63: "free_local_slist",
            0x64: "free_local_list",
            0x69: "push_string_ptr",
        }[opcode]
    elif opcode == 0x42:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "element_size": element_size}
        mnemonic = "push_local_list"
    elif opcode == 0x44:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {"element_size": element_size, "slist_flag": slist_flag}
        mnemonic = "push_list_element"
    elif opcode == 0x45:
        byte0 = reader.read_u8()
        byte1 = reader.read_u8()
        operands = {"value_a": byte0, "value_b": byte1}
        mnemonic = "push_huge"
    elif opcode in {0x4C, 0x4D}:
        size = reader.read_u8()
        operands = {"size": size}
        mnemonic = {0x4C: "push_indirect", 0x4D: "pop_indirect"}[opcode]
    elif opcode in {0x4E, 0x4F}:
        global_id = reader.read_u16()
        size = reader.read_u8()
        operands = {"global_id": global_id, "size": size}
        mnemonic = {0x4E: "push_global", 0x4F: "pop_global"}[opcode]
    elif opcode in {0x51, 0x52}:
        relative = reader.read_u16()
        signed_relative = relative - 0x10000 if relative & 0x8000 else relative
        # The displacement is applied to the offset just past the operand.
        target = reader.offset + signed_relative
        operands = {"relative_u16": relative, "relative_signed": signed_relative, "target_offset": target}
        mnemonic = {0x51: "jne", 0x52: "jmp"}[opcode]
    elif opcode == 0x54:
        arg0 = reader.read_u8()
        arg1 = reader.read_u8()
        operands = {"arg0": arg0, "arg1": arg1}
        mnemonic = "implies"
    elif opcode == 0x57:
        arg_bytes = reader.read_u8()
        this_size = reader.read_u8()
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        operands = {
            "arg_bytes": arg_bytes,
            "this_size": this_size,
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
        mnemonic = "spawn"
    elif opcode == 0x58:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        inline_offset = reader.read_u16()
        this_size = reader.read_u8()
        unknown = reader.read_u8()
        operands = {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
            "inline_offset": inline_offset,
            "this_size": this_size,
            "unknown": unknown,
        }
        mnemonic = "spawn_inline"
    elif opcode == 0x5A:
        operands = {"local_bytes": reader.read_u8()}
        mnemonic = "init"
    elif opcode == 0x5B:
        operands = {"line_number": reader.read_u16()}
        mnemonic = "line_number"
    elif opcode == 0x5C:
        relative = reader.read_u16()
        # Relative to the offset just past the 16-bit field, before the name.
        symbol_offset = reader.offset + (relative - 0x10000 if relative & 0x8000 else relative)
        symbol = reader.read_fixed_string(8)
        trailing_zero = reader.read_u8()
        operands = {
            "symbol_offset": symbol_offset,
            "symbol": symbol,
            "trailing_zero": trailing_zero,
        }
        mnemonic = "symbol_info"
    elif opcode in {0x65, 0x66, 0x67, 0x6E, 0x6F, 0x74}:
        value = reader.read_u8()
        operands = {"value_u8": value}
        if opcode in {0x65, 0x66, 0x67}:
            operands["target"] = sp_repr(value)
        mnemonic = {
            0x65: "free_stack_string",
            0x66: "free_stack_list",
            0x67: "free_stack_slist",
            0x6E: "add_sp",
            0x6F: "push_stack_addr",
            0x74: "loopscr",
        }[opcode]
    elif opcode == 0x6C:
        bp_offset = reader.read_u8()
        copy_type = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "copy_type": copy_type}
        mnemonic = "param_pid_chg"
    elif opcode == 0x70:
        current_var = reader.read_u8()
        string_bytes = reader.read_u8()
        loop_type = reader.read_u8()
        operands = {"current_var": current_var, "string_bytes": string_bytes, "loop_type": loop_type}
        mnemonic = "loop"
    elif opcode in {0x75, 0x76}:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        branch = reader.read_u16()
        signed_branch = branch - 0x10000 if branch & 0x8000 else branch
        target = reader.offset + signed_branch
        operands = {
            "bp_offset": bp_offset,
            "target_var": bp_repr(bp_offset),
            "element_size": element_size,
            "relative_u16": branch,
            "relative_signed": signed_branch,
            "target_offset": target,
        }
        mnemonic = {0x75: "foreach_list", 0x76: "foreach_slist"}[opcode]
    elif opcode == 0x79:
        operands = {"global_id": reader.read_u16()}
        mnemonic = "global_address"
    elif mnemonic is None:
        # Neither an operand-carrying opcode nor listed in NO_ARG_MNEMONICS.
        return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:])
    raw = body[start:reader.offset]
    op = op_record(start, start, opcode, raw, mnemonic, operands)
    end_reason = "end_opcode" if opcode == 0x7A else None
    return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason)
def load_tsv_rows(path: Path) -> list[dict[str, str]]:
    """Read a UTF-8, tab-separated file with a header row.

    Returns one dict per data row, keyed by the header names.
    """
    with path.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        return [row for row in reader]
def find_chunk_file(entry_index: int, extracted_root: Path | str | None = None) -> Path:
    """Locate the chunk file for *entry_index* in the chunks directory.

    File names with the index zero-padded to three digits are tried first,
    then the unpadded form. The alphabetically first match is returned.

    Raises:
        FileNotFoundError: neither naming pattern matched a file.
    """
    chunks_dir = extracted_root_paths(extracted_root)[3]
    patterns = (f"chunk_{entry_index:03d}_*.bin", f"chunk_{entry_index}_*.bin")
    for pattern in patterns:
        candidates = sorted(chunks_dir.glob(pattern))
        if candidates:
            return candidates[0]
    raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}")
def select_rows(class_name: str, slot: int, extracted_root: Path | str | None = None) -> tuple[dict[str, str], dict[str, str]]:
    """Find the event row for a class/slot pair and its matching layout row.

    The event row is the first one whose ``class_name_hint`` equals
    *class_name* (case-insensitive), whose ``slot`` parses to *slot*, and
    whose ``entry_index`` parses as an integer. The layout row is the first
    one with the same ``entry_index``.

    Raises:
        KeyError: no event row matched, or no layout row has its entry index.
        ValueError: the event row lacks ``derived_body_start`` or
            ``derived_body_end``.
    """
    class_event_index, class_layout_index, _, _ = extracted_root_paths(extracted_root)
    event_rows = load_tsv_rows(class_event_index)
    layout_rows = load_tsv_rows(class_layout_index)

    event_row = None
    for candidate in event_rows:
        if candidate["class_name_hint"].upper() != class_name.upper():
            continue
        if try_parse_int(candidate.get("slot", "")) != slot:
            continue
        if try_parse_int(candidate.get("entry_index", "")) is None:
            continue
        event_row = candidate
        break
    if event_row is None:
        raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}")
    if not (event_row["derived_body_start"] and event_row["derived_body_end"]):
        raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}")

    entry_index = parse_int(event_row["entry_index"])
    for candidate in layout_rows:
        if try_parse_int(candidate.get("entry_index", "")) == entry_index:
            return event_row, candidate
    raise KeyError(f"No class_layout_index row found for entry_index={entry_index}")
def load_runtime_ir_rows(extracted_root: Path | str | None = None) -> list[dict[str, str]]:
    """Load every row of the runtime VM IR index below the extraction root."""
    runtime_vm_ir_index = extracted_root_paths(extracted_root)[2]
    return load_tsv_rows(runtime_vm_ir_index)
def runtime_stage_hints(ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> list[dict[str, str]]:
    """Select the runtime IR rows that are relevant to the decoded *ops*.

    A row labelled ``opcode 0x..`` (case-insensitive) is kept only when that
    opcode occurs in *ops*. Any other row is kept only when its
    ``stage_address`` is one of a fixed set of addresses.
    """
    seen_opcodes = {op["opcode"] for op in ops}
    always_kept_stages = {"000d:177c", "000d:1acb", "000d:2104", "000d:21ed", "000d:22bc"}
    selected: list[dict[str, str]] = []
    for row in load_runtime_ir_rows(extracted_root):
        label = row.get("opcode_or_lane", "")
        if label.lower().startswith("opcode 0x"):
            parsed = try_parse_int(label.split()[1])
            keep = parsed is not None and parsed in seen_opcodes
        else:
            keep = row.get("stage_address") in always_kept_stages
        if keep:
            selected.append(row)
    return selected
def annotation_hints(event_row: dict[str, str], payload_shape_hint: str, ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> dict[str, Any]:
    """Assemble the ``annotation_hints`` section of the IR for one event body.

    The result combines a fixed runtime-family tag, the caller-supplied
    payload shape hint, a fixed table of compiled anchor addresses with their
    role names, the runtime stage hints matching ``ops``, and the slot with
    its event name hint (falling back to ``EVENT_NAME_HINTS``).
    """
    slot = parse_int(event_row["slot"])
    anchor_table = (
        ("000d:46ec", "context_create_from_slot"),
        ("000d:0988", "referent_chain_mutator"),
        ("000d:177c", "push_frame_word_literal"),
        ("000d:1acb", "compare_stream_dword_and_push_bool"),
        ("000d:208b", "materialize_or_forward_value"),
        ("000d:21ed", "prepend_inline_payload"),
        ("000d:22bc", "matrix_pushback_stage"),
        ("000d:2104", "finalize_to_outptr"),
        ("000d:ebe3", "opcode_sequence_run"),
    )
    compiled_anchors = [{"address": address, "role": role} for address, role in anchor_table]
    stage_hints = runtime_stage_hints(ops, extracted_root)
    event_name_hint = event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot)
    return {
        "runtime_family": "slot-backed-owner-loaded-body",
        "payload_shape_hint": payload_shape_hint,
        "compiled_anchors": compiled_anchors,
        "runtime_stage_hints": stage_hints,
        "slot_taxonomy": {"slot": slot, "event_name_hint": event_name_hint},
    }
def infer_payload_shape(slot: int) -> str | None:
    """Map an event slot number to its payload shape name, or ``None`` if unlisted."""
    shape_by_slot = {
        0x10: "none",
        0x12: "none",
        0x0A: "word",
        0x0B: "word",
        0x11: "word",
        0x14: "word",
        0x13: "signed_word",
    }
    return shape_by_slot.get(slot)
def parse_debug_symbols(body: bytes, start: int) -> DebugSymbolParseResult | None:
    """Try to parse a debug-symbol table located at ``body[start:]``.

    A lone 0x7A byte at ``start`` is accepted as an empty table.  Otherwise
    the first byte is a record count and each record is four single bytes
    followed by a C string.  The table must be followed by a 0x7A byte.

    Returns ``None`` when ``start`` is past the end of ``body``, when reading
    a record raises ``IndexError``, or when the 0x7A terminator is missing.
    """
    if start >= len(body):
        return None
    if body[start] == 0x7A:
        return DebugSymbolParseResult(debug_symbols=[], end_offset=start + 1, has_end_opcode=True, trailing_bytes=body[start + 1:])

    reader = BodyReader(body, start)
    record_count = reader.read_u8()
    records: list[DebugSymbolRecord] = []
    try:
        for ordinal in range(record_count):
            unknown1 = reader.read_u8()
            type_id = reader.read_u8()
            bp_offset = reader.read_u8()
            unknown3 = reader.read_u8()
            name = reader.read_cstring()
            # Printable ASCII type ids are shown as-is, everything else as ".".
            printable = 0x20 <= type_id <= 0x7E
            records.append(
                DebugSymbolRecord(
                    index=ordinal,
                    unknown1=unknown1,
                    type_id=type_id,
                    type_char=chr(type_id) if printable else ".",
                    bp_offset=bp_offset,
                    bp_repr=bp_repr(bp_offset),
                    unknown3=unknown3,
                    name=name,
                )
            )
    except IndexError:
        return None

    terminator_at = reader.offset
    if terminator_at >= len(body) or body[terminator_at] != 0x7A:
        return None
    end_offset = terminator_at + 1
    return DebugSymbolParseResult(
        debug_symbols=records,
        end_offset=end_offset,
        has_end_opcode=True,
        trailing_bytes=body[end_offset:],
    )
def parse_field_tags(body: bytes, start: int) -> FieldTagParseResult | None:
    """Try to parse a run of field-tag records located at ``body[start:]``.

    Records are read until a 0x7A byte is reached.  Each record is three
    single bytes followed by a non-empty C string; one 0x00 byte directly
    after a record is skipped.

    Returns ``None`` when ``start`` is past the end of ``body``, when a record
    has an empty name, when reading raises ``IndexError``, when no record was
    read, or when the run is not followed by a 0x7A byte.
    """
    body_length = len(body)
    if start >= body_length:
        return None

    reader = BodyReader(body, start)
    tags: list[FieldTagRecord] = []
    try:
        while True:
            if reader.offset >= body_length or body[reader.offset] == 0x7A:
                break
            tag_id = reader.read_u8()
            bp_offset = reader.read_u8()
            value_kind = reader.read_u8()
            name = reader.read_cstring()
            if not name:
                return None
            tags.append(FieldTagRecord(tag_id=tag_id, bp_offset=bp_offset, value_kind=value_kind, name=name))
            # Skip a single zero byte that may follow the record.
            if reader.offset < body_length and body[reader.offset] == 0x00:
                reader.offset += 1
    except IndexError:
        return None

    terminated = reader.offset < body_length and body[reader.offset] == 0x7A
    if not tags or not terminated:
        return None
    end_offset = reader.offset + 1
    return FieldTagParseResult(field_tags=tags, end_offset=end_offset, trailing_bytes=body[end_offset:])
def parse_body_ir(
    event_row: dict[str, str],
    layout_row: dict[str, str],
    game_variant: str | None = None,
    extracted_root: Path | str | None = None,
) -> dict[str, Any]:
    """Decode one event body into the ``crusader-usecode-ir-v1-poc`` IR dict.

    The body bytes are sliced from the chunk file for the row's entry index
    using the row's derived body start/end.  Ops are decoded one at a time
    with ``parse_one_op`` until the body is exhausted or the decoder reports
    an end reason.

    When decoding stops with ``"unknown_opcode"``, three recovery passes are
    tried in order; the first that succeeds wins:

    1. a debug-symbol table directly after the last ``ret`` op,
    2. a debug-symbol table at the stop offset, if a ``symbol_info`` op
       pointed there,
    3. a field-tag run directly after the last ``ret`` op.

    Passes 1 and 3 drop any ops decoded after that ``ret``.

    Args:
        event_row: row from the class event index.
        layout_row: row from the class layout index for the same entry.
        game_variant: optional variant override, resolved together with the
            chunk file by ``resolve_game_variant``.
        extracted_root: optional override for the extracted data root.

    Returns:
        A dict with ``schema_version``, ``source``, ``class``, ``event``,
        ``body``, ``ops``, ``debug_symbols``, ``field_tags`` and
        ``annotation_hints`` sections.
    """
    resolved_extracted_root = resolve_extracted_root(extracted_root)
    entry_index = parse_int(event_row["entry_index"])
    chunk_file = find_chunk_file(entry_index, resolved_extracted_root)
    chunk_bytes = chunk_file.read_bytes()
    resolved_game_variant = resolve_game_variant(game_variant, chunk_file)
    intrinsic_hints = get_intrinsic_hints(resolved_game_variant, chunk_file)
    intrinsic_callsite_hints = get_intrinsic_callsite_hints(resolved_game_variant, chunk_file)
    body_start = parse_int(event_row["derived_body_start"])
    body_end = parse_int(event_row["derived_body_end"])
    body = chunk_bytes[body_start:body_end]

    ops: list[dict[str, Any]] = []
    offset = 0
    end_reason = "body_exhausted"
    unknown_tail = b""
    debug_symbols: list[dict[str, Any]] = []
    debug_symbol_offset: int | None = None
    field_tags: list[dict[str, Any]] = []

    while offset < len(body):
        result = parse_one_op(body, offset, intrinsic_hints, intrinsic_callsite_hints)
        if result.op is not None:
            result.op["absolute_body_offset"] = body_start + result.op["offset"]
            ops.append(result.op)
        if result.end_reason is not None:
            end_reason = result.end_reason
            unknown_tail = result.unknown_tail or b""
            if result.end_reason == "end_opcode":
                # Everything after the end opcode is reported as trailing bytes.
                unknown_tail = body[result.next_offset:]
            offset = result.next_offset
            break
        offset = result.next_offset
    if offset >= len(body) and end_reason == "body_exhausted":
        unknown_tail = b""

    # Body-relative offsets that symbol_info ops point at.
    candidate_debug_offsets = {
        operands["symbol_offset"]
        for op in ops
        for operands in [op["operands"]]
        if op["mnemonic"] == "symbol_info"
        and isinstance(operands.get("symbol_offset"), int)
        and 0 <= operands["symbol_offset"] < len(body)
    }
    last_ret_index = next((index for index in range(len(ops) - 1, -1, -1) if ops[index]["mnemonic"] == "ret"), None)

    if end_reason == "unknown_opcode":
        ret_end: int | None = None
        if last_ret_index is not None:
            last_ret = ops[last_ret_index]
            # raw_bytes is halved to get the op's byte length.
            ret_end = last_ret["offset"] + (len(last_ret["raw_bytes"]) // 2)

        # Pass 1: debug symbols directly after the last ret.
        if ret_end is not None:
            ret_debug_result = parse_debug_symbols(body, ret_end)
            if ret_debug_result is not None:
                ops = ops[:last_ret_index + 1]
                debug_symbol_offset = ret_end
                debug_symbols = _debug_symbol_dicts(ret_debug_result.debug_symbols)
                end_reason = "debug_symbols_then_end"
                unknown_tail = ret_debug_result.trailing_bytes
                offset = ret_debug_result.end_offset

        # Pass 2: debug symbols at the stop offset, if a symbol_info op named it.
        if end_reason == "unknown_opcode" and offset in candidate_debug_offsets:
            debug_result = parse_debug_symbols(body, offset)
            if debug_result is not None:
                debug_symbol_offset = offset
                debug_symbols = _debug_symbol_dicts(debug_result.debug_symbols)
                end_reason = "debug_symbols_then_end"
                unknown_tail = debug_result.trailing_bytes
                offset = debug_result.end_offset

        # Pass 3: field tags directly after the last ret.
        if end_reason == "unknown_opcode" and ret_end is not None:
            field_tag_result = parse_field_tags(body, ret_end)
            if field_tag_result is not None:
                ops = ops[:last_ret_index + 1]
                field_tags = _field_tag_dicts(field_tag_result.field_tags)
                end_reason = "field_tags_then_end"
                unknown_tail = field_tag_result.trailing_bytes
                offset = field_tag_result.end_offset

    slot = parse_int(event_row["slot"])
    payload_shape = infer_payload_shape(slot)
    return {
        "schema_version": "crusader-usecode-ir-v1-poc",
        "source": {
            "game_variant": resolved_game_variant,
            "flex_path": infer_flex_path(resolved_extracted_root),
            "extracted_root": repo_relative_path(resolved_extracted_root),
            "chunk_file": repo_relative_path(chunk_file),
        },
        "class": {
            "entry_index": entry_index,
            "object_index": parse_int(layout_row["object_index"]),
            "class_id": parse_int(layout_row["class_id"]),
            "class_name": layout_row["class_name_hint"],
            "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]),
            "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]),
            "conservative_event_count": parse_int(layout_row["conservative_event_count"]),
        },
        "event": {
            "slot": slot,
            "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
            "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]),
            "raw_code_offset": parse_int(event_row["raw_code_offset"]),
            "derived_body_start": body_start,
            "derived_body_end": body_end,
            "derived_body_length": parse_int(event_row["derived_body_length"]),
            "repeated_template_status": event_row["repeated_template_status"],
        },
        "body": {
            "end_reason": end_reason,
            "raw_body_sha1": hashlib.sha1(body).hexdigest(),
            "unknown_trailing_bytes": unknown_tail.hex(),
            "decoded_op_count": len(ops),
            "debug_symbol_offset": debug_symbol_offset,
            "debug_symbol_count": len(debug_symbols),
            "field_tag_count": len(field_tags),
        },
        "ops": ops,
        "debug_symbols": debug_symbols,
        "field_tags": field_tags,
        "annotation_hints": annotation_hints(event_row, payload_shape, ops, resolved_extracted_root),
    }


def _debug_symbol_dicts(symbols: list[Any]) -> list[dict[str, Any]]:
    """Convert parsed debug-symbol records into the plain dicts stored in the IR."""
    return [
        {
            "index": symbol.index,
            "unknown1": symbol.unknown1,
            "type_id": symbol.type_id,
            "type_char": symbol.type_char,
            "bp_offset": symbol.bp_offset,
            "bp_repr": symbol.bp_repr,
            "unknown3": symbol.unknown3,
            "name": symbol.name,
        }
        for symbol in symbols
    ]


def _field_tag_dicts(tags: list[Any]) -> list[dict[str, Any]]:
    """Convert parsed field-tag records into the plain dicts stored in the IR."""
    return [
        {
            "tag_id": tag.tag_id,
            "bp_offset": tag.bp_offset,
            "bp_repr": bp_repr(tag.bp_offset),
            "value_kind": tag.value_kind,
            "name": tag.name,
            "tag_label": f"{tag.tag_id:02X}:{tag.bp_offset:02X}{tag.value_kind:02X}->{tag.name}",
        }
        for tag in tags
    ]
# ---------------------------------------------------------------------------
# Family diff helpers
# ---------------------------------------------------------------------------
def _common_prefix_len(a: bytes, b: bytes) -> int:
limit = min(len(a), len(b))
for i in range(limit):
if a[i] != b[i]:
return i
return limit
def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int:
la, lb = len(a), len(b)
limit = min(la - prefix_len, lb - prefix_len)
for i in range(1, limit + 1):
if a[la - i] != b[lb - i]:
return i - 1
return limit
def compute_family_diff(class_name: str, slot: int, extracted_root: Path | str | None = None) -> dict[str, Any]:
    """Diff the body of one class/slot event against the other bodies in its family.

    The reference row is the first event-index row matching ``class_name``
    (case-insensitive) and ``slot`` with a parseable ``entry_index``.  Its
    family is every row sharing the same ``repeated_template_status`` tag; if
    that tag is empty or ``"unique"``, the family is every row with the same
    slot instead.  Each member body is compared byte-wise with the reference
    body by common prefix and common suffix.

    Only the exact reference row is flagged ``is_reference``.  Another slot of
    the same entry that lands in the same family is diffed like any sibling.

    Returns a dict with:
        reference_entry: entry_index of the reference row.
        reference_class: the ``class_name`` argument.
        slot: the ``slot`` argument.
        family_tag: the grouping tag, or ``slot_0xNN_all`` when the tag is empty.
        member_count: number of member records.
        sibling_count: ``member_count - 1``.
        members: per-member records (entry, class, slot, body length and the
            diff against the reference), reference first, then by entry index.

    Raises:
        KeyError: no event row matches ``class_name`` and ``slot``.
        ValueError: the reference body cannot be loaded.
    """
    class_event_index, _, _, _ = extracted_root_paths(extracted_root)
    event_rows = load_tsv_rows(class_event_index)

    # Locate the reference row.
    wanted_class = class_name.upper()
    ref_row = next(
        (
            row for row in event_rows
            if row["class_name_hint"].upper() == wanted_class
            and try_parse_int(row.get("slot", "")) == slot
            and try_parse_int(row.get("entry_index", "")) is not None
        ),
        None,
    )
    if ref_row is None:
        raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}")
    family_tag = ref_row.get("repeated_template_status", "").strip()
    ref_entry = parse_int(ref_row["entry_index"])

    # Collect family members: same family_tag when it is meaningful, else same slot.
    if family_tag and family_tag != "unique":
        family_rows = [
            row for row in event_rows
            if row.get("repeated_template_status", "").strip() == family_tag
            and try_parse_int(row.get("entry_index", "")) is not None
        ]
    else:
        family_rows = [
            row for row in event_rows
            if try_parse_int(row.get("slot", "")) == slot
            and try_parse_int(row.get("entry_index", "")) is not None
        ]

    def _load_body(row: dict[str, str]) -> bytes | None:
        """Return the row's body bytes, or None if it has no range or no readable chunk."""
        body_start_str = row.get("derived_body_start", "")
        body_end_str = row.get("derived_body_end", "")
        if not body_start_str or not body_end_str:
            return None
        try:
            chunk = find_chunk_file(parse_int(row["entry_index"]), extracted_root)
            data = chunk.read_bytes()
            return data[parse_int(body_start_str):parse_int(body_end_str)]
        except (FileNotFoundError, ValueError):
            return None

    ref_body = _load_body(ref_row)
    if ref_body is None:
        raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}")

    members: list[dict[str, Any]] = []
    for row in family_rows:
        entry_idx = parse_int(row["entry_index"])
        body = _load_body(row)
        # Identity, not entry_index equality: several slots of one entry can
        # share a family tag, and only the selected row is the reference.
        is_ref = row is ref_row
        member: dict[str, Any] = {
            "entry_index": entry_idx,
            "class_name": row["class_name_hint"],
            "slot": try_parse_int(row.get("slot", "")),
            "body_length": len(body) if body is not None else None,
            "is_reference": is_ref,
        }
        if is_ref:
            member["diff_vs_reference"] = {"identical": True, "note": "reference"}
        elif body is not None:
            prefix = _common_prefix_len(ref_body, body)
            suffix = _common_suffix_len(ref_body, body, prefix)
            # A zero suffix needs an open-ended slice; [prefix:-0] would be empty.
            ref_diff_window = ref_body[prefix:len(ref_body) - suffix] if suffix else ref_body[prefix:]
            member_diff_window = body[prefix:len(body) - suffix] if suffix else body[prefix:]
            member["diff_vs_reference"] = {
                "common_prefix_bytes": prefix,
                "common_suffix_bytes": suffix,
                "ref_diff_window_hex": ref_diff_window.hex(),
                "member_diff_window_hex": member_diff_window.hex(),
                "diff_window_length_ref": len(ref_diff_window),
                "diff_window_length_member": len(member_diff_window),
                "identical": ref_body == body,
            }
        members.append(member)

    members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"]))
    return {
        "reference_entry": ref_entry,
        "reference_class": class_name,
        "slot": slot,
        "family_tag": family_tag or f"slot_0x{slot:02X}_all",
        "member_count": len(members),
        "sibling_count": len(members) - 1,
        "members": members,
    }
def render_family_diff_text(diff: dict[str, Any]) -> str:
    """Render a ``compute_family_diff`` result as a plain-text report.

    The report has a three-line header, a blank line, then one line per
    member.  Non-reference members that carry a ``diff_vs_reference`` record
    get either an "identical" line or the prefix/suffix counts and both diff
    windows.  A member whose ``slot`` or ``body_length`` is ``None`` shows
    ``?`` for that value.  The text always ends with a newline.
    """
    lines = [
        f"Family diff: {diff['family_tag']}",
        f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}",
        f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}",
        "",
    ]
    for m in diff["members"]:
        tag = " [REF]" if m["is_reference"] else ""
        body_len = m["body_length"] if m["body_length"] is not None else "?"
        # The member slot can be None when the index row's slot did not parse;
        # formatting None with :02X would raise.
        slot_text = f"0x{m['slot']:02X}" if m["slot"] is not None else "?"
        lines.append(f" entry={m['entry_index']} class={m['class_name']} slot={slot_text} body_len={body_len}{tag}")
        d = m.get("diff_vs_reference")
        if d and not m["is_reference"]:
            if d["identical"]:
                lines.append(" identical to reference")
            else:
                lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}")
                lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}")
                lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}")
    return "\n".join(lines) + "\n"
def build_listing_labels(ir: dict[str, Any]) -> dict[int, str]:
    """Map every integer jump target in the IR to an ``L_XXXX`` label.

    Keys are the op's ``target_offset`` plus the event's derived body start;
    values are that sum rendered as four uppercase hex digits.
    """
    labels: dict[int, str] = {}
    for op in ir["ops"]:
        operands = op["operands"]
        if "target_offset" not in operands:
            continue
        target_offset = operands["target_offset"]
        if not isinstance(target_offset, int):
            continue
        absolute = target_offset + ir["event"]["derived_body_start"]
        labels[absolute] = f"L_{absolute:04X}"
    return labels
def build_script_blocks(ir: dict[str, Any]) -> tuple[dict[int, str], list[tuple[str, list[dict[str, Any]]]]]:
    """Split the IR ops into labelled basic blocks.

    A block leader is the first op, any integer jump target, and the op that
    follows a ``jne``/``jmp``/``foreach_list``/``foreach_slist``.  The lowest
    leader offset is labelled ``entry``; the others ``block_XXXX``.

    Returns:
        ``(label_map, blocks)`` where ``label_map`` maps absolute offsets to
        labels and ``blocks`` is a list of ``(label, ops)`` in op order.
        Both are empty when the IR has no ops.
    """
    ops = ir["ops"]
    if not ops:
        return {}, []

    branch_mnemonics = {"jne", "jmp", "foreach_list", "foreach_slist"}
    first_offset = ops[0]["absolute_body_offset"]
    leaders = {first_offset}
    op_count = len(ops)
    for position, op in enumerate(ops):
        target_offset = op["operands"].get("target_offset")
        if isinstance(target_offset, int):
            leaders.add(ir["event"]["derived_body_start"] + target_offset)
        if op["mnemonic"] in branch_mnemonics and position + 1 < op_count:
            # Whatever follows a branch starts a new block.
            leaders.add(ops[position + 1]["absolute_body_offset"])

    label_map = {
        leader: ("entry" if rank == 0 else f"block_{leader:04X}")
        for rank, leader in enumerate(sorted(leaders))
    }

    blocks: list[tuple[str, list[dict[str, Any]]]] = []
    active_label = label_map[first_offset]
    active_ops: list[dict[str, Any]] = []
    for op in ops:
        here = op["absolute_body_offset"]
        starts_new_block = here in label_map and bool(active_ops) and label_map[here] != active_label
        if starts_new_block:
            blocks.append((active_label, active_ops))
            active_label, active_ops = label_map[here], []
        active_ops.append(op)
    if active_ops:
        blocks.append((active_label, active_ops))
    return label_map, blocks
def format_script_string(value: str) -> str:
    """Return ``value`` as a double-quoted literal with ``\\`` and ``"`` backslash-escaped."""
    escapes = {ord("\\"): "\\\\", ord('"'): '\\"'}
    return '"' + value.translate(escapes) + '"'
def format_generic_operands(operands: dict[str, Any], label_map: dict[int, str], body_start: int) -> str:
    """Render an operand dict as space-separated ``key=value`` pairs.

    ``None`` and empty-string values are skipped.  An integer
    ``target_offset`` is shown as ``target=<label>`` (or the absolute offset
    in hex when unlabelled), ``target_event_name_hint`` as ``event=...`` and
    ``intrinsic_name_hint`` as ``hint=...``.  Other integers are printed in
    hex unless the key ends in ``_signed``.
    """

    def render(key: str, value: Any) -> str:
        if key == "target_offset" and isinstance(value, int):
            absolute = body_start + value
            return f"target={label_map.get(absolute, f'0x{absolute:04X}')}"
        if key == "target_event_name_hint":
            return f"event={value}"
        if key == "intrinsic_name_hint":
            return f"hint={value}"
        if isinstance(value, int) and not key.endswith("_signed"):
            return f"{key}=0x{value:X}"
        return f"{key}={value}"

    return " ".join(
        render(key, value)
        for key, value in operands.items()
        if not (value is None or value == "")
    )
def format_script_statement(op: dict[str, Any], label_map: dict[int, str], body_start: int) -> str:
    """Render one decoded op as a single line of the script listing.

    Mnemonics with a dedicated rendering are handled first.  Exact-name
    checks for ``push_local_list``, ``push_local_addr``, ``pop_local_blob``
    and ``pop_member_blob`` come before the ``push_local_``/``push_member_``/
    ``pop_local_``/``pop_member_`` prefix checks, which would otherwise
    capture them.  Anything else is printed as its bare mnemonic if it is a
    known no-argument op, or as the mnemonic plus generically formatted
    operands.

    Args:
        op: decoded op with ``mnemonic`` and ``operands``.
        label_map: absolute offset -> block label; unlabelled jump targets
            are shown as a hex offset.
        body_start: absolute offset of the body, added to ``target_offset``.
    """
    mnemonic = op["mnemonic"]
    operands = op["operands"]

    def jump_target() -> str:
        absolute = body_start + operands["target_offset"]
        return label_map.get(absolute, f"0x{absolute:04X}")

    def event_suffix() -> str:
        event_hint = operands.get("target_event_name_hint")
        return f" {event_hint}" if event_hint else ""

    if mnemonic == "init":
        return f"init locals 0x{operands['local_bytes']:02X}"
    if mnemonic == "push_byte_immediate":
        return f"push 0x{operands['value_u8']:02X} ; signed {operands['value_signed']}"
    if mnemonic == "push_word_immediate":
        return f"push 0x{operands['value_u16']:04X}"
    if mnemonic == "push_dword_immediate":
        return f"push 0x{operands['value_u32']:08X}"
    if mnemonic == "push_string_immediate":
        return f"push {format_script_string(operands['string'])}"
    # These specific push forms must be tested before the generic prefix match.
    if mnemonic == "push_local_list":
        return f"push {operands['target']}<list:{operands['element_size']}>"
    if mnemonic == "push_local_addr":
        return f"push &{operands['target']}"
    if mnemonic.startswith("push_local_") or mnemonic.startswith("push_member_"):
        return f"push {operands['target']}"
    if mnemonic == "push_list_element":
        return f"push list_element size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "push_huge":
        return f"push huge 0x{operands['value_a']:02X}:0x{operands['value_b']:02X}"
    if mnemonic == "push_global":
        return f"push global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}"
    if mnemonic == "push_string_ptr":
        return f"push string_ptr {operands['target']}"
    # Blob pops must be tested before the generic pop prefix match.
    if mnemonic == "pop_local_blob" or mnemonic == "pop_member_blob":
        return f"pop_blob -> {operands['target']} size=0x{operands['size']:X}"
    if mnemonic.startswith("pop_local_") or mnemonic.startswith("pop_member_"):
        return f"pop -> {operands['target']}"
    if mnemonic == "pop_list_element":
        return f"pop_list_element -> {operands['target']} elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "pop_global":
        return f"pop -> global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}"
    if mnemonic == "call_intrinsic":
        hint = operands.get("intrinsic_name_hint") or "unknown_intrinsic"
        return f"call intrinsic 0x{operands['intrinsic_ordinal']:04X} {hint} args=0x{operands['arg_bytes']:02X}"
    if mnemonic == "call_near":
        return f"call {jump_target()}"
    if mnemonic == "call_class_event":
        return f"call class 0x{operands['target_class_id']:04X}.slot 0x{operands['target_event_slot']:02X}{event_suffix()}"
    if mnemonic in {"append_unique_inline", "append_unique_indirect", "remove_matching_indirect", "remove_matching_inline"}:
        return f"{mnemonic} size=0x{operands['element_size']:X}"
    if mnemonic == "create_list":
        return f"create_list elem_size=0x{operands['element_size']:X} count=0x{operands['count']:X}"
    if mnemonic == "in_list":
        return f"in_list elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "jne":
        return f"if pop() != 0 goto {jump_target()}"
    if mnemonic == "jmp":
        return f"goto {jump_target()}"
    if mnemonic in {"foreach_list", "foreach_slist"}:
        return f"{mnemonic} {operands['target_var']} elem_size=0x{operands['element_size']:X} -> {jump_target()}"
    if mnemonic == "spawn":
        return (
            f"spawn class 0x{operands['target_class_id']:04X}.slot 0x{operands['target_event_slot']:02X}{event_suffix()} "
            f"args=0x{operands['arg_bytes']:02X} this_size=0x{operands['this_size']:02X}"
        )
    if mnemonic == "spawn_inline":
        return (
            f"spawn_inline class 0x{operands['target_class_id']:04X}.slot 0x{operands['target_event_slot']:02X}{event_suffix()} "
            f"inline=0x{operands['inline_offset']:04X} this_size=0x{operands['this_size']:02X} unk=0x{operands['unknown']:02X}"
        )
    if mnemonic == "line_number":
        return f"line {operands['line_number']}"
    if mnemonic == "symbol_info":
        return f"symbol_info {operands['symbol']} -> 0x{operands['symbol_offset']:04X}"
    if mnemonic == "global_address":
        return f"push &global[0x{operands['global_id']:04X}]"
    if mnemonic == "ret":
        return "return"
    if mnemonic in NO_ARG_MNEMONICS.values():
        return mnemonic
    rendered_operands = format_generic_operands(operands, label_map, body_start)
    return f"{mnemonic} {rendered_operands}".rstrip()
def render_script(ir: dict[str, Any]) -> str:
    """Render the IR as a block-structured script listing.

    The listing has a ``script Class.event (...)`` header, body-range and
    raw-entry metadata, an optional ``locals`` section built from the debug
    symbols, one labelled section per basic block with each statement
    followed by its offset and raw bytes, an optional ``field_tags`` section,
    and the unknown trailing bytes if any.  The text ends with a newline.
    """
    label_map, blocks = build_script_blocks(ir)
    event = ir["event"]
    klass = ir["class"]
    slot_name = event["event_name_hint"] or f"slot_{event['slot']:02X}"

    header = (
        f"script {klass['class_name']}.{slot_name} "
        f"(entry={klass['entry_index']}, class_id=0x{klass['class_id']:04X}, slot=0x{event['slot']:02X})"
    )
    lines = [header, "{"]
    lines.append(f" body_range 0x{event['derived_body_start']:04X}..0x{event['derived_body_end']:04X}")
    lines.append(f" raw_event_word 0x{event['raw_event_entry_word']:04X}")
    lines.append(f" raw_code_offset 0x{event['raw_code_offset']:08X}")
    lines.append(f" end_reason {ir['body']['end_reason']}")

    if ir["debug_symbols"]:
        lines.extend([" locals", " {"])
        lines.extend(
            f" {symbol['bp_repr']} {symbol['name']} ; type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') unk1=0x{symbol['unknown1']:02X} unk3=0x{symbol['unknown3']:02X}"
            for symbol in ir["debug_symbols"]
        )
        lines.append(" }")
    lines.append("")

    for label, block_ops in blocks:
        lines.append(f" {label}:")
        for op in block_ops:
            statement = format_script_statement(op, label_map, ir["event"]["derived_body_start"])
            lines.append(f" {statement} ; {op['absolute_body_offset']:04X}: {op['raw_bytes']}")
        lines.append("")

    if ir["field_tags"]:
        lines.extend([" field_tags", " {"])
        lines.extend(
            f" {tag['bp_repr']} {tag['name']} ; tag=0x{tag['tag_id']:02X} kind=0x{tag['value_kind']:02X}"
            for tag in ir["field_tags"]
        )
        lines.append(" }")

    trailing = ir["body"]["unknown_trailing_bytes"]
    if trailing:
        lines.append(f" unknown_trailing_bytes {trailing}")
    lines.append("}")
    return "\n".join(lines) + "\n"
def sanitize_identifier(name: str) -> str:
    """Turn an arbitrary name into an identifier-like string.

    Surrounding whitespace is stripped, every character that is neither
    alphanumeric nor ``_`` becomes ``_``, and leading/trailing underscores
    are removed.  An empty result becomes ``var``; a result starting with a
    digit is prefixed with ``v_``.
    """
    replaced = "".join(
        char if (char == "_" or char.isalnum()) else "_"
        for char in name.strip()
    )
    identifier = replaced.strip("_")
    if not identifier:
        identifier = "var"
    if identifier[0].isdigit():
        return f"v_{identifier}"
    return identifier
def build_local_name_map(ir: dict[str, Any]) -> dict[int, str]:
    """Map each debug symbol's ``bp_offset`` to its sanitized name.

    When several symbols share an offset, the last one in the list wins.
    """
    names: dict[int, str] = {}
    for symbol in ir["debug_symbols"]:
        names[symbol["bp_offset"]] = sanitize_identifier(symbol["name"])
    return names
def format_bp_name(bp_offset: int, local_name_map: dict[int, str]) -> str:
    """Return a display name for a BP-relative slot.

    A name from ``local_name_map`` is used when present.  Otherwise the
    offset is converted with ``signed_byte`` and shown as ``arg_XX`` for a
    non-negative displacement or ``local_XX`` for a negative one.
    """
    if bp_offset not in local_name_map:
        displacement = signed_byte(bp_offset)
        kind = "arg" if displacement >= 0 else "local"
        return f"{kind}_{abs(displacement):02X}"
    return local_name_map[bp_offset]
def intrinsic_display_name(name_hint: str | None, ordinal: int) -> str:
    """Return a short call name for an intrinsic.

    Without a hint the name is ``intrinsic_XXXX`` built from the ordinal.
    With a hint, ``::`` becomes ``.``, an ``I_`` prefix is dropped both at
    the start and directly after a dot, and everything from the first ``(``
    onward is removed.
    """
    if not name_hint:
        return f"intrinsic_{ordinal:04X}"
    dotted = re.sub(r"(?<=\.)I_", "", name_hint.replace("::", "."))
    if dotted.startswith("I_"):
        dotted = dotted[2:]
    base_name, _, _ = dotted.partition("(")
    return base_name
def push_expr_from_op(op: dict[str, Any], local_name_map: dict[int, str]) -> tuple[str, int] | None:
    """Return ``(expression_text, width)`` for a value-pushing op.

    Returns ``None`` for any mnemonic that this function does not treat as a
    simple push.
    """
    mnemonic = op["mnemonic"]
    operands = op["operands"]

    # Pushes whose text and width do not depend on the operands.
    constant_pushes = {
        "push_pid": ("pid", 2),
        "push_process_result": ("process_result", 2),
    }
    # BP-relative local pushes and the width reported for each.
    local_widths = {
        "push_local_byte": 2,
        "push_local_word": 2,
        "push_local_dword": 4,
        "push_local_string": 2,
        "push_local_slist": 2,
        "push_local_addr": 4,
        "push_string_ptr": 4,
    }
    member_widths = {
        "push_member_byte": 2,
        "push_member_word": 2,
        "push_member_dword": 4,
        "push_member_huge": 4,
    }

    if mnemonic in constant_pushes:
        return constant_pushes[mnemonic]
    if mnemonic in local_widths:
        return (format_bp_name(operands["bp_offset"], local_name_map), local_widths[mnemonic])
    if mnemonic in member_widths:
        return (f"member.{format_bp_name(operands['bp_offset'], local_name_map)}", member_widths[mnemonic])

    result: tuple[str, int] | None = None
    if mnemonic == "push_byte_immediate":
        result = (str(operands["value_signed"]), 1)
    elif mnemonic == "push_word_immediate":
        result = (f"0x{operands['value_u16']:04X}", 2)
    elif mnemonic == "push_dword_immediate":
        result = (f"0x{operands['value_u32']:08X}", 4)
    elif mnemonic == "push_string_immediate":
        result = (format_script_string(operands["string"]), max(2, operands["declared_length"]))
    elif mnemonic == "push_local_list":
        result = (format_bp_name(operands["bp_offset"], local_name_map), max(2, operands["element_size"]))
    elif mnemonic == "push_list_element":
        result = (f"list_element(size=0x{operands['element_size']:X})", max(1, operands["element_size"]))
    elif mnemonic == "push_huge":
        result = (f"0x{operands['value_a']:02X}{operands['value_b']:02X}", 4)
    elif mnemonic == "push_global":
        result = (f"global[0x{operands['global_id']:04X}]", max(1, operands["size"]))
    return result
def pop_stack_bytes(stack: list[tuple[str, int]], byte_count: int) -> list[str]:
    """Pop expressions off ``stack`` until at least ``byte_count`` bytes are consumed.

    Each popped entry counts for its width, with a minimum of one byte.  The
    expressions are returned in their original stack order (deepest first).
    ``stack`` is mutated in place.
    """
    popped: list[str] = []
    remaining = byte_count
    while remaining > 0 and stack:
        expr, width = stack.pop()
        popped.insert(0, expr)
        remaining -= max(1, width)
    return popped
def combine_binary(stack: list[tuple[str, int]], operator: str, result_width: int = 2) -> None:
    """Replace the top two stack expressions with ``(left operator right)``.

    The stack is left untouched when it holds fewer than two entries.
    """
    if len(stack) >= 2:
        rhs, _ = stack.pop()
        lhs, _ = stack.pop()
        combined = f"({lhs} {operator} {rhs})"
        stack.append((combined, result_width))
def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]]]:
    """Turn each basic block of the IR into a list of pseudocode statements.

    Every block is simulated with its own expression stack of
    ``(text, width)`` pairs.  Push ops add expressions, operator ops fold the
    top entries, and calls, stores, jumps and returns emit statements.  Ops
    with no dedicated handling are emitted as ``/* mnemonic operands */``.
    Processing of a block stops at its first ``ret``.

    Returns:
        A list of ``(label, statements)`` in block order.
    """
    label_map, blocks = build_script_blocks(ir)
    local_name_map = build_local_name_map(ir)
    skip_mnemonics = {"line_number", "symbol_info", "add_sp", "init"}
    # mnemonic -> (operator text, width of the combined result)
    binary_operators = {
        "add": ("+", 2),
        "add_dword": ("+", 4),
        "sub": ("-", 2),
        "sub_dword": ("-", 4),
        "mul": ("*", 2),
        "mul_dword": ("*", 4),
        "div": ("/", 2),
        "div_dword": ("/", 4),
        "bit_and": ("&", 2),
        "bit_or": ("|", 2),
        "and": ("&&", 2),
        "or": ("||", 2),
        "cmp": ("!=", 2),
        "ne": ("!=", 2),
        "lt": ("<", 2),
        "le": ("<=", 2),
        "gt": (">", 2),
        "ge": (">=", 2),
    }
    retval_widths = {"push_retval_byte": 1, "push_retval_word": 2, "push_retval_dword": 4}
    # Statement prefix for each call-like mnemonic.
    call_prefixes = {"call_class_event": "", "spawn": "spawn ", "spawn_inline": "spawn_inline "}

    def jump_label(operands: dict[str, Any]) -> str:
        absolute = ir["event"]["derived_body_start"] + operands["target_offset"]
        return label_map.get(absolute, f"block_{absolute:04X}")

    pending_result: str | None
    rendered_blocks: list[tuple[str, list[str]]] = []
    for label, ops in blocks:
        stack: list[tuple[str, int]] = []
        pending_result = None
        block_lines: list[str] = []
        for op in ops:
            mnemonic = op["mnemonic"]
            operands = op["operands"]

            pushed = push_expr_from_op(op, local_name_map)
            if pushed is not None:
                stack.append(pushed)
                continue
            if mnemonic in skip_mnemonics:
                continue

            if mnemonic == "push_indirect":
                if stack:
                    expr, _ = stack.pop()
                    stack.append((f"*({expr})", max(1, operands["size"])))
                continue
            if mnemonic == "set_info":
                args = ", ".join(expr for expr, _ in stack)
                stack.clear()
                block_lines.append(f"set_info({args});")
                continue
            if mnemonic == "process_exclude":
                block_lines.append("process_exclude();")
                continue
            if mnemonic == "call_intrinsic":
                # The call text is held back until a push_retval op claims it.
                arg_exprs = pop_stack_bytes(stack, operands["arg_bytes"])
                callee = intrinsic_display_name(operands.get("intrinsic_name_hint"), operands["intrinsic_ordinal"])
                pending_result = f"{callee}({', '.join(arg_exprs)})"
                continue
            if mnemonic in retval_widths:
                stack.append((pending_result or "retval", retval_widths[mnemonic]))
                pending_result = None
                continue
            if mnemonic in call_prefixes:
                arg_text = ", ".join(expr for expr, _ in stack)
                stack.clear()
                event_name = operands.get("target_event_name_hint") or f"slot_{operands['target_event_slot']:02X}"
                call_text = f"{call_prefixes[mnemonic]}class_{operands['target_class_id']:04X}_{sanitize_identifier(event_name)}({arg_text})"
                if mnemonic == "spawn_inline":
                    call_text += f" /* inline=0x{operands['inline_offset']:04X} */"
                block_lines.append(f"{call_text};")
                pending_result = None
                continue
            if mnemonic in binary_operators:
                operator_text, result_width = binary_operators[mnemonic]
                combine_binary(stack, operator_text, result_width)
                continue
            if mnemonic == "not":
                if stack:
                    expr, width = stack.pop()
                    stack.append((f"(!{expr})", width))
                continue
            if mnemonic == "implies":
                expr = stack.pop()[0] if stack else "retval"
                stack.append((f"implies({expr}, 0x{operands['arg0']:X}, 0x{operands['arg1']:X})", 1))
                continue
            if mnemonic == "pop_temp":
                if stack:
                    stack.pop()
                continue
            if mnemonic == "suspend":
                block_lines.append("suspend;")
                stack.clear()
                continue
            if mnemonic == "jne":
                target = jump_label(operands)
                condition = stack.pop()[0] if stack else "condition"
                block_lines.append(f"if {condition} goto {target};")
                continue
            if mnemonic == "jmp":
                block_lines.append(f"goto {jump_label(operands)};")
                stack.clear()
                continue
            if mnemonic in {"foreach_list", "foreach_slist"}:
                target = jump_label(operands)
                block_lines.append(
                    f"{mnemonic} {format_bp_name(operands['bp_offset'], local_name_map)} -> {target};"
                )
                continue
            if mnemonic == "ret":
                block_lines.append("return;")
                stack.clear()
                break
            if mnemonic.startswith("pop_local_") or mnemonic.startswith("pop_member_"):
                expr = stack.pop()[0] if stack else "value"
                target_name = format_bp_name(operands["bp_offset"], local_name_map)
                block_lines.append(f"{target_name} = {expr};")
                continue

            rendered_operands = format_generic_operands(operands, label_map, ir["event"]["derived_body_start"])
            block_lines.append(f"/* {mnemonic} {rendered_operands} */")
        rendered_blocks.append((label, block_lines))
    return rendered_blocks
@dataclass(frozen=True)
class TerminalStatement:
    """Parsed form of a pseudocode statement that ends a block.

    Instances are produced by ``parse_terminal_statement`` with ``kind`` set
    to ``"return"``, ``"goto"`` or ``"if"``.
    """

    # Statement category: "return", "goto" or "if".
    kind: str
    # Condition text of an "if <condition> goto <label>;" statement, else None.
    condition: str | None = None
    # Destination label of a "goto" or "if" statement; None for "return".
    target: str | None = None
def parse_terminal_statement(statement: str) -> TerminalStatement | None:
    """Classify ``statement`` as a control-flow terminal, if it is one.

    Recognises exactly three shapes: ``return;``, ``goto <label>;`` and
    ``if <condition> goto <label>;``. Any other text yields ``None``.
    """
    if statement == "return;":
        return TerminalStatement(kind="return")

    if (jump := re.fullmatch(r"goto ([A-Za-z0-9_]+);", statement)) is not None:
        return TerminalStatement(kind="goto", target=jump.group(1))

    if (branch := re.fullmatch(r"if (.+) goto ([A-Za-z0-9_]+);", statement)) is not None:
        condition_text, label = branch.groups()
        return TerminalStatement(kind="if", condition=condition_text, target=label)

    return None
def strip_outer_parens(expr: str) -> str:
    """Peel off parenthesis pairs that enclose the entire expression.

    Surrounding whitespace is trimmed first. A leading ``(`` and trailing
    ``)`` are removed only when they match each other, so ``(a) && (b)`` is
    returned unchanged while ``((a))`` becomes ``a``.
    """
    candidate = expr.strip()
    while candidate[:1] == "(" and candidate[-1:] == ")":
        final_position = len(candidate) - 1
        level = 0
        wraps_everything = True
        for position, symbol in enumerate(candidate):
            if symbol == "(":
                level += 1
            elif symbol == ")":
                level -= 1
                # The opening paren closed before the end (or the text is
                # unbalanced): the outer pair is not a single wrapper.
                if level < 0 or (level == 0 and position != final_position):
                    wraps_everything = False
                    break
        if not wraps_everything or level != 0:
            break
        candidate = candidate[1:-1].strip()
    return candidate
def invert_condition_text(condition: str) -> str:
    """Return pseudocode text for the logical negation of ``condition``.

    Redundant outer parentheses are removed first. The negation is then
    produced by the first rule that applies:

    * a single comparison operator at parenthesis depth 0 is flipped
      (``a == b`` becomes ``a != b``, ``a < b`` becomes ``a >= b``, ...);
    * a leading ``!`` applied to a single operand is dropped;
    * a bare identifier or a single call gets a ``!`` prefix;
    * anything else is wrapped as ``!(<expr>)``.

    Operators nested inside parentheses are never rewritten, and an
    expression joined by a top-level logical connective is always wrapped,
    because flipping one operand of ``(a == b) && (c == d)`` would not
    negate the whole expression.
    """
    expr = strip_outer_parens(condition)
    comparisons = {
        " != ": " == ",
        " == ": " != ",
        " <= ": " > ",
        " >= ": " < ",
        " < ": " >= ",
        " > ": " <= ",
    }

    # Build a same-length copy of expr in which everything nested inside
    # parentheses is blanked out, so searches below only see depth-0 text
    # while match positions still index into expr.
    depth = 0
    masked_chars: list[str] = []
    for char in expr:
        if char == ")":
            depth -= 1
        masked_chars.append(char if depth <= 0 else "\x00")
        if char == "(":
            depth += 1
    masked = "".join(masked_chars)

    has_connective = any(token in masked for token in (" && ", " || ", " and ", " or "))
    if not has_connective:
        hits = [
            (position, source)
            for source in comparisons
            if (position := masked.find(source)) != -1
        ]
        # Flip only an unambiguous, single top-level comparison.
        if len(hits) == 1 and masked.count(hits[0][1]) == 1:
            position, source = hits[0]
            return f"{expr[:position]}{comparisons[source]}{expr[position + len(source):]}"
        # "!x" / "!(...)": removing the negation is only valid when it
        # applies to the whole remainder (no further top-level operands).
        if expr.startswith("!") and " " not in masked[1:].strip():
            return strip_outer_parens(expr[1:])
        # Identifier optionally followed by exactly one call group.
        if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_:.]*(\(\x00*\))?", masked):
            return f"!{expr}"
    return f"!({expr})"
def indent_lines(lines: list[str], prefix: str = " ") -> list[str]:
    """Return a copy of ``lines`` with ``prefix`` prepended to non-empty entries.

    Empty entries are emitted as empty strings so blank lines carry no
    trailing whitespace.
    """
    indented: list[str] = []
    for text in lines:
        if text:
            indented.append(f"{prefix}{text}")
        else:
            indented.append("")
    return indented
def detect_noop_compare_chain(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    start_index: int,
    end_index: int,
) -> int | None:
    """Find where a chain of compare/goto block pairs with no effect ends.

    The chain is a sequence of adjacent block pairs: a block holding only an
    ``if ... goto`` followed by a block holding only a ``goto``. Every
    ``goto`` must name the same label. Each ``if`` must target the block
    directly after its pair; the chain ends at the pair whose ``if`` targets
    the shared ``goto`` label.

    Returns the index of the block that follows the chain, or ``None`` when
    the blocks starting at ``start_index`` do not form such a chain inside
    ``[start_index, end_index)``.
    """
    shared_goto_label: str | None = None
    position = start_index
    while position + 1 < end_index:
        compare_body = blocks[position][1]
        goto_body = blocks[position + 1][1]
        if not (len(compare_body) == 1 and len(goto_body) == 1):
            return None

        branch = parse_terminal_statement(compare_body[0])
        jump = parse_terminal_statement(goto_body[0])
        if branch is None or branch.kind != "if":
            return None
        if jump is None or jump.kind != "goto":
            return None

        if shared_goto_label is None:
            shared_goto_label = jump.target
        elif jump.target != shared_goto_label:
            return None

        # Whether the chain ends or continues, the label in question has to
        # resolve to the block right after this pair and stay in range.
        following = position + 2
        chain_ends_here = branch.target == shared_goto_label
        wanted_label = shared_goto_label if chain_ends_here else branch.target
        landing = label_to_index.get(wanted_label or "")
        if landing is None or landing != following or landing >= end_index:
            return None
        if chain_ends_here:
            return landing
        position = following
    return None
def last_nonempty_block_index(
    blocks: list[tuple[str, list[str]]],
    start_index: int,
    end_index: int,
) -> int | None:
    """Return the highest index in ``[start_index, end_index)`` whose block has statements.

    Returns ``None`` when every block in the range is empty or the range
    itself is empty.
    """
    candidates = reversed(range(start_index, end_index))
    return next((position for position in candidates if blocks[position][1]), None)
def parse_selector_condition(condition: str) -> tuple[str, str] | None:
    """Split a ``<lhs> != <rhs>`` condition into its two operand texts.

    Outer parentheses are removed before matching. Returns the stripped
    ``(lhs, rhs)`` pair, or ``None`` when the text is not an inequality.
    """
    inequality = re.fullmatch(r"(.+?)\s*!=\s*(.+)", strip_outer_parens(condition))
    if inequality is None:
        return None
    lhs, rhs = inequality.groups()
    return lhs.strip(), rhs.strip()
def render_selector_chain(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    start_index: int,
    end_index: int,
    return_labels: set[str],
) -> tuple[list[str], int] | None:
    """Render a run of ``selector != value`` tests as an if / else-if ladder.

    Starting at ``blocks[start_index]``, each block of the chain must end in
    ``if <selector> != <value> goto <next>;`` with the same ``<selector>``
    text throughout. The blocks between a test and its target form that
    branch's body; the last non-empty body block must end in a ``goto`` to a
    join label shared by every branch. The chain is complete when a test
    targets the join label itself.

    Statements that precede the test in the first block are not rendered
    here; the caller is responsible for them.

    Args:
        blocks: ``(label, statements)`` pairs in layout order.
        label_to_index: Maps each block label to its index in ``blocks``.
        start_index: Index of the first test block.
        end_index: Exclusive upper bound of the region being rendered.
        return_labels: Labels treated as plain ``return;`` by the body renderer.

    Returns:
        ``(lines, join_index)`` where ``lines`` is the rendered ladder and
        ``join_index`` is the block index of the join label, or ``None``
        when the blocks do not match this shape.
    """
    if not blocks[start_index][1]:
        return None
    base_terminal = parse_terminal_statement(blocks[start_index][1][-1])
    if base_terminal is None or base_terminal.kind != "if":
        return None
    selector = parse_selector_condition(base_terminal.condition or "")
    if selector is None:
        return None
    selector_expr, _ = selector

    cursor = start_index
    join_label: str | None = None
    branches: list[tuple[str, list[str]]] = []
    while cursor < end_index:
        _, statements = blocks[cursor]
        if not statements:
            return None
        # A follow-up test block becomes an "else if" header, which has no
        # place for statements ahead of the test. Previously such statements
        # were silently dropped; refuse the match instead so another
        # rendering is used.
        if cursor != start_index and len(statements) != 1:
            return None
        terminal = parse_terminal_statement(statements[-1])
        if terminal is None or terminal.kind != "if":
            return None
        parsed = parse_selector_condition(terminal.condition or "")
        if parsed is None or parsed[0] != selector_expr:
            return None

        target_label = terminal.target or ""
        target_index = label_to_index.get(target_label)
        # The branch body must contain at least one block and stay in range.
        if target_index is None or target_index <= cursor + 1 or target_index > end_index:
            return None

        body_tail_index = last_nonempty_block_index(blocks, cursor + 1, target_index)
        if body_tail_index is None:
            return None
        body_tail_terminal = parse_terminal_statement(blocks[body_tail_index][1][-1])
        if body_tail_terminal is None or body_tail_terminal.kind != "goto":
            return None

        current_join = body_tail_terminal.target or ""
        current_join_index = label_to_index.get(current_join)
        if current_join_index is None or current_join_index > end_index:
            return None
        # The join point must not lie before the next test.
        if current_join_index < target_index:
            return None
        if current_join_index == target_index and target_label != current_join:
            return None
        if join_label is None:
            join_label = current_join
        elif current_join != join_label:
            return None

        body_result = render_structured_region(
            blocks,
            label_to_index,
            cursor + 1,
            target_index,
            return_labels,
            {join_label},
        )
        if body_result is None:
            return None
        body_lines, _ = body_result
        # The test jumps past the body when it holds, so the body runs under
        # the inverted condition.
        branches.append((invert_condition_text(terminal.condition or "condition"), body_lines))
        if target_label == join_label:
            break
        cursor = target_index

    if join_label is None:
        return None

    rendered: list[str] = []
    for index, (condition, body_lines) in enumerate(branches):
        branch_head = "if" if index == 0 else "else if"
        rendered.append(f"{branch_head} ({condition}) {{")
        rendered.extend(indent_lines(body_lines))
        rendered.append("}")
    return rendered, label_to_index[join_label]
def _is_single_if_chain(rendered: list[str]) -> bool:
    """Return True when ``rendered`` is exactly one if / else-if / else ladder."""
    if not rendered or not rendered[0].startswith("if "):
        return False
    # After the opening line, only indented body lines, closing braces and
    # "else ..." continuations may appear at the top level.
    return all(
        not line or line == "}" or line[:1].isspace() or line.startswith("else ")
        for line in rendered[1:]
    )


def render_structured_region(
    blocks: list[tuple[str, list[str]]],
    label_to_index: dict[str, int],
    start_index: int,
    end_index: int,
    return_labels: set[str],
    exit_labels: set[str] | None = None,
) -> tuple[list[str], bool] | None:
    """Render ``blocks[start_index:end_index]`` as structured pseudocode.

    Blocks are walked in order. A terminal ``goto`` is resolved as a return,
    a region exit, or a forward skip. A terminal ``if ... goto`` is rendered
    as the first structure that matches: a selector if / else-if ladder, a
    ``while`` loop, an ``if`` / ``else`` pair, or a plain ``if``.

    Args:
        blocks: ``(label, statements)`` pairs in layout order.
        label_to_index: Maps each block label to its index in ``blocks``.
        start_index: First block index of the region.
        end_index: Exclusive end index of the region.
        return_labels: Labels whose ``goto`` is rendered as ``return;``.
        exit_labels: Labels whose ``goto`` ends the region without output.

    Returns:
        ``(lines, falls_through)``. ``falls_through`` is ``False`` when the
        region ended on a return or an exit ``goto`` and ``True`` when the
        walk reached ``end_index``. ``None`` means the control flow could
        not be structured.
    """
    allowed_exit_labels = set(exit_labels or ())
    lines: list[str] = []
    index = start_index
    while index < end_index:
        skipped_index = detect_noop_compare_chain(blocks, label_to_index, index, end_index)
        if skipped_index is not None:
            index = skipped_index
            continue

        _, statements = blocks[index]
        if not statements:
            index += 1
            continue

        terminal = parse_terminal_statement(statements[-1])
        if terminal is None:
            # Straight-line block: emit as-is and fall into the next block.
            lines.extend(statements)
            index += 1
            continue

        lines.extend(statements[:-1])
        if terminal.kind == "return":
            lines.append("return;")
            return lines, False

        target_label = terminal.target or ""
        target_index = label_to_index.get(target_label)

        if terminal.kind == "goto":
            if target_label in return_labels:
                lines.append("return;")
                return lines, False
            if target_label in allowed_exit_labels:
                return lines, False
            if target_index is None:
                return None
            if target_index == index + 1:
                index += 1
                continue
            # Forward jump inside the region: skip the blocks in between.
            if index < target_index < end_index:
                index = target_index
                continue
            return None

        # terminal.kind == "if": only forward targets within the region.
        if target_index is None or target_index <= index or target_index > end_index:
            return None
        if target_index == index + 1:
            index += 1
            continue

        selector_chain = render_selector_chain(blocks, label_to_index, index, end_index, return_labels)
        if selector_chain is not None:
            selector_lines, selector_join_index = selector_chain
            lines.extend(selector_lines)
            index = selector_join_index
            continue

        # The test jumps over the guarded blocks when it holds, so those
        # blocks run under the inverted condition.
        inverted = invert_condition_text(terminal.condition or "condition")
        tail_index = last_nonempty_block_index(blocks, index + 1, target_index)
        tail_terminal = None
        if tail_index is not None:
            tail_terminal = parse_terminal_statement(blocks[tail_index][1][-1])

        # Loop: the guarded blocks end by jumping back to this header block.
        if (
            tail_terminal is not None
            and tail_terminal.kind == "goto"
            and tail_terminal.target == blocks[index][0]
        ):
            loop_body = render_structured_region(
                blocks,
                label_to_index,
                index + 1,
                target_index,
                return_labels,
                {blocks[index][0]},
            )
            if loop_body is not None:
                loop_lines, _ = loop_body
                lines.append(f"while ({inverted}) {{")
                lines.extend(indent_lines(loop_lines))
                lines.append("}")
                index = target_index
                continue

        # If / else: the guarded blocks end by jumping over the false branch.
        if tail_terminal is not None and tail_terminal.kind == "goto":
            join_label = tail_terminal.target or ""
            join_index = label_to_index.get(join_label)
            if join_index is not None and join_index > target_index and join_index <= end_index:
                true_result = render_structured_region(
                    blocks,
                    label_to_index,
                    index + 1,
                    target_index,
                    return_labels,
                    {join_label},
                )
                false_result = render_structured_region(
                    blocks,
                    label_to_index,
                    target_index,
                    join_index,
                    return_labels,
                    {join_label},
                )
                if true_result is not None and false_result is not None:
                    true_lines, _ = true_result
                    false_lines, _ = false_result
                    lines.append(f"if ({inverted}) {{")
                    lines.extend(indent_lines(true_lines))
                    lines.append("}")
                    if false_lines:
                        # Fold into "else if" only when the false branch is a
                        # single ladder; otherwise statements after the inner
                        # "if" would end up outside the else branch.
                        if _is_single_if_chain(false_lines):
                            lines.append(f"else {false_lines[0]}")
                            lines.extend(false_lines[1:])
                        else:
                            lines.append("else {")
                            lines.extend(indent_lines(false_lines))
                            lines.append("}")
                    index = join_index
                    continue

        # Plain if: render the guarded blocks without any exit label.
        inner_result = render_structured_region(blocks, label_to_index, index + 1, target_index, return_labels)
        if inner_result is None:
            return None
        inner_lines, inner_falls_through = inner_result
        if inner_lines:
            lines.append(f"if ({inverted}) {{")
            lines.extend(indent_lines(inner_lines))
            lines.append("}")
        elif not inner_falls_through:
            lines.append(f"if ({inverted}) {{")
            lines.append("}")
        index = target_index
    return lines, True
def render_structured_pseudocode(blocks: list[tuple[str, list[str]]]) -> list[str] | None:
    """Render every block as structured pseudocode lines.

    Blocks whose only statement is ``return;`` are registered as return
    labels. Returns ``[]`` for no blocks and ``None`` when the region
    renderer cannot structure the control flow.
    """
    if not blocks:
        return []

    index_by_label: dict[str, int] = {}
    bare_return_labels: set[str] = set()
    for position, (label, statements) in enumerate(blocks):
        index_by_label[label] = position
        if len(statements) == 1 and statements[0] == "return;":
            bare_return_labels.add(label)

    outcome = render_structured_region(blocks, index_by_label, 0, len(blocks), bare_return_labels)
    return None if outcome is None else outcome[0]
def render_partially_structured_blocks(blocks: list[tuple[str, list[str]]]) -> list[str]:
    """Render blocks as a labelled listing, collapsing selector chains.

    Each block is emitted as its label followed by its statements and a
    blank line. Where ``render_selector_chain`` recognises a chain starting
    at a block, the block's statements before its terminal test are emitted,
    then the rendered ladder, and rendering resumes at the chain's join
    block.
    """
    if not blocks:
        return []
    label_to_index = {label: index for index, (label, _) in enumerate(blocks)}
    return_labels = {
        label
        for label, statements in blocks
        if len(statements) == 1 and statements[0] == "return;"
    }
    lines: list[str] = []
    index = 0
    while index < len(blocks):
        label, statements = blocks[index]
        selector_chain = render_selector_chain(blocks, label_to_index, index, len(blocks), return_labels)
        lines.append(f" {label}:")
        if selector_chain is not None:
            selector_lines, selector_join_index = selector_chain
            # The ladder only replaces the block's terminal test; statements
            # ahead of it were previously lost, so emit them first.
            for statement in statements[:-1]:
                lines.append(f" {statement}")
            for statement in selector_lines:
                lines.append(f" {statement}" if statement else "")
            lines.append("")
            index = selector_join_index
            continue
        for statement in statements:
            lines.append(f" {statement}")
        lines.append("")
        index += 1
    return lines
def render_pseudocode(ir: dict[str, Any]) -> str:
    """Render one event body as a pseudocode function.

    The output is a ``function <class>_<slot>()`` header, an optional ``var``
    section listing the debug symbols, and the body. The body is the fully
    structured rendering when available and the labelled block listing
    otherwise. The returned text ends with a newline.
    """
    event = ir["event"]
    slot_name = sanitize_identifier(event["event_name_hint"] or f"slot_{event['slot']:02X}")
    owner = ir["class"]
    header = (
        f"function {sanitize_identifier(owner['class_name'].lower())}_{slot_name}() "
        f"/* entry={owner['entry_index']} class_id=0x{owner['class_id']:04X} slot=0x{event['slot']:02X} */"
    )
    output = [header, "{"]

    symbols = ir["debug_symbols"]
    if symbols:
        output.append(" var")
        final_position = len(symbols) - 1
        for position, symbol in enumerate(symbols):
            # Comma between declarations, semicolon after the last one.
            separator = ";" if position == final_position else ","
            output.append(f" {sanitize_identifier(symbol['name'])}{separator} /* {symbol['bp_repr']} type=0x{symbol['type_id']:02X} */")
        output.append("")

    rendered_blocks = decompile_pseudocode_blocks(ir)
    structured_lines = render_structured_pseudocode(rendered_blocks)
    if structured_lines is None:
        output.extend(render_partially_structured_blocks(rendered_blocks))
    else:
        output.extend(f" {statement}" if statement else "" for statement in structured_lines)
    output.append("}")
    return "\n".join(output) + "\n"
def render_text(ir: dict[str, Any]) -> str:
    """Render the IR of one event body as a readable text listing.

    The listing has a four-line header (class, slot, body range, SHA1), one
    line per decoded op with listing labels inserted ahead of labelled
    offsets, and optional sections for debug symbols, field tags and unknown
    trailing bytes. The returned text ends with a newline.
    """
    labels = build_listing_labels(ir)

    def format_operand(key: str, value: Any) -> str:
        """Format one operand; an empty result means "omit this operand"."""
        if value is None or value == "":
            return ""
        if key == "intrinsic_name_hint" and value:
            return f"hint={value}"
        if key == "target_event_name_hint" and value:
            return f"event={value}"
        if key == "target_offset" and isinstance(value, int):
            # Jump targets are shown as a label when one exists, otherwise as
            # the absolute body offset. The offset is computed up front so
            # the f-string needs no nested same-quote literals, which are a
            # syntax error before Python 3.12.
            absolute_target = value + ir["event"]["derived_body_start"]
            target_text = labels.get(absolute_target) or f"0x{absolute_target:04X}"
            return f"->{target_text}"
        if isinstance(value, int):
            if key.endswith("_signed"):
                return f"{key}={value}"
            return f"{key}=0x{value:X}"
        return f"{key}={value}"

    lines = [
        f"Class {ir['class']['class_name']} entry={ir['class']['entry_index']} class_id=0x{ir['class']['class_id']:X}",
        f"Slot 0x{ir['event']['slot']:02X} hint={ir['event']['event_name_hint']} raw_word=0x{ir['event']['raw_event_entry_word']:04X} raw_code_off=0x{ir['event']['raw_code_offset']:08X}",
        f"Body 0x{ir['event']['derived_body_start']:04X}..0x{ir['event']['derived_body_end']:04X} len={ir['event']['derived_body_length']} end={ir['body']['end_reason']} ops={ir['body']['decoded_op_count']}",
        f"SHA1 {ir['body']['raw_body_sha1']}",
        "",
    ]
    for op in ir["ops"]:
        absolute_offset = op["absolute_body_offset"]
        label = labels.get(absolute_offset)
        if label is not None:
            lines.extend(["", f"{label}:"])
        operand_items = [formatted for key, value in op["operands"].items() if (formatted := format_operand(key, value))]
        lines.append(f"{absolute_offset:04X}: {op['opcode']:02X} {op['mnemonic']:<24} {' '.join(operand_items)} raw={op['raw_bytes']}")
    if ir["debug_symbols"]:
        lines.extend(["", f"Debug symbols @ 0x{ir['body']['debug_symbol_offset']:04X}:"])
        for symbol in ir["debug_symbols"]:
            lines.append(
                f" {symbol['index']:02X}: unk1=0x{symbol['unknown1']:02X} type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') {symbol['bp_repr']} unk3=0x{symbol['unknown3']:02X} name={symbol['name']}"
            )
    if ir["field_tags"]:
        lines.extend(["", "Field tags:"])
        for tag in ir["field_tags"]:
            lines.append(
                f" {tag['tag_label']} ({tag['bp_repr']})"
            )
    if ir["body"]["unknown_trailing_bytes"]:
        lines.extend(["", f"unknown_trailing_bytes={ir['body']['unknown_trailing_bytes']}"])
    return "\n".join(lines) + "\n"
def main() -> None:
    """Command-line entry point.

    Parses one class/slot event body into IR and emits it as JSON, plus any
    requested text, script, pseudocode or family-diff views. Each artifact
    goes to its output file when one is given and to stdout otherwise.
    """
    parser = argparse.ArgumentParser(description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts")
    parser.add_argument("--class", dest="class_name", required=True, help="Class name from class_event_index.tsv, for example NPCTRIG")
    parser.add_argument("--slot", required=True, help="Event slot, for example 0x0A")
    parser.add_argument("--extracted-root", default=str(EXTRACTED_ROOT), help="Extracted USECODE root containing class_event_index.tsv and chunks/")
    parser.add_argument("--variant", choices=["auto", "regret", "remorse"], default="auto", help="Crusader intrinsic numbering to apply (default: auto, fallback regret)")
    parser.add_argument("--output", help="Write IR JSON to this file instead of stdout")
    parser.add_argument("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON")
    parser.add_argument("--text-output", help="Write the text listing to this file")
    parser.add_argument("--emit-script", action="store_true", help="Emit a decompiled script-style view beside the JSON")
    parser.add_argument("--script-output", help="Write the script-style decompilation to this file")
    parser.add_argument("--emit-pseudocode", action="store_true", help="Emit a higher-level pseudocode view beside the JSON")
    parser.add_argument("--pseudocode-output", help="Write the pseudocode view to this file")
    parser.add_argument("--family-diff", action="store_true", help="Emit repeated-body family diff report instead of (or alongside) the IR")
    parser.add_argument("--family-diff-output", help="Write the family diff JSON to this file")
    parser.add_argument("--family-diff-text-output", help="Write the family diff text report to this file")
    args = parser.parse_args()

    def deliver(payload: str, destination: str | None, file_suffix: str = "") -> None:
        """Write ``payload`` (+ ``file_suffix``) to ``destination``, or print it."""
        if destination:
            Path(destination).write_text(payload + file_suffix, encoding="utf-8")
        else:
            print(payload)

    slot = parse_int(args.slot)
    extracted_root = Path(args.extracted_root)
    event_row, layout_row = select_rows(args.class_name, slot, extracted_root)
    # "auto" is passed on as None.
    variant = args.variant if args.variant != "auto" else None
    ir = parse_body_ir(event_row, layout_row, variant, extracted_root)

    # JSON files get a trailing newline; print() supplies one on stdout.
    deliver(json.dumps(ir, indent=2), args.output, "\n")

    optional_views = (
        (args.emit_text, render_text, args.text_output),
        (args.emit_script, render_script, args.script_output),
        (args.emit_pseudocode, render_pseudocode, args.pseudocode_output),
    )
    for enabled, renderer, destination in optional_views:
        if enabled:
            deliver(renderer(ir), destination)

    if args.family_diff:
        diff = compute_family_diff(args.class_name, slot, extracted_root)
        deliver(json.dumps(diff, indent=2), args.family_diff_output, "\n")
        deliver(render_family_diff_text(diff), args.family_diff_text_output)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()