from __future__ import annotations

import argparse
import csv
import hashlib
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any

# Input locations. REPO_ROOT is the parent of the directory that contains this
# script; every other path is derived from it.
REPO_ROOT = Path(__file__).resolve().parents[1]
EXTRACTED_ROOT = REPO_ROOT / "USECODE" / "EUSECODE_extracted"
CLASS_EVENT_INDEX = EXTRACTED_ROOT / "class_event_index.tsv"
CLASS_LAYOUT_INDEX = EXTRACTED_ROOT / "class_layout_index.tsv"
CHUNKS_DIR = EXTRACTED_ROOT / "chunks"

# Event slot number -> name hint. Used whenever an index row carries no event
# name of its own, and to label the target slot of call/spawn opcodes.
EVENT_NAME_HINTS: dict[int, str] = {
    0x00: "look",
    0x01: "use",
    0x02: "anim",
    0x03: "setActivity",
    0x04: "cachein",
    0x05: "hit",
    0x06: "gotHit",
    0x07: "hatch",
    0x08: "schedule",
    0x09: "release",
    0x0A: "equip",
    0x0B: "unequip",
    0x0C: "combine",
    0x0D: "func0D",
    0x0E: "calledFromAnim",
    0x0F: "enterFastArea",
    0x10: "leaveFastArea",
    0x11: "cast",
    0x12: "justMoved",
    0x13: "avatarStoleSomething",
    0x14: "animGetHit",
    0x15: "func15",
    0x16: "func16",
    0x17: "func17",
    0x18: "func18",
    0x19: "func19",
    0x1A: "func1A",
    0x1B: "func1B",
    0x1C: "func1C",
    0x1D: "func1D",
    0x1E: "func1E",
    0x1F: "func1F",
}

# Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h
# Source note: "current discovered intrinsics are for regret1.21 only"
# This is used as a hint only – ordinal mapping may differ between builds.
INTRINSIC_HINTS: dict[int, str] = {
    0x0000: "Intrinsic0000()",
    0x0001: "Item::getFrame(void)",
    0x0002: "Item::setFrame(uint16)",
    0x0003: "Item::getMapNum(void)",
    0x0004: "Item::getStatus(void)",
    0x0005: "Item::orStatus(sint16)",
    0x0006: "Item::callEvent0A(sint16)",
    0x0007: "Intrinsic0007()",
    0x0008: "Item::isNpc(void)",
    0x0009: "Item::getZ(void)",
    0x000A: "Intrinsic000A()",
    0x000B: "Item::getQLo(void)",
    0x000C: "Item::destroy(void)",
    0x000D: "Intrinsic000D()",
    0x000E: "Item::getX(void)",
    0x000F: "Item::getY(void)",
    0x0010: "Intrinsic0010()",
    0x0011: "Item::getType(void)",
    0x0012: "Intrinsic0012()",
    0x0013: "Intrinsic0013()",
    0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)",
    0x0015: "Item::andStatus(void)",
    0x0016: "Intrinsic0016()",
    0x0017: "Intrinsic00C3()",
    0x0018: "Intrinsic00DA()",
    0x0019: "Intrinsic0019()",
    0x001A: "Item::create(uint16,uint16)",
    0x001B: "Item::pop(uint16,uint16,uint8)",
    0x001C: "Intrinsic00FA()",
    0x001D: "Item::push(void)",
    0x001E: "Intrinsic001E()",
    0x001F: "Item::getQLo(void)",
    0x0020: "Item::setQLo(sint16)",
    0x0021: "Item::getQHi(void)",
    0x0022: "Item::setQHi(sint16)",
    0x0023: "Intrinsic0023()",
    0x0024: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0025: "Item::getCY(void)",
    0x0026: "Item::getCX(void)",
    0x0027: "Intrinsic0027()",
    0x0028: "Item::setNpcNum(sint16)",
    0x0029: "Intrinsic0029()",
    0x002A: "Intrinsic002A()",
    0x002B: "Item::pop(void)",
    0x002C: "Intrinsic002C()",
    0x002D: "Item::isCompletelyOn(uint16)",
    0x002E: "Item::pop(uint16)",
    0x002F: "Intrinsic002F()",
    0x0030: "Intrinsic0030()",
    0x0031: "Item::getFamily(void)",
    0x0032: "Item::destroyContents(void)",
    0x0033: "Intrinsic0033()",
    0x0034: "Item::getDirToItem(uint16)",
    0x0035: "Intrinsic0035()",
    0x0036: "Intrinsic0036()",
    0x0037: "Intrinsic0037()",
    0x0038: "Item::andStatus(void)",
    0x0039: "Kernel::resetRef(uint16,ProcessType)",
    0x003A: "Item::touch(void)",
    0x003B: "Egg::getEggId(void)",
    0x003C: "Intrinsic003C()",
    0x003D: "Intrinsic003D()",
    0x003E: "Item::callEvent11(sint16)",
    0x003F: "Intrinsic003F()",
    0x0040: "Intrinsic0040()",
    0x0041: "Item::isOn(uint16)",
    0x0042: "Item::getQHi(void)",
    0x0043: "Item::isOn(uint16)",
    0x0044: "Item::getQHi(void)",
    0x0045: "Item::isOn(uint16)",
    0x0046: "Item::getQHi(void)",
    0x0047: "Item::isOn(uint16)",
    0x0048: "Item::getQHi(void)",
    0x0049: "Item::isOn(uint16)",
    0x004A: "Item::getQHi(void)",
    0x004B: "Item::isOn(uint16)",
    0x004C: "Item::getQHi(void)",
    0x004D: "Intrinsic004D()",
    0x004E: "Npc::isDead(void)",
    0x004F: "Intrinsic009C()",
    0x0050: "Intrinsic0050()",
    0x0051: "Intrinsic0051()",
    0x0052: "Intrinsic0052()",
    0x0053: "Intrinsic00BD()",
    0x0054: "Intrinsic0054()",
    0x0055: "Intrinsic0055()",
    0x0056: "Intrinsic0056()",
    0x0057: "Intrinsic0057()",
    0x0058: "Item::use(void)",
    0x0059: "Item::setQuantity(sint16)",
    0x005A: "Intrinsic005A()",
    0x005B: "Item::getSurfaceWeight(void)",
    0x005C: "Intrinsic005C()",
    0x005D: "Item::setFrame(uint16)",
    0x005E: "Intrinsic00DA()",
    0x005F: "Intrinsic005F()",
    0x0060: "Intrinsic0060()",
    0x0061: "Intrinsic0061()",
    0x0062: "Intrinsic0062()",
    0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)",
    0x0064: "Item::getPoint(WorldPoint&)",
    0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)",
    0x0066: "Item::fall(void)",
    0x0067: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0068: "Kernel::getNumProcesses(uint16,ProcessType)",
    0x0069: "Item::getCY(void)",
    0x006A: "Intrinsic006A()",
    0x006B: "Intrinsic006B()",
    0x006C: "Intrinsic006C()",
    0x006D: "Intrinsic006D()",
    0x006E: "Intrinsic006E()",
    0x006F: "Item::isInNpc(void)",
    0x0070: "Intrinsic0070()",
    0x0071: "Intrinsic0071()",
    0x0072: "Intrinsic0072()",
    0x0073: "Intrinsic0073()",
    0x0074: "Npc::isDead(void)",
    0x0075: "Item::getNpcNum(void)",
    0x0076: "IntrinsicReturn0",
    0x0077: "Intrinsic0077()",
    0x0078: "Item::callEvent0B(sint16)",
    0x0079: "Item::andStatus(void)",
    0x007A: "Item::move(uint16,uint16,uint8)",
    0x007B: "Intrinsic007B()",
    0x007C: "Intrinsic007C()",
    0x007D: "Intrinsic007D()",
    0x007E: "Intrinsic007E()",
    0x007F: "Intrinsic007F()",
    0x0080: "Intrinsic0080()",
    0x0081: "Intrinsic0081()",
    0x0082: "Intrinsic0082()",
    0x0083: "Intrinsic0083()",
    0x0084: "Intrinsic0084()",
    0x0085: "Intrinsic0085()",
    0x0086: "teleportToEgg(sint16,int,uint8)",
    0x0087: "Intrinsic0087()",
    0x0088: "Intrinsic0088()",
    0x0089: "Intrinsic00BD()",
    0x008A: "Item::getQuality(void)",
    0x008B: "Item::setQuality(sint16)",
    0x008C: "Intrinsic008C()",
    0x008D: "Intrinsic008D()",
    0x008E: "Intrinsic008E()",
    0x008F: "Camera::getX(void)",
    0x0090: "Camera::getY(void)",
    0x0091: "Item::setMapNum(sint16)",
    0x0092: "Item::getNpcNum(void)",
    0x0093: "Item::shoot(WorldPoint&,sint16,sint16)",
    0x0094: "Intrinsic0094()",
    0x0095: "Item::enterFastArea(void)",
    0x0096: "Intrinsic00CA()",
    0x0097: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0098: "Item::getNpcNum(void)",
    0x0099: "Intrinsic0099()",
    0x009A: "teleportToEgg(sint16,uint8)",
    0x009B: "Intrinsic009B()",
    0x009C: "Intrinsic009C()",
    0x009D: "Intrinsic009D()",
    0x009E: "Intrinsic009E()",
    0x009F: "Intrinsic009F()",
    0x00A0: "Item::andStatus(void)",
    0x00A1: "Item::getUnkEggType(void)",
    0x00A2: "Egg::setEggXRange(uint16)",
    0x00A3: "Item::setFrame(uint16)",
    0x00A4: "Item::overlaps(uint16)",
    0x00A5: "Item::isOn(uint16)",
    0x00A6: "Item::getQHi(void)",
    0x00A7: "Intrinsic00DA()",
    0x00A8: "Item::getCY(void)",
    0x00A9: "Intrinsic00A9()",
    0x00AA: "Item::isOn(uint16)",
    0x00AB: "Npc::isDead(void)",
    0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00AD: "Intrinsic00AD()",
    0x00AE: "Item::getQHi(void)",
    0x00AF: "Item::andStatus(void)",
    0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00B1: "Item::andStatus(void)",
    0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00B3: "Item::andStatus(void)",
    0x00B4: "Item::getDirToCoords(uint16,uint16)",
    0x00B5: "Intrinsic00B5()",
    0x00B6: "Intrinsic00B6()",
    0x00B7: "Item::getNpcNum(void)",
    0x00B8: "Item::getCY(void)",
    0x00B9: "Item::isOn(uint16)",
    0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)",
    0x00BB: "Npc::isDead(void)",
    0x00BC: "Intrinsic00BC()",
    0x00BD: "Intrinsic00BD()",
    0x00BE: "Intrinsic00BE()",
    0x00BF: "Item::andStatus(void)",
    0x00C0: "Intrinsic00C0()",
    0x00C1: "Intrinsic00C1()",
    0x00C2: "IntrinsicReturn0",
    0x00C3: "Intrinsic00C3()",
    0x00C4: "Item::getQHi(void)",
    0x00C5: "Item::setQuality(sint16)",
    0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00C7: "Intrinsic00C7()",
    0x00C8: "Intrinsic00C8()",
    0x00C9: "Item::callEvent0A(sint16)",
    0x00CA: "Intrinsic00CA()",
    0x00CB: "Item::isOn(uint16)",
    0x00CC: "Intrinsic00CC()",
    0x00CD: "Intrinsic00CD()",
    0x00CE: "Item::getQHi(void)",
    0x00CF: "Item::isOn(uint16)",
    0x00D0: "Intrinsic00D0()",
    0x00D1: "Intrinsic00D1()",
    0x00D2: "Intrinsic00D2()",
    0x00D3: "Intrinsic00FA()",
    0x00D4: "Camera::getY(void)",
    0x00D5: "Intrinsic00D5()",
    0x00D6: "Intrinsic00D6()",
    0x00D7: "Intrinsic00D7()",
    0x00D8: "Intrinsic00D8()",
    0x00D9: "Intrinsic00D9()",
    0x00DA: "Intrinsic00DA()",
    0x00DB: "Intrinsic00DB()",
    0x00DC: "Item::getQLo(void)",
    0x00DD: "Item::getQHi(void)",
    0x00DE: "Item::getNpcNum(void)",
    0x00DF: "Intrinsic00DF()",
    0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E1: "Intrinsic00FA()",
    0x00E2: "Item::getQLo(void)",
    0x00E3: "Item::getCY(void)",
    0x00E4: "Item::getNpcNum(void)",
    0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E6: "Item::getNpcNum(void)",
    0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E8: "Item::getNpcNum(void)",
    0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EA: "Item::getNpcNum(void)",
    0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EC: "Item::getNpcNum(void)",
    0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EE: "Item::getNpcNum(void)",
    0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F0: "Item::getNpcNum(void)",
    0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F2: "Item::getNpcNum(void)",
    0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F4: "Item::getNpcNum(void)",
    0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F6: "Item::getNpcNum(void)",
    0x00F7: "Item::andStatus(void)",
    0x00F8: "Intrinsic00FA()",
    0x00F9: "Item::getQLo(void)",
    0x00FA: "Intrinsic00FA()",
    0x00FB: "Intrinsic00FB()",
    0x00FC: "Intrinsic00FC()",
    0x00FD: "Item::getQLo(void)",
    0x00FE: "Intrinsic00FE()",
    0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0100: "Item::andStatus(void)",
    0x0101: "Item::isOn(uint16)",
    0x0102: "Npc::isDead(void)",
    0x0103: "Intrinsic00BD()",
    0x0104: "Item::getQHi(void)",
    0x0105: "Intrinsic00DA()",
    0x0106: "Intrinsic00FA()",
    0x0107: "Item::getQLo(void)",
    0x0108: "Item::isOn(uint16)",
    0x0109: "Item::getQHi(void)",
    0x010A: "Item::isOn(uint16)",
    0x010B: "Item::getQHi(void)",
    0x010C: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x010D: "Item::getNpcNum(void)",
    0x010E: "Item::getCY(void)",
    0x010F: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0110: "Item::isOn(uint16)",
    0x0111: "Intrinsic0111()",
    0x0112: "IntrinsicReturn0",
    0x0113: "Npc::isDead(void)",
    0x0114: "Intrinsic0088()",
    0x0115: "Intrinsic00C1()",
    0x0116: "Item::getQHi(void)",
    0x0117: "Intrinsic00BD()",
    0x0118: "Item::andStatus(void)",
    0x0119: "Item::getNpcNum(void)",
    0x011A: "Item::andStatus(void)",
    0x011B: "Item::getNpcNum(void)",
    0x011C: "Intrinsic011C()",
    0x011D: "Item::andStatus(void)",
    0x011E: "Item::getNpcNum(void)",
    0x011F: "Item::AvatarStoleSomehting(uint16)",
    0x0120: "Item::andStatus(void)",
    0x0121: "Item::getNpcNum(void)",
    0x0122: "Item::getQ(void)",
    0x0123: "Item::setQ(uint)",
    0x0124: "Item::andStatus(void)",
    0x0125: "Item::getNpcNum(void)",
    0x0126: "Item::andStatus(void)",
    0x0127: "Item::getNpcNum(void)",
    0x0128: "Item::andStatus(void)",
    0x0129: "Item::getNpcNum(void)",
    0x012A: "Item::andStatus(void)",
    0x012B: "Item::getNpcNum(void)",
    0x012C: "Item::andStatus(void)",
    0x012D: "Item::getNpcNum(void)",
    0x012E: "Intrinsic00C3()",
    0x012F: "Item::andStatus(void)",
    0x0130: "Item::getNpcNum(void)",
    0x0131: "Intrinsic0131()",
    0x0132: "Item::andStatus(void)",
    0x0133: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0134: "Item::andStatus(void)",
    0x0135: "Camera::getY(void)",
    0x0136: "Camera::getZ(void)",
    0x0137: "Intrinsic0137()",
    0x0138: "Intrinsic009C()",
    0x0139: "Item::getTypeFlagCrusader(sint16)",
    0x013A: "Item::getNpcNum(void)",
    0x013B: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x013C: "Item::getCY(void)",
    0x013D: "Item::getCZ(void)",
    0x013E: "Item::setFrame(uint16)",
    0x013F: "Intrinsic013F()",
    0x0140: "Intrinsic0140()",
    0x0141: "Intrinsic0141()",
    0x0142: "Intrinsic0142()",
    0x0143: "Npc::isDead(void)",
    0x0144: "Intrinsic00FA()",
    0x0145: "Intrinsic0145()",
    0x0146: "Intrinsic0146()",
    0x0147: "Intrinsic0147()",
    0x0148: "Item::getNpcNum(void)",
    0x0149: "Item::getQLo(void)",
    0x014A: "Item::andStatus(void)",
    0x014B: "Intrinsic014B()",
    0x014C: "Intrinsic014C()",
    0x014D: "Intrinsic014D()",
    0x014E: "Intrinsic003C()",
    0x014F: "Egg::getEggXRange(void)",
    0x0150: "Intrinsic009C()",
    0x0151: "Intrinsic0072()",
    0x0152: "Item::setFrame(uint16)",
    0x0153: "Intrinsic00C1()",
    0x0154: "Intrinsic00C3()",
    0x0155: "Intrinsic00C1()",
    0x0156: "Item::isOn(uint16)",
    0x0157: "Intrinsic00C3()",
    0x0158: "Intrinsic00FA()",
    0x0159: "Item::getQHi(void)",
    0x015A: "Item::getQLo(void)",
    0x015B: "Intrinsic00C1()",
    0x015C: "Intrinsic00C3()",
    0x015D: "Intrinsic015D()",
}

# Opcodes that carry no operand bytes, mapped to their mnemonic.
NO_ARG_MNEMONICS = {
    0x08: "pop_result",
    0x12: "pop_temp",
    0x14: "add",
    0x15: "add_dword",
    0x16: "concat",
    0x17: "append_list",
    0x1C: "sub",
    0x1D: "sub_dword",
    0x1E: "mul",
    0x1F: "mul_dword",
    0x20: "div",
    0x21: "div_dword",
    0x22: "mod",
    0x23: "mod_dword",
    0x24: "cmp",
    0x25: "cmp_dword",
    0x26: "strcmp",
    0x28: "lt",
    0x29: "lt_dword",
    0x2A: "le",
    0x2B: "le_dword",
    0x2C: "gt",
    0x2D: "gt_dword",
    0x2E: "ge",
    0x2F: "ge_dword",
    0x30: "not",
    0x31: "not_dword",
    0x32: "and",
    0x33: "and_dword",
    0x34: "or",
    0x35: "or_dword",
    0x36: "ne",
    0x37: "ne_dword",
    0x39: "bit_and",
    0x3A: "bit_or",
    0x3B: "bit_not",
    0x3C: "lsh",
    0x3D: "rsh",
    0x50: "ret",
    0x53: "suspend",
    0x59: "push_pid",
    0x5D: "push_retval_byte",
    0x5E: "push_retval_word",
    0x5F: "push_retval_dword",
    0x60: "word_to_dword",
    0x61: "dword_to_word",
    0x6B: "str_to_ptr",
    0x6D: "push_process_result",
    0x73: "loopnext",
    0x77: "set_info",
    0x78: "process_exclude",
    0x7A: "end",
}

# Mnemonics for opcodes whose only operand is one BP-relative displacement byte.
_POP_LOCAL_MNEMONICS = {
    0x00: "pop_local_byte",
    0x01: "pop_local_word",
    0x02: "pop_local_dword",
}
_BP_OFFSET_MNEMONICS = {
    0x3E: "push_local_byte",
    0x3F: "push_local_word",
    0x40: "push_local_dword",
    0x41: "push_local_string",
    0x43: "push_local_slist",
    0x4B: "push_local_addr",
    0x62: "free_local_string",
    0x63: "free_local_slist",
    0x64: "free_local_list",
    0x69: "push_string_ptr",
}
# Mnemonics for opcodes whose only operand is one plain byte.
_ELEMENT_SIZE_MNEMONICS = {
    0x19: "append_slist",
    0x1A: "remove_slist",
    0x1B: "remove_list",
}
_BYTE_VALUE_MNEMONICS = {
    0x65: "free_stack_string",
    0x66: "free_stack_list",
    0x67: "free_stack_slist",
    0x6E: "add_sp",
    0x6F: "push_stack_addr",
    0x74: "loopscr",
}


def parse_int(value: str) -> int:
    """Parse an integer literal, letting ``int`` pick the base from the prefix.

    Raises:
        ValueError: if ``value`` is not a valid literal (this includes "").
        TypeError: if ``value`` is not a string.
    """
    return int(value, 0)


def try_parse_int(value: str) -> int | None:
    """Like ``parse_int`` but return ``None`` instead of raising."""
    try:
        return parse_int(value)
    except (TypeError, ValueError):
        return None


def signed_byte(value: int) -> int:
    """Reinterpret an unsigned byte (0..255) as a two's-complement value."""
    return value - 0x100 if value & 0x80 else value


def _signed_word(value: int) -> int:
    """Reinterpret an unsigned 16-bit value as a two's-complement value."""
    return value - 0x10000 if value & 0x8000 else value


def _frame_repr(register: str, value: int) -> str:
    """Render a signed byte displacement off ``register``, e.g. ``[BP-02h]``."""
    disp = signed_byte(value)
    sign = "+" if disp >= 0 else "-"
    return f"[{register}{sign}{abs(disp):02X}h]"


def bp_repr(value: int) -> str:
    """Render displacement byte ``value`` as a BP-relative operand string."""
    return _frame_repr("BP", value)


def sp_repr(value: int) -> str:
    """Render displacement byte ``value`` as an SP-relative operand string."""
    return _frame_repr("SP", value)


@dataclass
class ParseResult:
    """Outcome of decoding one instruction with ``parse_one_op``."""

    # Decoded op record, or None when nothing could be decoded at this offset.
    op: dict[str, Any] | None
    # Offset of the next instruction; equals the start offset on failure.
    next_offset: int
    # None to keep decoding, otherwise "end_opcode", "unknown_opcode" or
    # "truncated_op".
    end_reason: str | None = None
    # Undecoded bytes from the failing offset onwards (failure results only).
    unknown_tail: bytes | None = None


class BodyReader:
    """Sequential little-endian reader over a bytes buffer.

    Fixed-size reads raise ``IndexError`` and leave ``offset`` untouched when
    the buffer does not hold enough bytes. ``read_u8`` always behaved that
    way; the wider reads used to decode a short slice silently.
    """

    def __init__(self, data: bytes, offset: int = 0) -> None:
        self.data = data
        self.offset = offset

    def _take(self, length: int) -> bytes:
        """Return the next ``length`` bytes and advance, or raise IndexError."""
        end = self.offset + length
        if self.offset < 0 or end > len(self.data):
            raise IndexError(
                f"need {length} byte(s) at offset {self.offset}, "
                f"buffer holds {len(self.data)}"
            )
        raw = self.data[self.offset:end]
        self.offset = end
        return raw

    def read_u8(self) -> int:
        """Read one unsigned byte."""
        return self._take(1)[0]

    def read_u16(self) -> int:
        """Read an unsigned little-endian 16-bit value."""
        return int.from_bytes(self._take(2), "little")

    def read_u32(self) -> int:
        """Read an unsigned little-endian 32-bit value."""
        return int.from_bytes(self._take(4), "little")

    def read_cstring(self) -> str:
        """Read bytes up to a NUL terminator and consume the terminator.

        Each byte maps to the code point of the same value (latin-1). A string
        with no terminator is accepted and runs to the end of the buffer.
        """
        if self.offset >= len(self.data):
            return ""
        terminator = self.data.find(b"\x00", self.offset)
        if terminator < 0:
            raw = self.data[self.offset:]
            self.offset = len(self.data)
        else:
            raw = self.data[self.offset:terminator]
            self.offset = terminator + 1
        return raw.decode("latin-1")

    def read_fixed_string(self, length: int) -> str:
        """Read exactly ``length`` bytes as latin-1, stripping trailing NULs."""
        raw = self._take(length)
        return raw.decode("latin-1", errors="replace").rstrip("\x00")


def op_record(
    start: int,
    absolute_start: int,
    opcode: int,
    raw_bytes: bytes,
    mnemonic: str,
    operands: dict[str, Any],
) -> dict[str, Any]:
    """Build the JSON-serialisable record for one decoded instruction."""
    return {
        "offset": start,
        "absolute_body_offset": absolute_start,
        "opcode": opcode,
        "mnemonic": mnemonic,
        "raw_bytes": raw_bytes.hex(),
        "operands": operands,
    }


def _decode_operands(reader: BodyReader, opcode: int) -> tuple[str, dict[str, Any]] | None:
    """Read the operands of ``opcode`` from ``reader``.

    Returns ``(mnemonic, operands)``, or ``None`` for an opcode this decoder
    does not know. ``IndexError`` from the reader propagates when the operand
    bytes are cut short. Operand key order is part of the emitted JSON/text,
    so the dict literals below keep a fixed order.
    """
    if opcode in _POP_LOCAL_MNEMONICS:
        bp_offset = reader.read_u8()
        return _POP_LOCAL_MNEMONICS[opcode], {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
    if opcode == 0x03:
        bp_offset = reader.read_u8()
        size = reader.read_u8()
        return "pop_local_blob", {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size}
    if opcode == 0x09:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "pop_list_element", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
            "slist_flag": slist_flag,
        }
    if opcode == 0x0A:
        value = reader.read_u8()
        return "push_byte_immediate", {"value_u8": value, "value_signed": signed_byte(value)}
    if opcode == 0x0B:
        return "push_word_immediate", {"value_u16": reader.read_u16()}
    if opcode == 0x0C:
        return "push_dword_immediate", {"value_u32": reader.read_u32()}
    if opcode == 0x0D:
        declared_length = reader.read_u16()
        text = reader.read_cstring()
        return "push_string_immediate", {"declared_length": declared_length, "string": text}
    if opcode == 0x0E:
        element_size = reader.read_u8()
        count = reader.read_u8()
        return "create_list", {"element_size": element_size, "count": count}
    if opcode == 0x0F:
        arg_bytes = reader.read_u8()
        intrinsic_ordinal = reader.read_u16()
        return "call_intrinsic", {
            "arg_bytes": arg_bytes,
            "intrinsic_ordinal": intrinsic_ordinal,
            "intrinsic_name_hint": INTRINSIC_HINTS.get(intrinsic_ordinal),
        }
    if opcode == 0x11:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        return "call_class_event", {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
    if opcode in _ELEMENT_SIZE_MNEMONICS:
        return _ELEMENT_SIZE_MNEMONICS[opcode], {"element_size": reader.read_u8()}
    if opcode == 0x38:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "in_list", {"element_size": element_size, "slist_flag": slist_flag}
    if opcode in _BP_OFFSET_MNEMONICS:
        bp_offset = reader.read_u8()
        return _BP_OFFSET_MNEMONICS[opcode], {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
    if opcode == 0x42:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        return "push_local_list", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
        }
    if opcode == 0x44:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "push_list_element", {"element_size": element_size, "slist_flag": slist_flag}
    if opcode == 0x45:
        byte0 = reader.read_u8()
        byte1 = reader.read_u8()
        return "push_huge", {"value_a": byte0, "value_b": byte1}
    if opcode in {0x4C, 0x4D}:
        mnemonic = "push_indirect" if opcode == 0x4C else "pop_indirect"
        return mnemonic, {"size": reader.read_u8()}
    if opcode in {0x4E, 0x4F}:
        global_id = reader.read_u16()
        size = reader.read_u8()
        mnemonic = "push_global" if opcode == 0x4E else "pop_global"
        return mnemonic, {"global_id": global_id, "size": size}
    if opcode in {0x51, 0x52}:
        relative = reader.read_u16()
        signed_relative = _signed_word(relative)
        # The displacement is applied to the offset just past the operand.
        target = reader.offset + signed_relative
        mnemonic = "jne" if opcode == 0x51 else "jmp"
        return mnemonic, {
            "relative_u16": relative,
            "relative_signed": signed_relative,
            "target_offset": target,
        }
    if opcode == 0x54:
        arg0 = reader.read_u8()
        arg1 = reader.read_u8()
        return "implies", {"arg0": arg0, "arg1": arg1}
    if opcode == 0x57:
        arg_bytes = reader.read_u8()
        this_size = reader.read_u8()
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        return "spawn", {
            "arg_bytes": arg_bytes,
            "this_size": this_size,
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
    if opcode == 0x58:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        inline_offset = reader.read_u16()
        this_size = reader.read_u8()
        unknown = reader.read_u8()
        return "spawn_inline", {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
            "inline_offset": inline_offset,
            "this_size": this_size,
            "unknown": unknown,
        }
    if opcode == 0x5A:
        return "init", {"local_bytes": reader.read_u8()}
    if opcode == 0x5B:
        return "line_number", {"line_number": reader.read_u16()}
    if opcode == 0x5C:
        relative = reader.read_u16()
        # Relative to the offset just past the u16, before the name is read.
        symbol_offset = reader.offset + _signed_word(relative)
        symbol = reader.read_fixed_string(8)
        trailing_zero = reader.read_u8()
        return "symbol_info", {
            "symbol_offset": symbol_offset,
            "symbol": symbol,
            "trailing_zero": trailing_zero,
        }
    if opcode in _BYTE_VALUE_MNEMONICS:
        value = reader.read_u8()
        operands: dict[str, Any] = {"value_u8": value}
        if opcode in {0x65, 0x66, 0x67}:
            operands["target"] = sp_repr(value)
        return _BYTE_VALUE_MNEMONICS[opcode], operands
    if opcode == 0x6C:
        bp_offset = reader.read_u8()
        copy_type = reader.read_u8()
        return "param_pid_chg", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "copy_type": copy_type,
        }
    if opcode == 0x70:
        current_var = reader.read_u8()
        string_bytes = reader.read_u8()
        loop_type = reader.read_u8()
        return "loop", {
            "current_var": current_var,
            "string_bytes": string_bytes,
            "loop_type": loop_type,
        }
    if opcode in {0x75, 0x76}:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        branch = reader.read_u16()
        signed_branch = _signed_word(branch)
        target = reader.offset + signed_branch
        mnemonic = "foreach_list" if opcode == 0x75 else "foreach_slist"
        return mnemonic, {
            "bp_offset": bp_offset,
            "target_var": bp_repr(bp_offset),
            "element_size": element_size,
            "relative_u16": branch,
            "relative_signed": signed_branch,
            "target_offset": target,
        }
    if opcode == 0x79:
        return "global_address", {"global_id": reader.read_u16()}

    mnemonic = NO_ARG_MNEMONICS.get(opcode)
    if mnemonic is None:
        return None
    return mnemonic, {}


def parse_one_op(body: bytes, start: int) -> ParseResult:
    """Decode the single instruction that begins at ``body[start]``.

    On success the result carries the op record and the offset of the next
    instruction; ``end_reason`` is "end_opcode" for opcode 0x7A and ``None``
    otherwise. On failure ``op`` is ``None``, ``next_offset`` equals ``start``
    and ``unknown_tail`` is ``body[start:]``; ``end_reason`` is then
    "unknown_opcode" or "truncated_op" (the operand bytes run past the end of
    ``body``). Truncation used to raise ``IndexError`` or yield operands
    decoded from a short slice.
    """
    reader = BodyReader(body, start)
    try:
        opcode = reader.read_u8()
        decoded = _decode_operands(reader, opcode)
    except IndexError:
        return ParseResult(op=None, next_offset=start, end_reason="truncated_op", unknown_tail=body[start:])
    if decoded is None:
        return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:])

    mnemonic, operands = decoded
    raw = body[start:reader.offset]
    # absolute_body_offset is provisional here; parse_body_ir rebases it.
    op = op_record(start, start, opcode, raw, mnemonic, operands)
    end_reason = "end_opcode" if opcode == 0x7A else None
    return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason)


def load_tsv_rows(path: Path) -> list[dict[str, str]]:
    """Read a tab-separated file with a header row into a list of dicts."""
    with path.open("r", encoding="utf-8", newline="") as handle:
        return list(csv.DictReader(handle, delimiter="\t"))


def find_chunk_file(entry_index: int) -> Path:
    """Return the first (sorted) ``chunk_<entry_index>_*.bin`` in CHUNKS_DIR.

    Raises:
        FileNotFoundError: if no file matches.
    """
    matches = sorted(CHUNKS_DIR.glob(f"chunk_{entry_index}_*.bin"))
    if not matches:
        raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}")
    return matches[0]


def _find_event_row(event_rows: list[dict[str, str]], class_name: str, slot: int) -> dict[str, str] | None:
    """Return the first row for ``class_name``/``slot`` with a parsable entry index.

    The class name is compared case-insensitively. Cells that are missing or
    ``None`` (csv.DictReader pads short rows with ``None``) never match.
    """
    wanted = class_name.upper()
    for row in event_rows:
        if (row.get("class_name_hint") or "").upper() != wanted:
            continue
        if try_parse_int(row.get("slot", "")) != slot:
            continue
        if try_parse_int(row.get("entry_index", "")) is None:
            continue
        return row
    return None


def select_rows(class_name: str, slot: int) -> tuple[dict[str, str], dict[str, str]]:
    """Look up the event-index row and the matching layout-index row.

    Raises:
        KeyError: if either index has no matching row.
        ValueError: if the event row has an empty derived body range.
    """
    event_rows = load_tsv_rows(CLASS_EVENT_INDEX)
    layout_rows = load_tsv_rows(CLASS_LAYOUT_INDEX)

    event_row = _find_event_row(event_rows, class_name, slot)
    if event_row is None:
        raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}")
    if not event_row.get("derived_body_start") or not event_row.get("derived_body_end"):
        raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}")

    entry_index = parse_int(event_row["entry_index"])
    layout_row = next(
        (row for row in layout_rows if try_parse_int(row.get("entry_index", "")) == entry_index),
        None,
    )
    if layout_row is None:
        raise KeyError(f"No class_layout_index row found for entry_index={entry_index}")
    return event_row, layout_row


def annotation_hints(event_row: dict[str, str], payload_shape_hint: str | None) -> dict[str, Any]:
    """Build the static ``annotation_hints`` section of the IR for one event."""
    slot = parse_int(event_row["slot"])
    return {
        "runtime_family": "slot-backed-owner-loaded-body",
        "payload_shape_hint": payload_shape_hint,
        "compiled_anchors": [
            {"address": "000d:51fd", "role": "slot_value_loader"},
            {"address": "000d:5572", "role": "slot_value_plus_offset"},
            {"address": "000d:46ec", "role": "context_create_from_slot"},
            {"address": "000d:22bc", "role": "matrix_pushback_stage"},
            {"address": "000d:ebe3", "role": "opcode_sequence_run"},
        ],
        "slot_taxonomy": {
            "slot": slot,
            "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
        },
    }


def infer_payload_shape(slot: int) -> str | None:
    """Return the payload-shape hint recorded for ``slot``, or None if there is none."""
    if slot in {0x10, 0x12}:
        return "none"
    if slot in {0x0A, 0x0B, 0x11, 0x14}:
        return "word"
    if slot == 0x13:
        return "signed_word"
    return None


def _decode_body(body: bytes, body_start: int) -> tuple[list[dict[str, Any]], str, bytes]:
    """Decode ``body`` op by op until it is exhausted or decoding must stop.

    Returns ``(ops, end_reason, unknown_tail)``. ``end_reason`` is
    "body_exhausted" when every byte decoded, otherwise the reason reported by
    ``parse_one_op``. After an end opcode the tail is whatever follows it.
    """
    ops: list[dict[str, Any]] = []
    offset = 0
    while offset < len(body):
        result = parse_one_op(body, offset)
        if result.op is not None:
            # Rebase the op offset onto the chunk the body was sliced from.
            result.op["absolute_body_offset"] = body_start + result.op["offset"]
            ops.append(result.op)
        if result.end_reason is not None:
            if result.end_reason == "end_opcode":
                tail = body[result.next_offset:]
            else:
                tail = result.unknown_tail or b""
            return ops, result.end_reason, tail
        offset = result.next_offset
    return ops, "body_exhausted", b""


def parse_body_ir(event_row: dict[str, str], layout_row: dict[str, str]) -> dict[str, Any]:
    """Decode one event body from its chunk file into the IR dictionary.

    Reads the chunk file for the row's entry index, slices out the derived
    body range, decodes it and combines the ops with class/event metadata
    copied from the two index rows.
    """
    entry_index = parse_int(event_row["entry_index"])
    chunk_file = find_chunk_file(entry_index)
    chunk_bytes = chunk_file.read_bytes()
    body_start = parse_int(event_row["derived_body_start"])
    body_end = parse_int(event_row["derived_body_end"])
    body = chunk_bytes[body_start:body_end]

    ops, end_reason, unknown_tail = _decode_body(body, body_start)

    slot = parse_int(event_row["slot"])
    payload_shape = infer_payload_shape(slot)
    return {
        "schema_version": "crusader-usecode-ir-v1-poc",
        "source": {
            "flex_path": "USECODE/EUSECODE.FLX",
            "extracted_root": "USECODE/EUSECODE_extracted",
            "chunk_file": str(chunk_file.relative_to(REPO_ROOT)).replace("\\", "/"),
        },
        "class": {
            "entry_index": entry_index,
            "object_index": parse_int(layout_row["object_index"]),
            "class_id": parse_int(layout_row["class_id"]),
            "class_name": layout_row["class_name_hint"],
            "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]),
            "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]),
            "conservative_event_count": parse_int(layout_row["conservative_event_count"]),
        },
        "event": {
            "slot": slot,
            "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
            "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]),
            "raw_code_offset": parse_int(event_row["raw_code_offset"]),
            "derived_body_start": body_start,
            "derived_body_end": body_end,
            "derived_body_length": parse_int(event_row["derived_body_length"]),
            "repeated_template_status": event_row["repeated_template_status"],
        },
        "body": {
            "end_reason": end_reason,
            "raw_body_sha1": hashlib.sha1(body).hexdigest(),
            "unknown_trailing_bytes": unknown_tail.hex(),
            "decoded_op_count": len(ops),
        },
        "ops": ops,
        "annotation_hints": annotation_hints(event_row, payload_shape),
    }


# ---------------------------------------------------------------------------
# Family diff helpers
# ---------------------------------------------------------------------------


def _common_prefix_len(a: bytes, b: bytes) -> int:
    """Return the number of leading bytes that ``a`` and ``b`` share."""
    limit = min(len(a), len(b))
    for i in range(limit):
        if a[i] != b[i]:
            return i
    return limit


def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int:
    """Return the number of trailing bytes shared by ``a`` and ``b``.

    The scan never reaches into the first ``prefix_len`` bytes of either
    input, so a prefix and suffix measured together cannot overlap.
    """
    la, lb = len(a), len(b)
    limit = min(la - prefix_len, lb - prefix_len)
    for i in range(1, limit + 1):
        if a[la - i] != b[lb - i]:
            return i - 1
    return limit


def _template_status(row: dict[str, str]) -> str:
    """Return the row's stripped ``repeated_template_status`` ("" if absent or None)."""
    return (row.get("repeated_template_status") or "").strip()


def _load_event_body(row: dict[str, str]) -> bytes | None:
    """Return the raw body bytes for an event row, or None if unavailable.

    None covers an empty body range, a missing chunk file and unparsable
    numbers in the row.
    """
    body_start_str = row.get("derived_body_start", "")
    body_end_str = row.get("derived_body_end", "")
    if not body_start_str or not body_end_str:
        return None
    try:
        chunk = find_chunk_file(parse_int(row["entry_index"]))
        data = chunk.read_bytes()
        return data[parse_int(body_start_str):parse_int(body_end_str)]
    except (FileNotFoundError, ValueError):
        return None


def compute_family_diff(class_name: str, slot: int) -> dict[str, Any]:
    """Diff the named event body against the other bodies in its family.

    The family is every event row with the same ``repeated_template_status``
    as the reference row. When that status is empty or "unique", the family
    falls back to every row with the same slot number.

    Returns a dict with:
      reference_entry / reference_class / slot - the reference row
      family_tag    - the status value used for grouping, or
                      ``slot_0xNN_all`` when the slot fallback was used (this
                      was previously reported as "unique" for unique rows,
                      which did not match the grouping actually applied)
      member_count  - number of rows in the family, reference included
      sibling_count - member_count - 1
      members       - per-member records (entry, class, body length, diff
                      against the reference), reference first, then ordered
                      by entry index

    Raises:
        KeyError: if no event row matches ``class_name``/``slot``.
        ValueError: if the reference body cannot be loaded.
    """
    event_rows = load_tsv_rows(CLASS_EVENT_INDEX)

    ref_row = _find_event_row(event_rows, class_name, slot)
    if ref_row is None:
        raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}")
    family_tag = _template_status(ref_row)
    ref_entry = parse_int(ref_row["entry_index"])

    indexed_rows = [row for row in event_rows if try_parse_int(row.get("entry_index", "")) is not None]
    if family_tag not in {"", "unique"}:
        family_rows = [row for row in indexed_rows if _template_status(row) == family_tag]
        reported_tag = family_tag
    else:
        # Fall back: same slot across all classes.
        family_rows = [row for row in indexed_rows if try_parse_int(row.get("slot", "")) == slot]
        reported_tag = f"slot_0x{slot:02X}_all"

    ref_body = _load_event_body(ref_row)
    if ref_body is None:
        raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}")

    members: list[dict[str, Any]] = []
    for row in family_rows:
        entry_idx = parse_int(row["entry_index"])
        body = _load_event_body(row)
        is_ref = entry_idx == ref_entry
        member: dict[str, Any] = {
            "entry_index": entry_idx,
            "class_name": row["class_name_hint"],
            "slot": try_parse_int(row.get("slot", "")),
            "body_length": len(body) if body is not None else None,
            "is_reference": is_ref,
        }
        if is_ref:
            member["diff_vs_reference"] = {"identical": True, "note": "reference"}
        elif body is not None:
            prefix = _common_prefix_len(ref_body, body)
            suffix = _common_suffix_len(ref_body, body, prefix)
            ref_diff_window = ref_body[prefix:len(ref_body) - suffix]
            member_diff_window = body[prefix:len(body) - suffix]
            member["diff_vs_reference"] = {
                "common_prefix_bytes": prefix,
                "common_suffix_bytes": suffix,
                "ref_diff_window_hex": ref_diff_window.hex(),
                "member_diff_window_hex": member_diff_window.hex(),
                "diff_window_length_ref": len(ref_diff_window),
                "diff_window_length_member": len(member_diff_window),
                "identical": ref_body == body,
            }
        members.append(member)

    members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"]))

    return {
        "reference_entry": ref_entry,
        "reference_class": class_name,
        "slot": slot,
        "family_tag": reported_tag,
        "member_count": len(members),
        "sibling_count": len(members) - 1,
        "members": members,
    }


def render_family_diff_text(diff: dict[str, Any]) -> str:
    """Render the dict from ``compute_family_diff`` as a plain-text report.

    A member whose slot or body length is unknown (``None``) shows ``?``.
    """
    lines = [
        f"Family diff: {diff['family_tag']}",
        f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}",
        f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}",
        "",
    ]
    for m in diff["members"]:
        tag = " [REF]" if m["is_reference"] else ""
        body_len = m["body_length"] if m["body_length"] is not None else "?"
        # Rows matched by template status are not guaranteed a parsable slot.
        slot_text = f"0x{m['slot']:02X}" if m["slot"] is not None else "?"
        lines.append(f" entry={m['entry_index']} class={m['class_name']} slot={slot_text} body_len={body_len}{tag}")
        d = m.get("diff_vs_reference")
        if not d or m["is_reference"]:
            continue
        if d["identical"]:
            lines.append(" identical to reference")
        else:
            lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}")
            lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}")
            lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}")
    return "\n".join(lines) + "\n"


def _format_operand(key: str, value: Any) -> str:
    """Format one operand as ``key=value``; ints in hex with a leading sign."""
    if isinstance(value, int):
        # Sign goes in front of the 0x prefix: "-0x3", not "0x-3".
        sign = "-" if value < 0 else ""
        return f"{key}={sign}0x{abs(value):X}"
    return f"{key}={value}"


def render_text(ir: dict[str, Any]) -> str:
    """Render the IR from ``parse_body_ir`` as a readable listing.

    Three header lines, a blank line, one line per op, and (only if there are
    any) the undecoded trailing bytes.
    """
    cls = ir["class"]
    event = ir["event"]
    body = ir["body"]
    lines = [
        f"Class {cls['class_name']} entry={cls['entry_index']} class_id=0x{cls['class_id']:X}",
        f"Slot 0x{event['slot']:02X} hint={event['event_name_hint']} "
        f"body=0x{event['derived_body_start']:04X}..0x{event['derived_body_end']:04X}",
        f"End reason: {body['end_reason']} ops={body['decoded_op_count']} sha1={body['raw_body_sha1']}",
        "",
    ]
    for op in ir["ops"]:
        operand_text = " ".join(_format_operand(key, value) for key, value in op["operands"].items())
        lines.append(
            f"{op['absolute_body_offset']:04X}: {op['opcode']:02X} {op['mnemonic']} {operand_text} raw={op['raw_bytes']}"
        )
    if body["unknown_trailing_bytes"]:
        lines.extend(["", f"unknown_trailing_bytes={body['unknown_trailing_bytes']}"])
    return "\n".join(lines) + "\n"


def _write_or_print(text: str, destination: str | None) -> None:
    """Write ``text`` to ``destination`` if one was given, else print it."""
    if destination:
        Path(destination).write_text(text, encoding="utf-8")
    else:
        print(text)


def main() -> None:
    """Command-line entry point: decode one class/slot body and emit reports.

    The IR JSON is always emitted. The text listing is emitted with
    ``--emit-text`` or when ``--text-output`` is given; the family diff with
    ``--family-diff`` or when either family-diff output path is given. The
    output-path options used to be ignored without their enabling flag.
    """
    parser = argparse.ArgumentParser(description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts")
    parser.add_argument("--class", dest="class_name", required=True, help="Class name from class_event_index.tsv, for example NPCTRIG")
    # type=parse_int makes argparse report a bad value as a usage error.
    parser.add_argument("--slot", required=True, type=parse_int, help="Event slot, for example 0x0A")
    parser.add_argument("--output", help="Write IR JSON to this file instead of stdout")
    parser.add_argument("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON")
    parser.add_argument("--text-output", help="Write the text listing to this file")
    parser.add_argument("--family-diff", action="store_true", help="Also emit the repeated-body family diff report (JSON and text) alongside the IR")
    parser.add_argument("--family-diff-output", help="Write the family diff JSON to this file")
    parser.add_argument("--family-diff-text-output", help="Write the family diff text report to this file")
    args = parser.parse_args()

    slot = args.slot
    event_row, layout_row = select_rows(args.class_name, slot)
    ir = parse_body_ir(event_row, layout_row)

    rendered_json = json.dumps(ir, indent=2)
    # Files get a trailing newline; stdout relies on the one print() adds.
    _write_or_print(rendered_json + "\n" if args.output else rendered_json, args.output)

    if args.emit_text or args.text_output:
        _write_or_print(render_text(ir), args.text_output)

    if args.family_diff or args.family_diff_output or args.family_diff_text_output:
        diff = compute_family_diff(args.class_name, slot)
        diff_json = json.dumps(diff, indent=2)
        _write_or_print(
            diff_json + "\n" if args.family_diff_output else diff_json,
            args.family_diff_output,
        )
        _write_or_print(render_family_diff_text(diff), args.family_diff_text_output)


if __name__ == "__main__":
    main()