Crusader_Decomp/tools/poc_crusader_usecode_parser.py

1129 lines
40 KiB
Python
Raw Normal View History

from __future__ import annotations
import argparse
import csv
import hashlib
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# Directory two levels up from this script: parents[0] is the directory that
# holds the script, parents[1] is that directory's parent.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Locations of the extracted USECODE artifacts this tool reads.
EXTRACTED_ROOT = REPO_ROOT.joinpath("USECODE", "EUSECODE_extracted")
CLASS_EVENT_INDEX = EXTRACTED_ROOT.joinpath("class_event_index.tsv")
CLASS_LAYOUT_INDEX = EXTRACTED_ROOT.joinpath("class_layout_index.tsv")
CHUNKS_DIR = EXTRACTED_ROOT.joinpath("chunks")
# Event slot number -> event name hint.  The slot number is the position of
# the name in the tuple below (0x00 first); slots without a descriptive name
# carry a "funcXX" placeholder.
EVENT_NAME_HINTS = dict(
    enumerate(
        (
            "look",  # 0x00
            "use",  # 0x01
            "anim",  # 0x02
            "setActivity",  # 0x03
            "cachein",  # 0x04
            "hit",  # 0x05
            "gotHit",  # 0x06
            "hatch",  # 0x07
            "schedule",  # 0x08
            "release",  # 0x09
            "equip",  # 0x0A
            "unequip",  # 0x0B
            "combine",  # 0x0C
            "func0D",  # 0x0D
            "calledFromAnim",  # 0x0E
            "enterFastArea",  # 0x0F
            "leaveFastArea",  # 0x10
            "cast",  # 0x11
            "justMoved",  # 0x12
            "avatarStoleSomething",  # 0x13
            "animGetHit",  # 0x14
            "func15",  # 0x15
            "func16",  # 0x16
            "func17",  # 0x17
            "func18",  # 0x18
            "func19",  # 0x19
            "func1A",  # 0x1A
            "func1B",  # 0x1B
            "func1C",  # 0x1C
            "func1D",  # 0x1D
            "func1E",  # 0x1E
            "func1F",  # 0x1F
        )
    )
)
# Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h.
# Source note: "current discovered intrinsics are for regret1.21 only".
# This table is used as a hint only; the ordinal mapping may differ between
# builds.
#
# Keys are intrinsic ordinals, values are name/signature hint strings.
# Many ordinals share the same hint, and some placeholder names embed a
# different ordinal than their key (e.g. 0x0017 -> "Intrinsic00C3()").
# NOTE(review): the strings (including the "AvatarStoleSomehting" spelling)
# are presumably verbatim from the upstream header -- confirm before editing.
INTRINSIC_HINTS: dict[int, str] = {
    0x0000: "Intrinsic0000()",
    0x0001: "Item::getFrame(void)",
    0x0002: "Item::setFrame(uint16)",
    0x0003: "Item::getMapNum(void)",
    0x0004: "Item::getStatus(void)",
    0x0005: "Item::orStatus(sint16)",
    0x0006: "Item::callEvent0A(sint16)",
    0x0007: "Intrinsic0007()",
    0x0008: "Item::isNpc(void)",
    0x0009: "Item::getZ(void)",
    0x000A: "Intrinsic000A()",
    0x000B: "Item::getQLo(void)",
    0x000C: "Item::destroy(void)",
    0x000D: "Intrinsic000D()",
    0x000E: "Item::getX(void)",
    0x000F: "Item::getY(void)",
    0x0010: "Intrinsic0010()",
    0x0011: "Item::getType(void)",
    0x0012: "Intrinsic0012()",
    0x0013: "Intrinsic0013()",
    0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)",
    0x0015: "Item::andStatus(void)",
    0x0016: "Intrinsic0016()",
    0x0017: "Intrinsic00C3()",
    0x0018: "Intrinsic00DA()",
    0x0019: "Intrinsic0019()",
    0x001A: "Item::create(uint16,uint16)",
    0x001B: "Item::pop(uint16,uint16,uint8)",
    0x001C: "Intrinsic00FA()",
    0x001D: "Item::push(void)",
    0x001E: "Intrinsic001E()",
    0x001F: "Item::getQLo(void)",
    0x0020: "Item::setQLo(sint16)",
    0x0021: "Item::getQHi(void)",
    0x0022: "Item::setQHi(sint16)",
    0x0023: "Intrinsic0023()",
    0x0024: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0025: "Item::getCY(void)",
    0x0026: "Item::getCX(void)",
    0x0027: "Intrinsic0027()",
    0x0028: "Item::setNpcNum(sint16)",
    0x0029: "Intrinsic0029()",
    0x002A: "Intrinsic002A()",
    0x002B: "Item::pop(void)",
    0x002C: "Intrinsic002C()",
    0x002D: "Item::isCompletelyOn(uint16)",
    0x002E: "Item::pop(uint16)",
    0x002F: "Intrinsic002F()",
    0x0030: "Intrinsic0030()",
    0x0031: "Item::getFamily(void)",
    0x0032: "Item::destroyContents(void)",
    0x0033: "Intrinsic0033()",
    0x0034: "Item::getDirToItem(uint16)",
    0x0035: "Intrinsic0035()",
    0x0036: "Intrinsic0036()",
    0x0037: "Intrinsic0037()",
    0x0038: "Item::andStatus(void)",
    0x0039: "Kernel::resetRef(uint16,ProcessType)",
    0x003A: "Item::touch(void)",
    0x003B: "Egg::getEggId(void)",
    0x003C: "Intrinsic003C()",
    0x003D: "Intrinsic003D()",
    0x003E: "Item::callEvent11(sint16)",
    0x003F: "Intrinsic003F()",
    0x0040: "Intrinsic0040()",
    0x0041: "Item::isOn(uint16)",
    0x0042: "Item::getQHi(void)",
    0x0043: "Item::isOn(uint16)",
    0x0044: "Item::getQHi(void)",
    0x0045: "Item::isOn(uint16)",
    0x0046: "Item::getQHi(void)",
    0x0047: "Item::isOn(uint16)",
    0x0048: "Item::getQHi(void)",
    0x0049: "Item::isOn(uint16)",
    0x004A: "Item::getQHi(void)",
    0x004B: "Item::isOn(uint16)",
    0x004C: "Item::getQHi(void)",
    0x004D: "Intrinsic004D()",
    0x004E: "Npc::isDead(void)",
    0x004F: "Intrinsic009C()",
    0x0050: "Intrinsic0050()",
    0x0051: "Intrinsic0051()",
    0x0052: "Intrinsic0052()",
    0x0053: "Intrinsic00BD()",
    0x0054: "Intrinsic0054()",
    0x0055: "Intrinsic0055()",
    0x0056: "Intrinsic0056()",
    0x0057: "Intrinsic0057()",
    0x0058: "Item::use(void)",
    0x0059: "Item::setQuantity(sint16)",
    0x005A: "Intrinsic005A()",
    0x005B: "Item::getSurfaceWeight(void)",
    0x005C: "Intrinsic005C()",
    0x005D: "Item::setFrame(uint16)",
    0x005E: "Intrinsic00DA()",
    0x005F: "Intrinsic005F()",
    0x0060: "Intrinsic0060()",
    0x0061: "Intrinsic0061()",
    0x0062: "Intrinsic0062()",
    0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)",
    0x0064: "Item::getPoint(WorldPoint&)",
    0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)",
    0x0066: "Item::fall(void)",
    0x0067: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0068: "Kernel::getNumProcesses(uint16,ProcessType)",
    0x0069: "Item::getCY(void)",
    0x006A: "Intrinsic006A()",
    0x006B: "Intrinsic006B()",
    0x006C: "Intrinsic006C()",
    0x006D: "Intrinsic006D()",
    0x006E: "Intrinsic006E()",
    0x006F: "Item::isInNpc(void)",
    0x0070: "Intrinsic0070()",
    0x0071: "Intrinsic0071()",
    0x0072: "Intrinsic0072()",
    0x0073: "Intrinsic0073()",
    0x0074: "Npc::isDead(void)",
    0x0075: "Item::getNpcNum(void)",
    0x0076: "IntrinsicReturn0",
    0x0077: "Intrinsic0077()",
    0x0078: "Item::callEvent0B(sint16)",
    0x0079: "Item::andStatus(void)",
    0x007A: "Item::move(uint16,uint16,uint8)",
    0x007B: "Intrinsic007B()",
    0x007C: "Intrinsic007C()",
    0x007D: "Intrinsic007D()",
    0x007E: "Intrinsic007E()",
    0x007F: "Intrinsic007F()",
    0x0080: "Intrinsic0080()",
    0x0081: "Intrinsic0081()",
    0x0082: "Intrinsic0082()",
    0x0083: "Intrinsic0083()",
    0x0084: "Intrinsic0084()",
    0x0085: "Intrinsic0085()",
    0x0086: "teleportToEgg(sint16,int,uint8)",
    0x0087: "Intrinsic0087()",
    0x0088: "Intrinsic0088()",
    0x0089: "Intrinsic00BD()",
    0x008A: "Item::getQuality(void)",
    0x008B: "Item::setQuality(sint16)",
    0x008C: "Intrinsic008C()",
    0x008D: "Intrinsic008D()",
    0x008E: "Intrinsic008E()",
    0x008F: "Camera::getX(void)",
    0x0090: "Camera::getY(void)",
    0x0091: "Item::setMapNum(sint16)",
    0x0092: "Item::getNpcNum(void)",
    0x0093: "Item::shoot(WorldPoint&,sint16,sint16)",
    0x0094: "Intrinsic0094()",
    0x0095: "Item::enterFastArea(void)",
    0x0096: "Intrinsic00CA()",
    0x0097: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0098: "Item::getNpcNum(void)",
    0x0099: "Intrinsic0099()",
    0x009A: "teleportToEgg(sint16,uint8)",
    0x009B: "Intrinsic009B()",
    0x009C: "Intrinsic009C()",
    0x009D: "Intrinsic009D()",
    0x009E: "Intrinsic009E()",
    0x009F: "Intrinsic009F()",
    0x00A0: "Item::andStatus(void)",
    0x00A1: "Item::getUnkEggType(void)",
    0x00A2: "Egg::setEggXRange(uint16)",
    0x00A3: "Item::setFrame(uint16)",
    0x00A4: "Item::overlaps(uint16)",
    0x00A5: "Item::isOn(uint16)",
    0x00A6: "Item::getQHi(void)",
    0x00A7: "Intrinsic00DA()",
    0x00A8: "Item::getCY(void)",
    0x00A9: "Intrinsic00A9()",
    0x00AA: "Item::isOn(uint16)",
    0x00AB: "Npc::isDead(void)",
    0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00AD: "Intrinsic00AD()",
    0x00AE: "Item::getQHi(void)",
    0x00AF: "Item::andStatus(void)",
    0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00B1: "Item::andStatus(void)",
    0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00B3: "Item::andStatus(void)",
    0x00B4: "Item::getDirToCoords(uint16,uint16)",
    0x00B5: "Intrinsic00B5()",
    0x00B6: "Intrinsic00B6()",
    0x00B7: "Item::getNpcNum(void)",
    0x00B8: "Item::getCY(void)",
    0x00B9: "Item::isOn(uint16)",
    0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)",
    0x00BB: "Npc::isDead(void)",
    0x00BC: "Intrinsic00BC()",
    0x00BD: "Intrinsic00BD()",
    0x00BE: "Intrinsic00BE()",
    0x00BF: "Item::andStatus(void)",
    0x00C0: "Intrinsic00C0()",
    0x00C1: "Intrinsic00C1()",
    0x00C2: "IntrinsicReturn0",
    0x00C3: "Intrinsic00C3()",
    0x00C4: "Item::getQHi(void)",
    0x00C5: "Item::setQuality(sint16)",
    0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00C7: "Intrinsic00C7()",
    0x00C8: "Intrinsic00C8()",
    0x00C9: "Item::callEvent0A(sint16)",
    0x00CA: "Intrinsic00CA()",
    0x00CB: "Item::isOn(uint16)",
    0x00CC: "Intrinsic00CC()",
    0x00CD: "Intrinsic00CD()",
    0x00CE: "Item::getQHi(void)",
    0x00CF: "Item::isOn(uint16)",
    0x00D0: "Intrinsic00D0()",
    0x00D1: "Intrinsic00D1()",
    0x00D2: "Intrinsic00D2()",
    0x00D3: "Intrinsic00FA()",
    0x00D4: "Camera::getY(void)",
    0x00D5: "Intrinsic00D5()",
    0x00D6: "Intrinsic00D6()",
    0x00D7: "Intrinsic00D7()",
    0x00D8: "Intrinsic00D8()",
    0x00D9: "Intrinsic00D9()",
    0x00DA: "Intrinsic00DA()",
    0x00DB: "Intrinsic00DB()",
    0x00DC: "Item::getQLo(void)",
    0x00DD: "Item::getQHi(void)",
    0x00DE: "Item::getNpcNum(void)",
    0x00DF: "Intrinsic00DF()",
    0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E1: "Intrinsic00FA()",
    0x00E2: "Item::getQLo(void)",
    0x00E3: "Item::getCY(void)",
    0x00E4: "Item::getNpcNum(void)",
    0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E6: "Item::getNpcNum(void)",
    0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00E8: "Item::getNpcNum(void)",
    0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EA: "Item::getNpcNum(void)",
    0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EC: "Item::getNpcNum(void)",
    0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00EE: "Item::getNpcNum(void)",
    0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F0: "Item::getNpcNum(void)",
    0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F2: "Item::getNpcNum(void)",
    0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F4: "Item::getNpcNum(void)",
    0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x00F6: "Item::getNpcNum(void)",
    0x00F7: "Item::andStatus(void)",
    0x00F8: "Intrinsic00FA()",
    0x00F9: "Item::getQLo(void)",
    0x00FA: "Intrinsic00FA()",
    0x00FB: "Intrinsic00FB()",
    0x00FC: "Intrinsic00FC()",
    0x00FD: "Item::getQLo(void)",
    0x00FE: "Intrinsic00FE()",
    0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0100: "Item::andStatus(void)",
    0x0101: "Item::isOn(uint16)",
    0x0102: "Npc::isDead(void)",
    0x0103: "Intrinsic00BD()",
    0x0104: "Item::getQHi(void)",
    0x0105: "Intrinsic00DA()",
    0x0106: "Intrinsic00FA()",
    0x0107: "Item::getQLo(void)",
    0x0108: "Item::isOn(uint16)",
    0x0109: "Item::getQHi(void)",
    0x010A: "Item::isOn(uint16)",
    0x010B: "Item::getQHi(void)",
    0x010C: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x010D: "Item::getNpcNum(void)",
    0x010E: "Item::getCY(void)",
    0x010F: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0110: "Item::isOn(uint16)",
    0x0111: "Intrinsic0111()",
    0x0112: "IntrinsicReturn0",
    0x0113: "Npc::isDead(void)",
    0x0114: "Intrinsic0088()",
    0x0115: "Intrinsic00C1()",
    0x0116: "Item::getQHi(void)",
    0x0117: "Intrinsic00BD()",
    0x0118: "Item::andStatus(void)",
    0x0119: "Item::getNpcNum(void)",
    0x011A: "Item::andStatus(void)",
    0x011B: "Item::getNpcNum(void)",
    0x011C: "Intrinsic011C()",
    0x011D: "Item::andStatus(void)",
    0x011E: "Item::getNpcNum(void)",
    0x011F: "Item::AvatarStoleSomehting(uint16)",
    0x0120: "Item::andStatus(void)",
    0x0121: "Item::getNpcNum(void)",
    0x0122: "Item::getQ(void)",
    0x0123: "Item::setQ(uint)",
    0x0124: "Item::andStatus(void)",
    0x0125: "Item::getNpcNum(void)",
    0x0126: "Item::andStatus(void)",
    0x0127: "Item::getNpcNum(void)",
    0x0128: "Item::andStatus(void)",
    0x0129: "Item::getNpcNum(void)",
    0x012A: "Item::andStatus(void)",
    0x012B: "Item::getNpcNum(void)",
    0x012C: "Item::andStatus(void)",
    0x012D: "Item::getNpcNum(void)",
    0x012E: "Intrinsic00C3()",
    0x012F: "Item::andStatus(void)",
    0x0130: "Item::getNpcNum(void)",
    0x0131: "Intrinsic0131()",
    0x0132: "Item::andStatus(void)",
    0x0133: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x0134: "Item::andStatus(void)",
    0x0135: "Camera::getY(void)",
    0x0136: "Camera::getZ(void)",
    0x0137: "Intrinsic0137()",
    0x0138: "Intrinsic009C()",
    0x0139: "Item::getTypeFlagCrusader(sint16)",
    0x013A: "Item::getNpcNum(void)",
    0x013B: "Item::hurl(sint16,sint16,sint16,sint16)",
    0x013C: "Item::getCY(void)",
    0x013D: "Item::getCZ(void)",
    0x013E: "Item::setFrame(uint16)",
    0x013F: "Intrinsic013F()",
    0x0140: "Intrinsic0140()",
    0x0141: "Intrinsic0141()",
    0x0142: "Intrinsic0142()",
    0x0143: "Npc::isDead(void)",
    0x0144: "Intrinsic00FA()",
    0x0145: "Intrinsic0145()",
    0x0146: "Intrinsic0146()",
    0x0147: "Intrinsic0147()",
    0x0148: "Item::getNpcNum(void)",
    0x0149: "Item::getQLo(void)",
    0x014A: "Item::andStatus(void)",
    0x014B: "Intrinsic014B()",
    0x014C: "Intrinsic014C()",
    0x014D: "Intrinsic014D()",
    0x014E: "Intrinsic003C()",
    0x014F: "Egg::getEggXRange(void)",
    0x0150: "Intrinsic009C()",
    0x0151: "Intrinsic0072()",
    0x0152: "Item::setFrame(uint16)",
    0x0153: "Intrinsic00C1()",
    0x0154: "Intrinsic00C3()",
    0x0155: "Intrinsic00C1()",
    0x0156: "Item::isOn(uint16)",
    0x0157: "Intrinsic00C3()",
    0x0158: "Intrinsic00FA()",
    0x0159: "Item::getQHi(void)",
    0x015A: "Item::getQLo(void)",
    0x015B: "Intrinsic00C1()",
    0x015C: "Intrinsic00C3()",
    0x015D: "Intrinsic015D()",
}
# Opcode -> mnemonic for opcodes that are decoded without reading any operand
# bytes.  parse_one_op consults this table first and only keeps the result
# when the opcode has no dedicated operand-decoding branch; an opcode that is
# neither here nor in such a branch is reported as "unknown_opcode".
NO_ARG_MNEMONICS = {
    # Stack housekeeping.
    0x08: "pop_result",
    0x12: "pop_temp",
    # Arithmetic / string / list operators.
    0x14: "add",
    0x15: "add_dword",
    0x16: "concat",
    0x17: "append_list",
    0x1C: "sub",
    0x1D: "sub_dword",
    0x1E: "mul",
    0x1F: "mul_dword",
    0x20: "div",
    0x21: "div_dword",
    0x22: "mod",
    0x23: "mod_dword",
    # Comparisons.
    0x24: "cmp",
    0x25: "cmp_dword",
    0x26: "strcmp",
    0x28: "lt",
    0x29: "lt_dword",
    0x2A: "le",
    0x2B: "le_dword",
    0x2C: "gt",
    0x2D: "gt_dword",
    0x2E: "ge",
    0x2F: "ge_dword",
    # Logical and bitwise operators.
    0x30: "not",
    0x31: "not_dword",
    0x32: "and",
    0x33: "and_dword",
    0x34: "or",
    0x35: "or_dword",
    0x36: "ne",
    0x37: "ne_dword",
    0x39: "bit_and",
    0x3A: "bit_or",
    0x3B: "bit_not",
    0x3C: "lsh",
    0x3D: "rsh",
    # Control flow, process and conversion opcodes.
    0x50: "ret",
    0x53: "suspend",
    0x59: "push_pid",
    0x5D: "push_retval_byte",
    0x5E: "push_retval_word",
    0x5F: "push_retval_dword",
    0x60: "word_to_dword",
    0x61: "dword_to_word",
    0x6B: "str_to_ptr",
    0x6D: "push_process_result",
    0x73: "loopnext",
    0x77: "set_info",
    0x78: "process_exclude",
    # parse_one_op reports end_reason "end_opcode" when it decodes 0x7A.
    0x7A: "end",
}
def parse_int(value: str) -> int:
    """Convert *value* to an int, letting the literal's prefix pick the base.

    Base 0 means ``0x``/``0o``/``0b`` prefixes are honoured and an unprefixed
    literal is decimal.  Raises ``ValueError`` for text that is not a valid
    integer literal.
    """
    return int(value, base=0)
def try_parse_int(value: str) -> int | None:
    """Like ``parse_int`` but return ``None`` instead of raising.

    Both ``ValueError`` (malformed literal) and ``TypeError`` (non-string
    input such as ``None``) are mapped to ``None``.
    """
    try:
        parsed = parse_int(value)
    except (TypeError, ValueError):
        parsed = None
    return parsed
def signed_byte(value: int) -> int:
    """Reinterpret *value* as a signed byte: subtract 0x100 when bit 7 is set."""
    if value & 0x80:
        return value - 0x100
    return value
def bp_repr(value: int) -> str:
    """Format a raw displacement byte as ``[BP+NNh]`` / ``[BP-NNh]``.

    The byte is sign-extended with ``signed_byte``; the magnitude is printed
    as at least two upper-case hex digits.
    """
    displacement = signed_byte(value)
    if displacement < 0:
        return f"[BP-{-displacement:02X}h]"
    return f"[BP+{displacement:02X}h]"
def sp_repr(value: int) -> str:
    """Format a raw displacement byte as ``[SP+NNh]`` / ``[SP-NNh]``.

    The byte is sign-extended with ``signed_byte``; the magnitude is printed
    as at least two upper-case hex digits.
    """
    displacement = signed_byte(value)
    if displacement < 0:
        return f"[SP-{-displacement:02X}h]"
    return f"[SP+{displacement:02X}h]"
@dataclass
class ParseResult:
    """Outcome of one ``parse_one_op`` call."""

    # Decoded op record (see op_record), or None when nothing was decoded.
    op: dict[str, Any] | None
    # Body-relative offset at which decoding should continue; equals the
    # starting offset when no op was decoded.
    next_offset: int
    # None while decoding may continue; otherwise a short tag explaining why
    # it stops (e.g. "end_opcode" or "unknown_opcode").
    end_reason: str | None = None
    # Undecoded bytes from the failing offset to the end of the body; only
    # populated when decoding stops on bytes it could not interpret.
    unknown_tail: bytes | None = None
class BodyReader:
    """Cursor over a byte string with little-endian integer and string reads.

    ``offset`` is the public cursor position; every read advances it.
    """

    def __init__(self, data: bytes, offset: int = 0) -> None:
        self.data = data
        self.offset = offset

    def _take(self, count: int) -> bytes:
        """Return up to *count* bytes at the cursor and advance by *count*.

        Near the end of the data the returned slice may be shorter than
        requested; the cursor still moves by the full *count*.
        """
        begin = self.offset
        self.offset = begin + count
        return self.data[begin:begin + count]

    def read_u8(self) -> int:
        """Read one byte; raises ``IndexError`` (cursor unchanged) past the end."""
        byte = self.data[self.offset]
        self.offset += 1
        return byte

    def read_u16(self) -> int:
        """Read a little-endian 16-bit value from whatever bytes are available."""
        return int.from_bytes(self._take(2), "little")

    def read_u32(self) -> int:
        """Read a little-endian 32-bit value from whatever bytes are available."""
        return int.from_bytes(self._take(4), "little")

    def read_cstring(self) -> str:
        """Read bytes up to a NUL terminator (consumed) or the end of the data.

        Each byte maps to the code point of the same value.
        """
        collected = bytearray()
        while self.offset < len(self.data):
            byte = self.read_u8()
            if not byte:
                break
            collected.append(byte)
        return collected.decode("latin-1")

    def read_fixed_string(self, length: int) -> str:
        """Read a *length*-byte latin-1 field and strip trailing NUL padding."""
        field = self._take(length)
        return field.decode("latin-1", errors="replace").rstrip("\x00")
def op_record(
    start: int,
    absolute_start: int,
    opcode: int,
    raw_bytes: bytes,
    mnemonic: str,
    operands: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the dictionary that describes one decoded op.

    ``raw_bytes`` is stored as a lower-case hex string; all other arguments
    are stored as given.
    """
    record: dict[str, Any] = {}
    record["offset"] = start
    record["absolute_body_offset"] = absolute_start
    record["opcode"] = opcode
    record["mnemonic"] = mnemonic
    record["raw_bytes"] = raw_bytes.hex()
    record["operands"] = operands
    return record
def _signed_word(value: int) -> int:
    """Reinterpret a 16-bit value as signed (subtract 0x10000 when bit 15 is set)."""
    return value - 0x10000 if value & 0x8000 else value


def _decode_operands(reader: BodyReader, opcode: int) -> tuple[str, dict[str, Any]] | None:
    """Decode the operands that follow *opcode* and return ``(mnemonic, operands)``.

    *reader* must be positioned just after the opcode byte.  Returns ``None``
    when the opcode has no operand branch here and is not listed in
    ``NO_ARG_MNEMONICS``.  May raise ``IndexError`` when a single-byte read
    runs past the end of the data.
    """
    if opcode in {0x00, 0x01, 0x02}:
        bp_offset = reader.read_u8()
        mnemonic = {0x00: "pop_local_byte", 0x01: "pop_local_word", 0x02: "pop_local_dword"}[opcode]
        return mnemonic, {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
    if opcode == 0x03:
        bp_offset = reader.read_u8()
        size = reader.read_u8()
        return "pop_local_blob", {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size}
    if opcode == 0x09:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "pop_list_element", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
            "slist_flag": slist_flag,
        }
    if opcode == 0x0A:
        value = reader.read_u8()
        return "push_byte_immediate", {"value_u8": value, "value_signed": signed_byte(value)}
    if opcode == 0x0B:
        return "push_word_immediate", {"value_u16": reader.read_u16()}
    if opcode == 0x0C:
        return "push_dword_immediate", {"value_u32": reader.read_u32()}
    if opcode == 0x0D:
        # The declared length is recorded only; the text itself is read up to
        # its NUL terminator.
        declared_length = reader.read_u16()
        text = reader.read_cstring()
        return "push_string_immediate", {"declared_length": declared_length, "string": text}
    if opcode == 0x0E:
        element_size = reader.read_u8()
        count = reader.read_u8()
        return "create_list", {"element_size": element_size, "count": count}
    if opcode == 0x0F:
        arg_bytes = reader.read_u8()
        intrinsic_ordinal = reader.read_u16()
        return "call_intrinsic", {
            "arg_bytes": arg_bytes,
            "intrinsic_ordinal": intrinsic_ordinal,
            "intrinsic_name_hint": INTRINSIC_HINTS.get(intrinsic_ordinal),
        }
    if opcode == 0x11:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        return "call_class_event", {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
    if opcode in {0x19, 0x1A, 0x1B}:
        mnemonic = {0x19: "append_slist", 0x1A: "remove_slist", 0x1B: "remove_list"}[opcode]
        return mnemonic, {"element_size": reader.read_u8()}
    if opcode == 0x38:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "in_list", {"element_size": element_size, "slist_flag": slist_flag}
    if opcode in {0x3E, 0x3F, 0x40, 0x41, 0x43, 0x4B, 0x62, 0x63, 0x64, 0x69}:
        bp_offset = reader.read_u8()
        mnemonic = {
            0x3E: "push_local_byte",
            0x3F: "push_local_word",
            0x40: "push_local_dword",
            0x41: "push_local_string",
            0x43: "push_local_slist",
            0x4B: "push_local_addr",
            0x62: "free_local_string",
            0x63: "free_local_slist",
            0x64: "free_local_list",
            0x69: "push_string_ptr",
        }[opcode]
        return mnemonic, {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
    if opcode == 0x42:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        return "push_local_list", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
        }
    if opcode == 0x44:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "push_list_element", {"element_size": element_size, "slist_flag": slist_flag}
    if opcode == 0x45:
        byte0 = reader.read_u8()
        byte1 = reader.read_u8()
        return "push_huge", {"value_a": byte0, "value_b": byte1}
    if opcode in {0x4C, 0x4D}:
        mnemonic = {0x4C: "push_indirect", 0x4D: "pop_indirect"}[opcode]
        return mnemonic, {"size": reader.read_u8()}
    if opcode in {0x4E, 0x4F}:
        global_id = reader.read_u16()
        size = reader.read_u8()
        mnemonic = {0x4E: "push_global", 0x4F: "pop_global"}[opcode]
        return mnemonic, {"global_id": global_id, "size": size}
    if opcode in {0x51, 0x52}:
        relative = reader.read_u16()
        signed_relative = _signed_word(relative)
        # The branch target is relative to the byte after the operand.
        target = reader.offset + signed_relative
        mnemonic = {0x51: "jne", 0x52: "jmp"}[opcode]
        return mnemonic, {
            "relative_u16": relative,
            "relative_signed": signed_relative,
            "target_offset": target,
        }
    if opcode == 0x54:
        arg0 = reader.read_u8()
        arg1 = reader.read_u8()
        return "implies", {"arg0": arg0, "arg1": arg1}
    if opcode == 0x57:
        arg_bytes = reader.read_u8()
        this_size = reader.read_u8()
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        return "spawn", {
            "arg_bytes": arg_bytes,
            "this_size": this_size,
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
    if opcode == 0x58:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        inline_offset = reader.read_u16()
        this_size = reader.read_u8()
        unknown = reader.read_u8()
        return "spawn_inline", {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
            "inline_offset": inline_offset,
            "this_size": this_size,
            "unknown": unknown,
        }
    if opcode == 0x5A:
        return "init", {"local_bytes": reader.read_u8()}
    if opcode == 0x5B:
        return "line_number", {"line_number": reader.read_u16()}
    if opcode == 0x5C:
        relative = reader.read_u16()
        # Measured from the byte after the 16-bit operand, before the name.
        symbol_offset = reader.offset + _signed_word(relative)
        symbol = reader.read_fixed_string(8)
        trailing_zero = reader.read_u8()
        return "symbol_info", {
            "symbol_offset": symbol_offset,
            "symbol": symbol,
            "trailing_zero": trailing_zero,
        }
    if opcode in {0x65, 0x66, 0x67, 0x6E, 0x6F, 0x74}:
        value = reader.read_u8()
        operands: dict[str, Any] = {"value_u8": value}
        if opcode in {0x65, 0x66, 0x67}:
            operands["target"] = sp_repr(value)
        mnemonic = {
            0x65: "free_stack_string",
            0x66: "free_stack_list",
            0x67: "free_stack_slist",
            0x6E: "add_sp",
            0x6F: "push_stack_addr",
            0x74: "loopscr",
        }[opcode]
        return mnemonic, operands
    if opcode == 0x6C:
        bp_offset = reader.read_u8()
        copy_type = reader.read_u8()
        return "param_pid_chg", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "copy_type": copy_type,
        }
    if opcode == 0x70:
        current_var = reader.read_u8()
        string_bytes = reader.read_u8()
        loop_type = reader.read_u8()
        return "loop", {"current_var": current_var, "string_bytes": string_bytes, "loop_type": loop_type}
    if opcode in {0x75, 0x76}:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        branch = reader.read_u16()
        signed_branch = _signed_word(branch)
        target = reader.offset + signed_branch
        mnemonic = {0x75: "foreach_list", 0x76: "foreach_slist"}[opcode]
        return mnemonic, {
            "bp_offset": bp_offset,
            "target_var": bp_repr(bp_offset),
            "element_size": element_size,
            "relative_u16": branch,
            "relative_signed": signed_branch,
            "target_offset": target,
        }
    if opcode == 0x79:
        return "global_address", {"global_id": reader.read_u16()}

    # No operand bytes: either a known no-argument opcode or an unknown one.
    mnemonic = NO_ARG_MNEMONICS.get(opcode)
    if mnemonic is None:
        return None
    return mnemonic, {}


def parse_one_op(body: bytes, start: int) -> ParseResult:
    """Decode the single op that begins at ``body[start]``.

    Args:
        body: Raw event body bytes.
        start: Body-relative offset of the opcode byte; must be inside *body*
            (an out-of-range *start* raises ``IndexError``).

    Returns:
        A ``ParseResult``.  On success ``op`` holds the record built by
        ``op_record`` (``absolute_body_offset`` is initialised to the
        body-relative offset) and ``next_offset`` points past the op;
        ``end_reason`` is ``"end_opcode"`` for opcode 0x7A and ``None``
        otherwise.  When decoding fails, ``op`` is ``None``, ``next_offset``
        is *start*, ``unknown_tail`` is ``body[start:]`` and ``end_reason`` is
        ``"unknown_opcode"`` or ``"truncated_operand"`` (the operands would
        extend past the end of *body*).
    """
    reader = BodyReader(body, start)
    opcode = reader.read_u8()

    truncated = False
    decoded = None
    try:
        decoded = _decode_operands(reader, opcode)
    except IndexError:
        # A single-byte operand read ran off the end of the body.
        truncated = True
    # Multi-byte reads do not raise on short data; they just move the cursor
    # past the end, so detect that case as well.
    if truncated or reader.offset > len(body):
        return ParseResult(op=None, next_offset=start, end_reason="truncated_operand", unknown_tail=body[start:])
    if decoded is None:
        return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:])

    mnemonic, operands = decoded
    raw = body[start:reader.offset]
    op = op_record(start, start, opcode, raw, mnemonic, operands)
    end_reason = "end_opcode" if opcode == 0x7A else None
    return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason)
def load_tsv_rows(path: Path) -> list[dict[str, str]]:
    """Read a UTF-8, tab-separated file with a header row into a list of dicts.

    Each data row becomes one dict keyed by the header names.
    """
    with path.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        return [row for row in reader]
def find_chunk_file(entry_index: int) -> Path:
    """Return the chunk file for *entry_index* from ``CHUNKS_DIR``.

    Files are matched by the pattern ``chunk_<entry_index>_*.bin``; when
    several match, the one that sorts first is returned.

    Raises:
        FileNotFoundError: no file matches the pattern.
    """
    pattern = f"chunk_{entry_index}_*.bin"
    first_match = min(CHUNKS_DIR.glob(pattern), default=None)
    if first_match is None:
        raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}")
    return first_match
def select_rows(class_name: str, slot: int) -> tuple[dict[str, str], dict[str, str]]:
    """Look up the event-index row and layout-index row for a class/slot pair.

    The event row is the first one whose ``class_name_hint`` matches
    *class_name* case-insensitively, whose ``slot`` parses to *slot* and whose
    ``entry_index`` parses as an integer.  The layout row is the first one
    with the same ``entry_index``.

    Raises:
        KeyError: no matching event row, or no layout row for its entry.
        ValueError: the event row lacks a derived body start or end.
    """
    event_rows = load_tsv_rows(CLASS_EVENT_INDEX)
    layout_rows = load_tsv_rows(CLASS_LAYOUT_INDEX)

    event_row = None
    for candidate in event_rows:
        if candidate["class_name_hint"].upper() != class_name.upper():
            continue
        if try_parse_int(candidate.get("slot", "")) != slot:
            continue
        if try_parse_int(candidate.get("entry_index", "")) is None:
            continue
        event_row = candidate
        break
    if event_row is None:
        raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}")

    has_body_range = event_row["derived_body_start"] and event_row["derived_body_end"]
    if not has_body_range:
        raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}")

    entry_index = parse_int(event_row["entry_index"])
    layout_row = None
    for candidate in layout_rows:
        if try_parse_int(candidate.get("entry_index", "")) == entry_index:
            layout_row = candidate
            break
    if layout_row is None:
        raise KeyError(f"No class_layout_index row found for entry_index={entry_index}")
    return event_row, layout_row
def annotation_hints(event_row: dict[str, str], payload_shape_hint: str) -> dict[str, Any]:
    """Build the ``annotation_hints`` section attached to a decoded body.

    The runtime family and the compiled anchor list are fixed; only the slot
    taxonomy (from *event_row*) and *payload_shape_hint* vary per call.  The
    event name falls back to ``EVENT_NAME_HINTS`` when the row's own hint is
    empty.
    """
    slot = parse_int(event_row["slot"])
    anchor_table = (
        ("000d:51fd", "slot_value_loader"),
        ("000d:5572", "slot_value_plus_offset"),
        ("000d:46ec", "context_create_from_slot"),
        ("000d:22bc", "matrix_pushback_stage"),
        ("000d:ebe3", "opcode_sequence_run"),
    )
    anchors = [{"address": address, "role": role} for address, role in anchor_table]
    taxonomy = {
        "slot": slot,
        "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
    }
    return {
        "runtime_family": "slot-backed-owner-loaded-body",
        "payload_shape_hint": payload_shape_hint,
        "compiled_anchors": anchors,
        "slot_taxonomy": taxonomy,
    }
def infer_payload_shape(slot: int) -> str | None:
    """Return the payload shape hint for an event slot, or ``None`` if unlisted."""
    shape_by_slot = {
        0x10: "none",
        0x12: "none",
        0x0A: "word",
        0x0B: "word",
        0x11: "word",
        0x14: "word",
        0x13: "signed_word",
    }
    return shape_by_slot.get(slot)
def parse_body_ir(event_row: dict[str, str], layout_row: dict[str, str]) -> dict[str, Any]:
    """Decode one event body and return its IR document as a dict.

    The body is the ``derived_body_start``..``derived_body_end`` slice of the
    chunk file for the row's ``entry_index``.  Ops are decoded one after
    another with ``parse_one_op`` until the body is exhausted or an op
    reports an end reason; bytes left after that point are reported in hex as
    ``unknown_trailing_bytes``.
    """
    entry_index = parse_int(event_row["entry_index"])
    chunk_file = find_chunk_file(entry_index)
    chunk_bytes = chunk_file.read_bytes()
    body_start = parse_int(event_row["derived_body_start"])
    body_end = parse_int(event_row["derived_body_end"])
    body = chunk_bytes[body_start:body_end]

    decoded_ops: list[dict[str, Any]] = []
    cursor = 0
    end_reason = "body_exhausted"
    unknown_tail = b""
    while cursor < len(body):
        step = parse_one_op(body, cursor)
        cursor = step.next_offset
        record = step.op
        if record is not None:
            # parse_one_op only knows body-relative offsets; rebase to the chunk.
            record["absolute_body_offset"] = body_start + record["offset"]
            decoded_ops.append(record)
        if step.end_reason is None:
            continue
        end_reason = step.end_reason
        if end_reason == "end_opcode":
            # Everything after the end opcode is left undecoded.
            unknown_tail = body[cursor:]
        else:
            unknown_tail = step.unknown_tail or b""
        break
    if cursor >= len(body) and end_reason == "body_exhausted":
        unknown_tail = b""

    slot = parse_int(event_row["slot"])
    payload_shape = infer_payload_shape(slot)

    source_section = {
        "flex_path": "USECODE/EUSECODE.FLX",
        "extracted_root": "USECODE/EUSECODE_extracted",
        "chunk_file": str(chunk_file.relative_to(REPO_ROOT)).replace("\\", "/"),
    }
    class_section = {
        "entry_index": entry_index,
        "object_index": parse_int(layout_row["object_index"]),
        "class_id": parse_int(layout_row["class_id"]),
        "class_name": layout_row["class_name_hint"],
        "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]),
        "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]),
        "conservative_event_count": parse_int(layout_row["conservative_event_count"]),
    }
    event_section = {
        "slot": slot,
        "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
        "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]),
        "raw_code_offset": parse_int(event_row["raw_code_offset"]),
        "derived_body_start": body_start,
        "derived_body_end": body_end,
        "derived_body_length": parse_int(event_row["derived_body_length"]),
        "repeated_template_status": event_row["repeated_template_status"],
    }
    body_section = {
        "end_reason": end_reason,
        "raw_body_sha1": hashlib.sha1(body).hexdigest(),
        "unknown_trailing_bytes": unknown_tail.hex(),
        "decoded_op_count": len(decoded_ops),
    }
    return {
        "schema_version": "crusader-usecode-ir-v1-poc",
        "source": source_section,
        "class": class_section,
        "event": event_section,
        "body": body_section,
        "ops": decoded_ops,
        "annotation_hints": annotation_hints(event_row, payload_shape),
    }
# ---------------------------------------------------------------------------
# Family diff helpers
# ---------------------------------------------------------------------------
def _common_prefix_len(a: bytes, b: bytes) -> int:
limit = min(len(a), len(b))
for i in range(limit):
if a[i] != b[i]:
return i
return limit
def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int:
la, lb = len(a), len(b)
limit = min(la - prefix_len, lb - prefix_len)
for i in range(1, limit + 1):
if a[la - i] != b[lb - i]:
return i - 1
return limit
def compute_family_diff(class_name: str, slot: int) -> dict[str, Any]:
    """Diff the body of one class/slot against the other bodies in its family.

    The reference row is the first ``class_event_index`` row matching
    *class_name* (case-insensitive) and *slot*.  Its family is every row with
    the same ``repeated_template_status`` tag; when the tag is empty or
    ``"unique"`` the family falls back to every row with the same slot.  Each
    member body is compared with the reference body by common prefix and
    common suffix, and the differing middle windows are reported in hex.

    Returns a dict with:
        reference_entry  entry_index of the reference row
        reference_class  *class_name* as passed in
        slot             *slot* as passed in
        family_tag       the tag, or ``slot_0xNN_all`` when the tag is empty
                         (a ``"unique"`` tag is reported unchanged even though
                         the same-slot fallback grouping is used)
        member_count     number of members, reference included
        sibling_count    ``member_count - 1``
        members          per-member records, reference first, then by entry

    Raises:
        KeyError: no row matches *class_name* / *slot*.
        ValueError: the reference body cannot be loaded.
    """
    event_rows = load_tsv_rows(CLASS_EVENT_INDEX)

    def _tag_of(row: dict[str, str]) -> str:
        # csv.DictReader fills missing trailing fields with None, so guard
        # before stripping.
        return (row.get("repeated_template_status") or "").strip()

    def _has_entry(row: dict[str, str]) -> bool:
        return try_parse_int(row.get("entry_index", "")) is not None

    # Locate the reference row.
    wanted_class = class_name.upper()
    ref_row = next(
        (
            row for row in event_rows
            if row["class_name_hint"].upper() == wanted_class
            and try_parse_int(row.get("slot", "")) == slot
            and _has_entry(row)
        ),
        None,
    )
    if ref_row is None:
        raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}")
    family_tag = _tag_of(ref_row)
    ref_entry = parse_int(ref_row["entry_index"])

    # Collect family members: same tag when it is meaningful, else same slot.
    if family_tag and family_tag != "unique":
        family_rows = [
            row for row in event_rows
            if _tag_of(row) == family_tag and _has_entry(row)
        ]
    else:
        family_rows = [
            row for row in event_rows
            if try_parse_int(row.get("slot", "")) == slot and _has_entry(row)
        ]

    def _load_body(row: dict[str, str]) -> bytes | None:
        """Return the row's body bytes, or None when they cannot be located."""
        body_start_str = row.get("derived_body_start", "")
        body_end_str = row.get("derived_body_end", "")
        if not body_start_str or not body_end_str:
            return None
        try:
            chunk = find_chunk_file(parse_int(row["entry_index"]))
            data = chunk.read_bytes()
            return data[parse_int(body_start_str):parse_int(body_end_str)]
        except (FileNotFoundError, ValueError):
            return None

    ref_body = _load_body(ref_row)
    if ref_body is None:
        raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}")

    members: list[dict[str, Any]] = []
    for row in family_rows:
        entry_idx = parse_int(row["entry_index"])
        body = _load_body(row)
        # Identify the reference by row identity, not by entry_index: another
        # slot of the same class entry can belong to the same family and must
        # still be diffed against the reference.
        is_ref = row is ref_row
        member: dict[str, Any] = {
            "entry_index": entry_idx,
            "class_name": row["class_name_hint"],
            "slot": try_parse_int(row.get("slot", "")),
            "body_length": len(body) if body is not None else None,
            "is_reference": is_ref,
        }
        if is_ref:
            member["diff_vs_reference"] = {"identical": True, "note": "reference"}
        elif body is not None:
            prefix = _common_prefix_len(ref_body, body)
            suffix = _common_suffix_len(ref_body, body, prefix)
            # With suffix == 0 the window runs to the end of each body.
            ref_diff_window = ref_body[prefix:len(ref_body) - suffix]
            member_diff_window = body[prefix:len(body) - suffix]
            member["diff_vs_reference"] = {
                "common_prefix_bytes": prefix,
                "common_suffix_bytes": suffix,
                "ref_diff_window_hex": ref_diff_window.hex(),
                "member_diff_window_hex": member_diff_window.hex(),
                "diff_window_length_ref": len(ref_diff_window),
                "diff_window_length_member": len(member_diff_window),
                "identical": ref_body == body,
            }
        members.append(member)

    members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"]))
    return {
        "reference_entry": ref_entry,
        "reference_class": class_name,
        "slot": slot,
        "family_tag": family_tag or f"slot_0x{slot:02X}_all",
        "member_count": len(members),
        "sibling_count": len(members) - 1,
        "members": members,
    }
def render_family_diff_text(diff: dict[str, Any]) -> str:
    """Render the dict produced by ``compute_family_diff`` as a text report.

    The report has a three-line header, a blank line, then one line per
    member followed by its diff summary (non-reference members with a diff
    only).  A member whose ``slot`` or ``body_length`` is ``None`` is shown
    with ``?`` in that position.  The result always ends with a newline.
    """
    lines = [
        f"Family diff: {diff['family_tag']}",
        f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}",
        f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}",
        "",
    ]
    for m in diff["members"]:
        tag = " [REF]" if m["is_reference"] else ""
        body_len = m["body_length"] if m["body_length"] is not None else "?"
        # Member slots may be None when the row's slot column did not parse.
        slot_text = f"0x{m['slot']:02X}" if m["slot"] is not None else "?"
        lines.append(f" entry={m['entry_index']} class={m['class_name']} slot={slot_text} body_len={body_len}{tag}")
        d = m.get("diff_vs_reference")
        if not d or m["is_reference"]:
            continue
        if d["identical"]:
            lines.append(" identical to reference")
        else:
            lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}")
            lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}")
            lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}")
    return "\n".join(lines) + "\n"
def render_text(ir: dict[str, Any]) -> str:
    """Render an IR document from ``parse_body_ir`` as a text listing.

    The listing has a three-line header, a blank line, one line per decoded
    op, and, when present, a trailing ``unknown_trailing_bytes=`` line.
    Integer operands are printed in upper-case hex; negative integers are
    printed as ``-0xNN`` rather than the malformed ``0x-NN``.  Other operand
    values are printed with their default string form.  The result always
    ends with a newline.
    """

    def _format_operand(key: str, value: Any) -> str:
        if isinstance(value, int):
            if value < 0:
                # format(-5, "X") yields "-5"; keep the sign in front of "0x".
                return f"{key}=-0x{-value:X}"
            return f"{key}=0x{value:X}"
        return f"{key}={value}"

    lines = [
        f"Class {ir['class']['class_name']} entry={ir['class']['entry_index']} class_id=0x{ir['class']['class_id']:X}",
        f"Slot 0x{ir['event']['slot']:02X} hint={ir['event']['event_name_hint']} body=0x{ir['event']['derived_body_start']:04X}..0x{ir['event']['derived_body_end']:04X}",
        f"End reason: {ir['body']['end_reason']} ops={ir['body']['decoded_op_count']} sha1={ir['body']['raw_body_sha1']}",
        "",
    ]
    for op in ir["ops"]:
        operand_items = [_format_operand(key, value) for key, value in op["operands"].items()]
        lines.append(
            f"{op['absolute_body_offset']:04X}: {op['opcode']:02X} {op['mnemonic']} {' '.join(operand_items)} raw={op['raw_bytes']}"
        )
    if ir["body"]["unknown_trailing_bytes"]:
        lines.extend(["", f"unknown_trailing_bytes={ir['body']['unknown_trailing_bytes']}"])
    return "\n".join(lines) + "\n"
def main() -> None:
    """Command-line entry point.

    Decodes the body selected by ``--class`` / ``--slot`` and emits the IR as
    JSON (to ``--output`` or stdout).  A text listing is emitted when
    ``--emit-text`` or ``--text-output`` is given, and a family diff report
    (JSON and text) when ``--family-diff`` or either of its output options is
    given.  Supplying an output path therefore implies the corresponding
    report instead of being silently ignored.
    """
    parser = argparse.ArgumentParser(description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts")
    parser.add_argument("--class", dest="class_name", required=True, help="Class name from class_event_index.tsv, for example NPCTRIG")
    parser.add_argument("--slot", required=True, help="Event slot, for example 0x0A")
    parser.add_argument("--output", help="Write IR JSON to this file instead of stdout")
    parser.add_argument("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON")
    parser.add_argument("--text-output", help="Write the text listing to this file")
    parser.add_argument("--family-diff", action="store_true", help="Emit repeated-body family diff report instead of (or alongside) the IR")
    parser.add_argument("--family-diff-output", help="Write the family diff JSON to this file")
    parser.add_argument("--family-diff-text-output", help="Write the family diff text report to this file")
    args = parser.parse_args()

    try:
        slot = parse_int(args.slot)
    except ValueError:
        # Report a bad slot as a usage error (exit status 2), not a traceback.
        parser.error(f"invalid --slot value: {args.slot!r}")

    event_row, layout_row = select_rows(args.class_name, slot)
    ir = parse_body_ir(event_row, layout_row)
    rendered_json = json.dumps(ir, indent=2)
    if args.output:
        Path(args.output).write_text(rendered_json + "\n", encoding="utf-8")
    else:
        print(rendered_json)

    if args.emit_text or args.text_output:
        rendered_text = render_text(ir)
        if args.text_output:
            Path(args.text_output).write_text(rendered_text, encoding="utf-8")
        else:
            print(rendered_text)

    if args.family_diff or args.family_diff_output or args.family_diff_text_output:
        diff = compute_family_diff(args.class_name, slot)
        diff_json = json.dumps(diff, indent=2)
        if args.family_diff_output:
            Path(args.family_diff_output).write_text(diff_json + "\n", encoding="utf-8")
        else:
            print(diff_json)
        diff_text = render_family_diff_text(diff)
        if args.family_diff_text_output:
            Path(args.family_diff_text_output).write_text(diff_text, encoding="utf-8")
        else:
            print(diff_text)


if __name__ == "__main__":
    main()