Crusader_Decomp/tools/poc_crusader_usecode_parser.py
MaddoScientisto daa363c3d2 Add 'annotate-usecode' command to import USECODE IR JSON annotations
- Introduced a new command 'annotate-usecode' to import USECODE IR JSON annotation hints as Ghidra comments on compiled anchors.
- Added argument parsing for multiple IR JSON files, comment type selection, and a dry-run option.
- Implemented logic to read annotation records from the provided IR files and set comments on the corresponding addresses in Ghidra.
- Enhanced JSON schema to include response structure for the new command.
2026-03-24 18:14:20 +01:00

1129 lines
No EOL
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import argparse
import csv
import hashlib
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# Filesystem layout of the extracted USECODE artifacts. REPO_ROOT is the
# parent of the directory that holds this script; everything else hangs off it.
REPO_ROOT = Path(__file__).resolve().parents[1]
EXTRACTED_ROOT = REPO_ROOT.joinpath("USECODE", "EUSECODE_extracted")
# Tab-separated indexes read by load_tsv_rows().
CLASS_EVENT_INDEX = EXTRACTED_ROOT.joinpath("class_event_index.tsv")
CLASS_LAYOUT_INDEX = EXTRACTED_ROOT.joinpath("class_layout_index.tsv")
# Directory searched by find_chunk_file() for chunk_<entry_index>_*.bin files.
CHUNKS_DIR = EXTRACTED_ROOT.joinpath("chunks")
# Name hints for event slots 0x00..0x1F, keyed by slot number. The names are
# listed in slot order, so the position in the tuple is the slot value.
EVENT_NAME_HINTS = dict(
    enumerate(
        (
            "look",  # 0x00
            "use",
            "anim",
            "setActivity",
            "cachein",
            "hit",
            "gotHit",
            "hatch",
            "schedule",  # 0x08
            "release",
            "equip",
            "unequip",
            "combine",
            "func0D",
            "calledFromAnim",
            "enterFastArea",
            "leaveFastArea",  # 0x10
            "cast",
            "justMoved",
            "avatarStoleSomething",
            "animGetHit",
            "func15",
            "func16",
            "func17",
            "func18",  # 0x18
            "func19",
            "func1A",
            "func1B",
            "func1C",
            "func1D",
            "func1E",
            "func1F",
        )
    )
)
# Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h
# Source note: "current discovered intrinsics are for regret1.21 only"
# This is used as a hint only; the ordinal mapping may differ between builds.
#
# Keys are the 16-bit intrinsic ordinals that parse_one_op reads for opcode
# 0x0F (call_intrinsic); the value is attached to the decoded op as
# "intrinsic_name_hint" (None when the ordinal is not listed here).
# Several ordinals map to the same name string, so values are not unique.
INTRINSIC_HINTS: dict[int, str] = {
0x0000: "Intrinsic0000()",
0x0001: "Item::getFrame(void)",
0x0002: "Item::setFrame(uint16)",
0x0003: "Item::getMapNum(void)",
0x0004: "Item::getStatus(void)",
0x0005: "Item::orStatus(sint16)",
0x0006: "Item::callEvent0A(sint16)",
0x0007: "Intrinsic0007()",
0x0008: "Item::isNpc(void)",
0x0009: "Item::getZ(void)",
0x000A: "Intrinsic000A()",
0x000B: "Item::getQLo(void)",
0x000C: "Item::destroy(void)",
0x000D: "Intrinsic000D()",
0x000E: "Item::getX(void)",
0x000F: "Item::getY(void)",
0x0010: "Intrinsic0010()",
0x0011: "Item::getType(void)",
0x0012: "Intrinsic0012()",
0x0013: "Intrinsic0013()",
0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)",
0x0015: "Item::andStatus(void)",
0x0016: "Intrinsic0016()",
0x0017: "Intrinsic00C3()",
0x0018: "Intrinsic00DA()",
0x0019: "Intrinsic0019()",
0x001A: "Item::create(uint16,uint16)",
0x001B: "Item::pop(uint16,uint16,uint8)",
0x001C: "Intrinsic00FA()",
0x001D: "Item::push(void)",
0x001E: "Intrinsic001E()",
0x001F: "Item::getQLo(void)",
0x0020: "Item::setQLo(sint16)",
0x0021: "Item::getQHi(void)",
0x0022: "Item::setQHi(sint16)",
0x0023: "Intrinsic0023()",
0x0024: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0025: "Item::getCY(void)",
0x0026: "Item::getCX(void)",
0x0027: "Intrinsic0027()",
0x0028: "Item::setNpcNum(sint16)",
0x0029: "Intrinsic0029()",
0x002A: "Intrinsic002A()",
0x002B: "Item::pop(void)",
0x002C: "Intrinsic002C()",
0x002D: "Item::isCompletelyOn(uint16)",
0x002E: "Item::pop(uint16)",
0x002F: "Intrinsic002F()",
0x0030: "Intrinsic0030()",
0x0031: "Item::getFamily(void)",
0x0032: "Item::destroyContents(void)",
0x0033: "Intrinsic0033()",
0x0034: "Item::getDirToItem(uint16)",
0x0035: "Intrinsic0035()",
0x0036: "Intrinsic0036()",
0x0037: "Intrinsic0037()",
0x0038: "Item::andStatus(void)",
0x0039: "Kernel::resetRef(uint16,ProcessType)",
0x003A: "Item::touch(void)",
0x003B: "Egg::getEggId(void)",
0x003C: "Intrinsic003C()",
0x003D: "Intrinsic003D()",
0x003E: "Item::callEvent11(sint16)",
0x003F: "Intrinsic003F()",
0x0040: "Intrinsic0040()",
0x0041: "Item::isOn(uint16)",
0x0042: "Item::getQHi(void)",
0x0043: "Item::isOn(uint16)",
0x0044: "Item::getQHi(void)",
0x0045: "Item::isOn(uint16)",
0x0046: "Item::getQHi(void)",
0x0047: "Item::isOn(uint16)",
0x0048: "Item::getQHi(void)",
0x0049: "Item::isOn(uint16)",
0x004A: "Item::getQHi(void)",
0x004B: "Item::isOn(uint16)",
0x004C: "Item::getQHi(void)",
0x004D: "Intrinsic004D()",
0x004E: "Npc::isDead(void)",
0x004F: "Intrinsic009C()",
0x0050: "Intrinsic0050()",
0x0051: "Intrinsic0051()",
0x0052: "Intrinsic0052()",
0x0053: "Intrinsic00BD()",
0x0054: "Intrinsic0054()",
0x0055: "Intrinsic0055()",
0x0056: "Intrinsic0056()",
0x0057: "Intrinsic0057()",
0x0058: "Item::use(void)",
0x0059: "Item::setQuantity(sint16)",
0x005A: "Intrinsic005A()",
0x005B: "Item::getSurfaceWeight(void)",
0x005C: "Intrinsic005C()",
0x005D: "Item::setFrame(uint16)",
0x005E: "Intrinsic00DA()",
0x005F: "Intrinsic005F()",
0x0060: "Intrinsic0060()",
0x0061: "Intrinsic0061()",
0x0062: "Intrinsic0062()",
0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)",
0x0064: "Item::getPoint(WorldPoint&)",
0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)",
0x0066: "Item::fall(void)",
0x0067: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0068: "Kernel::getNumProcesses(uint16,ProcessType)",
0x0069: "Item::getCY(void)",
0x006A: "Intrinsic006A()",
0x006B: "Intrinsic006B()",
0x006C: "Intrinsic006C()",
0x006D: "Intrinsic006D()",
0x006E: "Intrinsic006E()",
0x006F: "Item::isInNpc(void)",
0x0070: "Intrinsic0070()",
0x0071: "Intrinsic0071()",
0x0072: "Intrinsic0072()",
0x0073: "Intrinsic0073()",
0x0074: "Npc::isDead(void)",
0x0075: "Item::getNpcNum(void)",
0x0076: "IntrinsicReturn0",
0x0077: "Intrinsic0077()",
0x0078: "Item::callEvent0B(sint16)",
0x0079: "Item::andStatus(void)",
0x007A: "Item::move(uint16,uint16,uint8)",
0x007B: "Intrinsic007B()",
0x007C: "Intrinsic007C()",
0x007D: "Intrinsic007D()",
0x007E: "Intrinsic007E()",
0x007F: "Intrinsic007F()",
0x0080: "Intrinsic0080()",
0x0081: "Intrinsic0081()",
0x0082: "Intrinsic0082()",
0x0083: "Intrinsic0083()",
0x0084: "Intrinsic0084()",
0x0085: "Intrinsic0085()",
0x0086: "teleportToEgg(sint16,int,uint8)",
0x0087: "Intrinsic0087()",
0x0088: "Intrinsic0088()",
0x0089: "Intrinsic00BD()",
0x008A: "Item::getQuality(void)",
0x008B: "Item::setQuality(sint16)",
0x008C: "Intrinsic008C()",
0x008D: "Intrinsic008D()",
0x008E: "Intrinsic008E()",
0x008F: "Camera::getX(void)",
0x0090: "Camera::getY(void)",
0x0091: "Item::setMapNum(sint16)",
0x0092: "Item::getNpcNum(void)",
0x0093: "Item::shoot(WorldPoint&,sint16,sint16)",
0x0094: "Intrinsic0094()",
0x0095: "Item::enterFastArea(void)",
0x0096: "Intrinsic00CA()",
0x0097: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0098: "Item::getNpcNum(void)",
0x0099: "Intrinsic0099()",
0x009A: "teleportToEgg(sint16,uint8)",
0x009B: "Intrinsic009B()",
0x009C: "Intrinsic009C()",
0x009D: "Intrinsic009D()",
0x009E: "Intrinsic009E()",
0x009F: "Intrinsic009F()",
0x00A0: "Item::andStatus(void)",
0x00A1: "Item::getUnkEggType(void)",
0x00A2: "Egg::setEggXRange(uint16)",
0x00A3: "Item::setFrame(uint16)",
0x00A4: "Item::overlaps(uint16)",
0x00A5: "Item::isOn(uint16)",
0x00A6: "Item::getQHi(void)",
0x00A7: "Intrinsic00DA()",
0x00A8: "Item::getCY(void)",
0x00A9: "Intrinsic00A9()",
0x00AA: "Item::isOn(uint16)",
0x00AB: "Npc::isDead(void)",
0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00AD: "Intrinsic00AD()",
0x00AE: "Item::getQHi(void)",
0x00AF: "Item::andStatus(void)",
0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00B1: "Item::andStatus(void)",
0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00B3: "Item::andStatus(void)",
0x00B4: "Item::getDirToCoords(uint16,uint16)",
0x00B5: "Intrinsic00B5()",
0x00B6: "Intrinsic00B6()",
0x00B7: "Item::getNpcNum(void)",
0x00B8: "Item::getCY(void)",
0x00B9: "Item::isOn(uint16)",
0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)",
0x00BB: "Npc::isDead(void)",
0x00BC: "Intrinsic00BC()",
0x00BD: "Intrinsic00BD()",
0x00BE: "Intrinsic00BE()",
0x00BF: "Item::andStatus(void)",
0x00C0: "Intrinsic00C0()",
0x00C1: "Intrinsic00C1()",
0x00C2: "IntrinsicReturn0",
0x00C3: "Intrinsic00C3()",
0x00C4: "Item::getQHi(void)",
0x00C5: "Item::setQuality(sint16)",
0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00C7: "Intrinsic00C7()",
0x00C8: "Intrinsic00C8()",
0x00C9: "Item::callEvent0A(sint16)",
0x00CA: "Intrinsic00CA()",
0x00CB: "Item::isOn(uint16)",
0x00CC: "Intrinsic00CC()",
0x00CD: "Intrinsic00CD()",
0x00CE: "Item::getQHi(void)",
0x00CF: "Item::isOn(uint16)",
0x00D0: "Intrinsic00D0()",
0x00D1: "Intrinsic00D1()",
0x00D2: "Intrinsic00D2()",
0x00D3: "Intrinsic00FA()",
0x00D4: "Camera::getY(void)",
0x00D5: "Intrinsic00D5()",
0x00D6: "Intrinsic00D6()",
0x00D7: "Intrinsic00D7()",
0x00D8: "Intrinsic00D8()",
0x00D9: "Intrinsic00D9()",
0x00DA: "Intrinsic00DA()",
0x00DB: "Intrinsic00DB()",
0x00DC: "Item::getQLo(void)",
0x00DD: "Item::getQHi(void)",
0x00DE: "Item::getNpcNum(void)",
0x00DF: "Intrinsic00DF()",
0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E1: "Intrinsic00FA()",
0x00E2: "Item::getQLo(void)",
0x00E3: "Item::getCY(void)",
0x00E4: "Item::getNpcNum(void)",
0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E6: "Item::getNpcNum(void)",
0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E8: "Item::getNpcNum(void)",
0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EA: "Item::getNpcNum(void)",
0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EC: "Item::getNpcNum(void)",
0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EE: "Item::getNpcNum(void)",
0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F0: "Item::getNpcNum(void)",
0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F2: "Item::getNpcNum(void)",
0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F4: "Item::getNpcNum(void)",
0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F6: "Item::getNpcNum(void)",
0x00F7: "Item::andStatus(void)",
0x00F8: "Intrinsic00FA()",
0x00F9: "Item::getQLo(void)",
0x00FA: "Intrinsic00FA()",
0x00FB: "Intrinsic00FB()",
0x00FC: "Intrinsic00FC()",
0x00FD: "Item::getQLo(void)",
0x00FE: "Intrinsic00FE()",
0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0100: "Item::andStatus(void)",
0x0101: "Item::isOn(uint16)",
0x0102: "Npc::isDead(void)",
0x0103: "Intrinsic00BD()",
0x0104: "Item::getQHi(void)",
0x0105: "Intrinsic00DA()",
0x0106: "Intrinsic00FA()",
0x0107: "Item::getQLo(void)",
0x0108: "Item::isOn(uint16)",
0x0109: "Item::getQHi(void)",
0x010A: "Item::isOn(uint16)",
0x010B: "Item::getQHi(void)",
0x010C: "Item::hurl(sint16,sint16,sint16,sint16)",
0x010D: "Item::getNpcNum(void)",
0x010E: "Item::getCY(void)",
0x010F: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0110: "Item::isOn(uint16)",
0x0111: "Intrinsic0111()",
0x0112: "IntrinsicReturn0",
0x0113: "Npc::isDead(void)",
0x0114: "Intrinsic0088()",
0x0115: "Intrinsic00C1()",
0x0116: "Item::getQHi(void)",
0x0117: "Intrinsic00BD()",
0x0118: "Item::andStatus(void)",
0x0119: "Item::getNpcNum(void)",
0x011A: "Item::andStatus(void)",
0x011B: "Item::getNpcNum(void)",
0x011C: "Intrinsic011C()",
0x011D: "Item::andStatus(void)",
0x011E: "Item::getNpcNum(void)",
0x011F: "Item::AvatarStoleSomehting(uint16)",
0x0120: "Item::andStatus(void)",
0x0121: "Item::getNpcNum(void)",
0x0122: "Item::getQ(void)",
0x0123: "Item::setQ(uint)",
0x0124: "Item::andStatus(void)",
0x0125: "Item::getNpcNum(void)",
0x0126: "Item::andStatus(void)",
0x0127: "Item::getNpcNum(void)",
0x0128: "Item::andStatus(void)",
0x0129: "Item::getNpcNum(void)",
0x012A: "Item::andStatus(void)",
0x012B: "Item::getNpcNum(void)",
0x012C: "Item::andStatus(void)",
0x012D: "Item::getNpcNum(void)",
0x012E: "Intrinsic00C3()",
0x012F: "Item::andStatus(void)",
0x0130: "Item::getNpcNum(void)",
0x0131: "Intrinsic0131()",
0x0132: "Item::andStatus(void)",
0x0133: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0134: "Item::andStatus(void)",
0x0135: "Camera::getY(void)",
0x0136: "Camera::getZ(void)",
0x0137: "Intrinsic0137()",
0x0138: "Intrinsic009C()",
0x0139: "Item::getTypeFlagCrusader(sint16)",
0x013A: "Item::getNpcNum(void)",
0x013B: "Item::hurl(sint16,sint16,sint16,sint16)",
0x013C: "Item::getCY(void)",
0x013D: "Item::getCZ(void)",
0x013E: "Item::setFrame(uint16)",
0x013F: "Intrinsic013F()",
0x0140: "Intrinsic0140()",
0x0141: "Intrinsic0141()",
0x0142: "Intrinsic0142()",
0x0143: "Npc::isDead(void)",
0x0144: "Intrinsic00FA()",
0x0145: "Intrinsic0145()",
0x0146: "Intrinsic0146()",
0x0147: "Intrinsic0147()",
0x0148: "Item::getNpcNum(void)",
0x0149: "Item::getQLo(void)",
0x014A: "Item::andStatus(void)",
0x014B: "Intrinsic014B()",
0x014C: "Intrinsic014C()",
0x014D: "Intrinsic014D()",
0x014E: "Intrinsic003C()",
0x014F: "Egg::getEggXRange(void)",
0x0150: "Intrinsic009C()",
0x0151: "Intrinsic0072()",
0x0152: "Item::setFrame(uint16)",
0x0153: "Intrinsic00C1()",
0x0154: "Intrinsic00C3()",
0x0155: "Intrinsic00C1()",
0x0156: "Item::isOn(uint16)",
0x0157: "Intrinsic00C3()",
0x0158: "Intrinsic00FA()",
0x0159: "Item::getQHi(void)",
0x015A: "Item::getQLo(void)",
0x015B: "Intrinsic00C1()",
0x015C: "Intrinsic00C3()",
0x015D: "Intrinsic015D()",
}
# Mnemonics for opcodes that parse_one_op decodes without reading any operand
# bytes. parse_one_op looks the opcode up here first; when none of its
# operand-bearing branches match and the opcode is also missing from this
# table, the byte is reported as an unknown opcode.
# Opcode 0x7A ("end") additionally makes parse_one_op report "end_opcode".
NO_ARG_MNEMONICS = {
0x08: "pop_result",
0x12: "pop_temp",
0x14: "add",
0x15: "add_dword",
0x16: "concat",
0x17: "append_list",
0x1C: "sub",
0x1D: "sub_dword",
0x1E: "mul",
0x1F: "mul_dword",
0x20: "div",
0x21: "div_dword",
0x22: "mod",
0x23: "mod_dword",
0x24: "cmp",
0x25: "cmp_dword",
0x26: "strcmp",
0x28: "lt",
0x29: "lt_dword",
0x2A: "le",
0x2B: "le_dword",
0x2C: "gt",
0x2D: "gt_dword",
0x2E: "ge",
0x2F: "ge_dword",
0x30: "not",
0x31: "not_dword",
0x32: "and",
0x33: "and_dword",
0x34: "or",
0x35: "or_dword",
0x36: "ne",
0x37: "ne_dword",
0x39: "bit_and",
0x3A: "bit_or",
0x3B: "bit_not",
0x3C: "lsh",
0x3D: "rsh",
0x50: "ret",
0x53: "suspend",
0x59: "push_pid",
0x5D: "push_retval_byte",
0x5E: "push_retval_word",
0x5F: "push_retval_dword",
0x60: "word_to_dword",
0x61: "dword_to_word",
0x6B: "str_to_ptr",
0x6D: "push_process_result",
0x73: "loopnext",
0x77: "set_info",
0x78: "process_exclude",
0x7A: "end",
}
def parse_int(value: str) -> int:
    """Convert *value* to an int, letting its prefix pick the base.

    Base 0 means the text is read like a Python integer literal, so
    ``"0x0A"``, ``"0o12"``, ``"0b1010"`` and ``"10"`` are all accepted.

    Raises:
        ValueError: if *value* is not a valid integer literal.
        TypeError: if *value* is not a string (or bytes-like) object.
    """
    parsed = int(value, base=0)
    return parsed
def try_parse_int(value: str) -> int | None:
    """Lenient variant of parse_int(): return None instead of raising.

    Both malformed text (ValueError) and non-string input such as None
    (TypeError) are mapped to None.
    """
    try:
        parsed = parse_int(value)
    except (TypeError, ValueError):
        return None
    else:
        return parsed
def signed_byte(value: int) -> int:
    """Reinterpret an unsigned byte as a two's-complement signed value.

    Only bit 7 is inspected: when it is set, 0x100 is subtracted.
    """
    if value & 0x80:
        return value - 0x100
    return value
def bp_repr(value: int) -> str:
    """Format a byte displacement as a BP-relative reference such as ``[BP-02h]``.

    The byte is sign-extended with signed_byte(); the magnitude is printed as
    at least two upper-case hex digits.
    """
    displacement = signed_byte(value)
    if displacement < 0:
        return f"[BP-{-displacement:02X}h]"
    return f"[BP+{displacement:02X}h]"
def sp_repr(value: int) -> str:
    """Format a byte displacement as an SP-relative reference such as ``[SP+04h]``.

    The byte is sign-extended with signed_byte(); the magnitude is printed as
    at least two upper-case hex digits.
    """
    displacement = signed_byte(value)
    if displacement < 0:
        return f"[SP-{-displacement:02X}h]"
    return f"[SP+{displacement:02X}h]"
@dataclass
class ParseResult:
    """Outcome of decoding a single opcode with parse_one_op()."""

    # Decoded op record (see op_record), or None when nothing was decoded.
    op: dict[str, Any] | None
    # Body-relative offset at which decoding should continue.
    next_offset: int
    # Set when decoding must stop (e.g. "end_opcode", "unknown_opcode").
    end_reason: str | None = None
    # Undecoded bytes handed back when an opcode could not be decoded.
    unknown_tail: bytes | None = None
class BodyReader:
    """Sequential little-endian reader over a bytes buffer.

    ``offset`` is the read cursor; every read advances it by the number of
    bytes the read nominally covers.
    """

    def __init__(self, data: bytes, offset: int = 0) -> None:
        self.data = data
        self.offset = offset

    def read_u8(self) -> int:
        """Return the byte at the cursor and advance by one.

        Raises IndexError when the cursor is past the end of the buffer.
        """
        position = self.offset
        byte = self.data[position]
        self.offset = position + 1
        return byte

    def read_u16(self) -> int:
        """Return the next two bytes as a little-endian unsigned integer.

        Slicing does not raise: if fewer than two bytes remain, the value is
        built from whatever bytes exist and the cursor still advances by two.
        """
        position = self.offset
        self.offset = position + 2
        return int.from_bytes(self.data[position:position + 2], "little")

    def read_u32(self) -> int:
        """Return the next four bytes as a little-endian unsigned integer.

        Slicing does not raise: if fewer than four bytes remain, the value is
        built from whatever bytes exist and the cursor still advances by four.
        """
        position = self.offset
        self.offset = position + 4
        return int.from_bytes(self.data[position:position + 4], "little")

    def read_cstring(self) -> str:
        """Read bytes up to (and consuming) a NUL terminator, or to end of data.

        Each byte becomes the character with the same code point.
        """
        collected = bytearray()
        while self.offset < len(self.data):
            current = self.read_u8()
            if current == 0:
                break
            collected.append(current)
        # latin-1 maps byte N to code point N, matching chr() on each byte.
        return collected.decode("latin-1")

    def read_fixed_string(self, length: int) -> str:
        """Read *length* bytes as latin-1 text and strip trailing NUL padding."""
        position = self.offset
        self.offset = position + length
        text = self.data[position:position + length].decode("latin-1", errors="replace")
        return text.rstrip("\x00")
def op_record(start: int, absolute_start: int, opcode: int, raw_bytes: bytes, mnemonic: str, operands: dict[str, Any]) -> dict[str, Any]:
    """Assemble the JSON-serializable record for one decoded opcode.

    *raw_bytes* is stored as a lower-case hex string; every other argument is
    stored as given. Key order is fixed because it shows up in the JSON output.
    """
    return dict(
        offset=start,
        absolute_body_offset=absolute_start,
        opcode=opcode,
        mnemonic=mnemonic,
        raw_bytes=raw_bytes.hex(),
        operands=operands,
    )
def _signed_word(value: int) -> int:
    """Reinterpret an unsigned 16-bit value as two's-complement signed."""
    return value - 0x10000 if value & 0x8000 else value


def _decode_operands(reader: BodyReader, opcode: int) -> tuple[str, dict[str, Any]] | None:
    """Read the operand bytes for *opcode* from *reader*.

    Returns ``(mnemonic, operands)``, or None when the opcode is neither one
    of the operand-bearing opcodes handled here nor listed in
    NO_ARG_MNEMONICS. Operand key order is significant: it is the order in
    which the keys appear in the emitted JSON.

    May raise IndexError when a single-byte operand lies past the end of the
    buffer; multi-byte reads never raise but can leave ``reader.offset``
    beyond the buffer end. The caller checks for both.
    """
    if opcode in {0x00, 0x01, 0x02}:
        bp_offset = reader.read_u8()
        mnemonic = {0x00: "pop_local_byte", 0x01: "pop_local_word", 0x02: "pop_local_dword"}[opcode]
        return mnemonic, {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
    if opcode == 0x03:
        bp_offset = reader.read_u8()
        size = reader.read_u8()
        return "pop_local_blob", {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size}
    if opcode == 0x09:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "pop_list_element", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
            "slist_flag": slist_flag,
        }
    if opcode == 0x0A:
        value = reader.read_u8()
        return "push_byte_immediate", {"value_u8": value, "value_signed": signed_byte(value)}
    if opcode == 0x0B:
        return "push_word_immediate", {"value_u16": reader.read_u16()}
    if opcode == 0x0C:
        return "push_dword_immediate", {"value_u32": reader.read_u32()}
    if opcode == 0x0D:
        declared_length = reader.read_u16()
        # The text is read up to its NUL terminator; declared_length is
        # recorded but not used to bound the read.
        text = reader.read_cstring()
        return "push_string_immediate", {"declared_length": declared_length, "string": text}
    if opcode == 0x0E:
        element_size = reader.read_u8()
        count = reader.read_u8()
        return "create_list", {"element_size": element_size, "count": count}
    if opcode == 0x0F:
        arg_bytes = reader.read_u8()
        intrinsic_ordinal = reader.read_u16()
        return "call_intrinsic", {
            "arg_bytes": arg_bytes,
            "intrinsic_ordinal": intrinsic_ordinal,
            "intrinsic_name_hint": INTRINSIC_HINTS.get(intrinsic_ordinal),
        }
    if opcode == 0x11:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        return "call_class_event", {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
    if opcode in {0x19, 0x1A, 0x1B}:
        mnemonic = {0x19: "append_slist", 0x1A: "remove_slist", 0x1B: "remove_list"}[opcode]
        return mnemonic, {"element_size": reader.read_u8()}
    if opcode == 0x38:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "in_list", {"element_size": element_size, "slist_flag": slist_flag}
    if opcode in {0x3E, 0x3F, 0x40, 0x41, 0x43, 0x4B, 0x62, 0x63, 0x64, 0x69}:
        bp_offset = reader.read_u8()
        mnemonic = {
            0x3E: "push_local_byte",
            0x3F: "push_local_word",
            0x40: "push_local_dword",
            0x41: "push_local_string",
            0x43: "push_local_slist",
            0x4B: "push_local_addr",
            0x62: "free_local_string",
            0x63: "free_local_slist",
            0x64: "free_local_list",
            0x69: "push_string_ptr",
        }[opcode]
        return mnemonic, {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
    if opcode == 0x42:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        return "push_local_list", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
        }
    if opcode == 0x44:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        return "push_list_element", {"element_size": element_size, "slist_flag": slist_flag}
    if opcode == 0x45:
        byte0 = reader.read_u8()
        byte1 = reader.read_u8()
        return "push_huge", {"value_a": byte0, "value_b": byte1}
    if opcode in {0x4C, 0x4D}:
        mnemonic = {0x4C: "push_indirect", 0x4D: "pop_indirect"}[opcode]
        return mnemonic, {"size": reader.read_u8()}
    if opcode in {0x4E, 0x4F}:
        global_id = reader.read_u16()
        size = reader.read_u8()
        mnemonic = {0x4E: "push_global", 0x4F: "pop_global"}[opcode]
        return mnemonic, {"global_id": global_id, "size": size}
    if opcode in {0x51, 0x52}:
        relative = reader.read_u16()
        signed_relative = _signed_word(relative)
        # The branch is relative to the byte following the operand.
        target = reader.offset + signed_relative
        mnemonic = {0x51: "jne", 0x52: "jmp"}[opcode]
        return mnemonic, {
            "relative_u16": relative,
            "relative_signed": signed_relative,
            "target_offset": target,
        }
    if opcode == 0x54:
        arg0 = reader.read_u8()
        arg1 = reader.read_u8()
        return "implies", {"arg0": arg0, "arg1": arg1}
    if opcode == 0x57:
        arg_bytes = reader.read_u8()
        this_size = reader.read_u8()
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        return "spawn", {
            "arg_bytes": arg_bytes,
            "this_size": this_size,
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
    if opcode == 0x58:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        inline_offset = reader.read_u16()
        this_size = reader.read_u8()
        unknown = reader.read_u8()
        return "spawn_inline", {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
            "inline_offset": inline_offset,
            "this_size": this_size,
            "unknown": unknown,
        }
    if opcode == 0x5A:
        return "init", {"local_bytes": reader.read_u8()}
    if opcode == 0x5B:
        return "line_number", {"line_number": reader.read_u16()}
    if opcode == 0x5C:
        relative = reader.read_u16()
        # Computed from the cursor position right after the 16-bit operand,
        # i.e. before the 8-byte symbol name is consumed.
        symbol_offset = reader.offset + _signed_word(relative)
        symbol = reader.read_fixed_string(8)
        trailing_zero = reader.read_u8()
        return "symbol_info", {
            "symbol_offset": symbol_offset,
            "symbol": symbol,
            "trailing_zero": trailing_zero,
        }
    if opcode in {0x65, 0x66, 0x67, 0x6E, 0x6F, 0x74}:
        value = reader.read_u8()
        operands: dict[str, Any] = {"value_u8": value}
        if opcode in {0x65, 0x66, 0x67}:
            # Only the free_stack_* opcodes get an SP-relative rendering.
            operands["target"] = sp_repr(value)
        mnemonic = {
            0x65: "free_stack_string",
            0x66: "free_stack_list",
            0x67: "free_stack_slist",
            0x6E: "add_sp",
            0x6F: "push_stack_addr",
            0x74: "loopscr",
        }[opcode]
        return mnemonic, operands
    if opcode == 0x6C:
        bp_offset = reader.read_u8()
        copy_type = reader.read_u8()
        return "param_pid_chg", {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "copy_type": copy_type,
        }
    if opcode == 0x70:
        current_var = reader.read_u8()
        string_bytes = reader.read_u8()
        loop_type = reader.read_u8()
        return "loop", {"current_var": current_var, "string_bytes": string_bytes, "loop_type": loop_type}
    if opcode in {0x75, 0x76}:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        branch = reader.read_u16()
        signed_branch = _signed_word(branch)
        target = reader.offset + signed_branch
        mnemonic = {0x75: "foreach_list", 0x76: "foreach_slist"}[opcode]
        return mnemonic, {
            "bp_offset": bp_offset,
            "target_var": bp_repr(bp_offset),
            "element_size": element_size,
            "relative_u16": branch,
            "relative_signed": signed_branch,
            "target_offset": target,
        }
    if opcode == 0x79:
        return "global_address", {"global_id": reader.read_u16()}

    # Everything else is either an operand-less opcode or unknown.
    bare_mnemonic = NO_ARG_MNEMONICS.get(opcode)
    if bare_mnemonic is None:
        return None
    return bare_mnemonic, {}


def parse_one_op(body: bytes, start: int) -> ParseResult:
    """Decode the single opcode that begins at ``body[start]``.

    Args:
        body: raw bytes of one event body.
        start: body-relative offset of the opcode byte; must be a valid index
            into *body* (IndexError is raised otherwise).

    Returns:
        A ParseResult. On success ``op`` holds the record built by
        op_record() (with ``absolute_body_offset`` provisionally equal to
        *start*) and ``next_offset`` points past the operands; ``end_reason``
        is "end_opcode" for opcode 0x7A and None otherwise.
        When decoding cannot proceed, ``op`` is None, ``next_offset`` equals
        *start*, ``unknown_tail`` is ``body[start:]`` and ``end_reason`` is:

        * "unknown_opcode" - the opcode byte is not recognised;
        * "truncated_operands" - the body ends before the opcode's operands
          do. Previously this case either raised IndexError or produced an
          op decoded from a short read.
    """
    reader = BodyReader(body, start)
    opcode = reader.read_u8()

    try:
        decoded = _decode_operands(reader, opcode)
        # Multi-byte reads slice the buffer and never raise, so an overrun is
        # only visible as a cursor that moved beyond the end of the body.
        truncated = reader.offset > len(body)
    except IndexError:
        truncated = True
        decoded = None

    if truncated:
        return ParseResult(op=None, next_offset=start, end_reason="truncated_operands", unknown_tail=body[start:])
    if decoded is None:
        return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:])

    mnemonic, operands = decoded
    raw = body[start:reader.offset]
    op = op_record(start, start, opcode, raw, mnemonic, operands)
    end_reason = "end_opcode" if opcode == 0x7A else None
    return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason)
def load_tsv_rows(path: Path) -> list[dict[str, str]]:
    """Read a UTF-8, tab-separated file with a header row.

    Returns one dict per data row, keyed by the header names, in file order.
    """
    with path.open("r", encoding="utf-8", newline="") as stream:
        reader = csv.DictReader(stream, delimiter="\t")
        rows = list(reader)
    return rows
def find_chunk_file(entry_index: int) -> Path:
    """Locate the extracted chunk file for *entry_index* inside CHUNKS_DIR.

    Files are matched by the pattern ``chunk_<entry_index>_*.bin``; when
    several match, the one that sorts first is returned.

    Raises:
        FileNotFoundError: if no file matches.
    """
    pattern = f"chunk_{entry_index}_*.bin"
    candidates = list(CHUNKS_DIR.glob(pattern))
    if candidates:
        return min(candidates)
    raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}")
def select_rows(class_name: str, slot: int) -> tuple[dict[str, str], dict[str, str]]:
    """Find the event-index row and layout-index row for one class/slot pair.

    The class name is compared case-insensitively. The first event row whose
    slot equals *slot* and whose entry_index parses as an integer is used;
    the layout row is the first one with the same entry_index.

    Raises:
        KeyError: if no event row or no layout row matches.
        ValueError: if the event row has an empty derived body start or end.
    """
    # Both indexes are loaded up front, before any matching is attempted.
    event_rows = load_tsv_rows(CLASS_EVENT_INDEX)
    layout_rows = load_tsv_rows(CLASS_LAYOUT_INDEX)

    event_row = None
    for candidate in event_rows:
        if candidate["class_name_hint"].upper() != class_name.upper():
            continue
        if try_parse_int(candidate.get("slot", "")) != slot:
            continue
        if try_parse_int(candidate.get("entry_index", "")) is None:
            continue
        event_row = candidate
        break
    if event_row is None:
        raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}")

    has_range = bool(event_row["derived_body_start"]) and bool(event_row["derived_body_end"])
    if not has_range:
        raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}")

    entry_index = parse_int(event_row["entry_index"])
    for candidate in layout_rows:
        if try_parse_int(candidate.get("entry_index", "")) == entry_index:
            return event_row, candidate
    raise KeyError(f"No class_layout_index row found for entry_index={entry_index}")
def annotation_hints(event_row: dict[str, str], payload_shape_hint: str) -> dict[str, Any]:
    """Build the ``annotation_hints`` section of the IR document.

    The compiled anchor list is a fixed table of address/role pairs; only the
    slot taxonomy and the payload shape depend on the arguments.
    parse_body_ir passes the result of infer_payload_shape() as
    *payload_shape_hint*, so the stored value may be None.
    """
    slot = parse_int(event_row["slot"])
    # A non-empty hint in the row wins over the built-in slot name table.
    event_name = event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot)
    anchor_table = (
        ("000d:51fd", "slot_value_loader"),
        ("000d:5572", "slot_value_plus_offset"),
        ("000d:46ec", "context_create_from_slot"),
        ("000d:22bc", "matrix_pushback_stage"),
        ("000d:ebe3", "opcode_sequence_run"),
    )
    anchors = [{"address": address, "role": role} for address, role in anchor_table]
    return {
        "runtime_family": "slot-backed-owner-loaded-body",
        "payload_shape_hint": payload_shape_hint,
        "compiled_anchors": anchors,
        "slot_taxonomy": {"slot": slot, "event_name_hint": event_name},
    }
def infer_payload_shape(slot: int) -> str | None:
    """Return the payload shape hint recorded for an event slot, if any.

    Slots without an entry in the table yield None.
    """
    shape_by_slot = {
        0x10: "none",
        0x12: "none",
        0x0A: "word",
        0x0B: "word",
        0x11: "word",
        0x14: "word",
        0x13: "signed_word",
    }
    return shape_by_slot.get(slot)
def parse_body_ir(event_row: dict[str, str], layout_row: dict[str, str]) -> dict[str, Any]:
    """Decode one event body into the IR document emitted by this tool.

    The body is the ``derived_body_start:derived_body_end`` slice of the chunk
    file for the row's entry_index. Opcodes are decoded one after another
    until the body is used up or parse_one_op() reports a reason to stop.

    Returns a dict with the sections ``schema_version``, ``source``,
    ``class``, ``event``, ``body``, ``ops`` and ``annotation_hints``.
    """
    entry_index = parse_int(event_row["entry_index"])
    chunk_file = find_chunk_file(entry_index)
    chunk_bytes = chunk_file.read_bytes()
    body_start = parse_int(event_row["derived_body_start"])
    body_end = parse_int(event_row["derived_body_end"])
    body = chunk_bytes[body_start:body_end]

    ops: list[dict[str, Any]] = []
    cursor = 0
    stop_reason = "body_exhausted"
    leftover = b""
    while cursor < len(body):
        step = parse_one_op(body, cursor)
        cursor = step.next_offset
        decoded = step.op
        if decoded is not None:
            # parse_one_op only knows body-relative offsets; rebase onto the chunk.
            decoded["absolute_body_offset"] = body_start + decoded["offset"]
            ops.append(decoded)
        if step.end_reason is None:
            continue
        stop_reason = step.end_reason
        if stop_reason == "end_opcode":
            # Whatever follows the end opcode is reported as trailing bytes.
            leftover = body[cursor:]
        else:
            leftover = step.unknown_tail or b""
        break
    if cursor >= len(body) and stop_reason == "body_exhausted":
        leftover = b""

    slot = parse_int(event_row["slot"])
    payload_shape = infer_payload_shape(slot)

    source_section = {
        "flex_path": "USECODE/EUSECODE.FLX",
        "extracted_root": "USECODE/EUSECODE_extracted",
        # Always forward slashes, regardless of the host platform.
        "chunk_file": str(chunk_file.relative_to(REPO_ROOT)).replace("\\", "/"),
    }
    class_section = {
        "entry_index": entry_index,
        "object_index": parse_int(layout_row["object_index"]),
        "class_id": parse_int(layout_row["class_id"]),
        "class_name": layout_row["class_name_hint"],
        "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]),
        "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]),
        "conservative_event_count": parse_int(layout_row["conservative_event_count"]),
    }
    event_section = {
        "slot": slot,
        "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
        "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]),
        "raw_code_offset": parse_int(event_row["raw_code_offset"]),
        "derived_body_start": body_start,
        "derived_body_end": body_end,
        "derived_body_length": parse_int(event_row["derived_body_length"]),
        "repeated_template_status": event_row["repeated_template_status"],
    }
    body_section = {
        "end_reason": stop_reason,
        "raw_body_sha1": hashlib.sha1(body).hexdigest(),
        "unknown_trailing_bytes": leftover.hex(),
        "decoded_op_count": len(ops),
    }
    return {
        "schema_version": "crusader-usecode-ir-v1-poc",
        "source": source_section,
        "class": class_section,
        "event": event_section,
        "body": body_section,
        "ops": ops,
        "annotation_hints": annotation_hints(event_row, payload_shape),
    }
# ---------------------------------------------------------------------------
# Family diff helpers
# ---------------------------------------------------------------------------
def _common_prefix_len(a: bytes, b: bytes) -> int:
limit = min(len(a), len(b))
for i in range(limit):
if a[i] != b[i]:
return i
return limit
def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int:
la, lb = len(a), len(b)
limit = min(la - prefix_len, lb - prefix_len)
for i in range(1, limit + 1):
if a[la - i] != b[lb - i]:
return i - 1
return limit
def compute_family_diff(class_name: str, slot: int) -> dict[str, Any]:
    """
    Find all event rows that share the same repeated_template_status family tag
    as the named class/slot row, then compare each member's raw body bytes
    against the reference body (common prefix, common suffix, differing window).

    Grouping rule: when the reference row's repeated_template_status is
    non-empty and not "unique", the family is every row with that same status;
    otherwise the family is every row with the same slot, across all classes.

    Returns a dict with:
      reference_entry   entry_index for the named class/slot
      reference_class   class name as passed in
      slot              slot as passed in
      family_tag        the status value used for grouping, or
                        "slot_0xNN_all" when the slot fallback was used
      member_count      number of rows in the family (reference included)
      sibling_count     member_count minus the reference
      members           per-member records, reference first, then by entry_index

    Raises:
      KeyError    no event row matches class_name/slot
      ValueError  the reference body cannot be loaded
    """
    event_rows = load_tsv_rows(CLASS_EVENT_INDEX)

    def _has_entry_index(row: dict[str, str]) -> bool:
        return try_parse_int(row.get("entry_index", "")) is not None

    def _template_tag(row: dict[str, str]) -> str:
        # A short TSV row can yield None for a missing cell; treat it as empty.
        return (row.get("repeated_template_status") or "").strip()

    # Locate the reference row
    wanted_class = class_name.upper()
    ref_row = next(
        (
            row for row in event_rows
            if row["class_name_hint"].upper() == wanted_class
            and try_parse_int(row.get("slot", "")) == slot
            and _has_entry_index(row)
        ),
        None,
    )
    if ref_row is None:
        raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}")
    family_tag = _template_tag(ref_row)
    ref_entry = parse_int(ref_row["entry_index"])

    # Collect family members: same family_tag if it names a real family,
    # else fall back to the same slot across all classes.
    grouped_by_tag = bool(family_tag) and family_tag != "unique"
    if grouped_by_tag:
        family_rows = [
            row for row in event_rows
            if _template_tag(row) == family_tag and _has_entry_index(row)
        ]
    else:
        family_rows = [
            row for row in event_rows
            if try_parse_int(row.get("slot", "")) == slot and _has_entry_index(row)
        ]
    # Report the grouping that was actually applied; a "unique" status uses
    # the slot fallback and must not be presented as a family tag.
    family_label = family_tag if grouped_by_tag else f"slot_0x{slot:02X}_all"

    def _load_body(row: dict[str, str]) -> bytes | None:
        """Return the row's body bytes, or None if the range or chunk is unavailable."""
        body_start_str = row.get("derived_body_start", "")
        body_end_str = row.get("derived_body_end", "")
        if not body_start_str or not body_end_str:
            return None
        try:
            chunk = find_chunk_file(parse_int(row["entry_index"]))
            data = chunk.read_bytes()
            return data[parse_int(body_start_str):parse_int(body_end_str)]
        except (FileNotFoundError, ValueError):
            return None

    ref_body = _load_body(ref_row)
    if ref_body is None:
        raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}")

    members: list[dict[str, Any]] = []
    for row in family_rows:
        entry_idx = parse_int(row["entry_index"])
        body = _load_body(row)
        # Identity, not entry_index equality: a class can contribute several
        # slots to one family, and only the selected row is the reference.
        is_ref = row is ref_row
        member: dict[str, Any] = {
            "entry_index": entry_idx,
            "class_name": row["class_name_hint"],
            "slot": try_parse_int(row.get("slot", "")),
            "body_length": len(body) if body is not None else None,
            "is_reference": is_ref,
        }
        if is_ref:
            member["diff_vs_reference"] = {"identical": True, "note": "reference"}
        elif body is not None:
            prefix = _common_prefix_len(ref_body, body)
            suffix = _common_suffix_len(ref_body, body, prefix)
            # The window is whatever lies between the shared prefix and suffix.
            ref_diff_window = ref_body[prefix:len(ref_body) - suffix]
            member_diff_window = body[prefix:len(body) - suffix]
            member["diff_vs_reference"] = {
                "common_prefix_bytes": prefix,
                "common_suffix_bytes": suffix,
                "ref_diff_window_hex": ref_diff_window.hex(),
                "member_diff_window_hex": member_diff_window.hex(),
                "diff_window_length_ref": len(ref_diff_window),
                "diff_window_length_member": len(member_diff_window),
                "identical": ref_body == body,
            }
        members.append(member)

    members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"]))
    return {
        "reference_entry": ref_entry,
        "reference_class": class_name,
        "slot": slot,
        "family_tag": family_label,
        "member_count": len(members),
        "sibling_count": len(members) - 1,
        "members": members,
    }
def render_family_diff_text(diff: dict[str, Any]) -> str:
    """Render the dict produced by compute_family_diff() as a text report.

    One header block is followed by one line per member; non-reference
    members that carry a diff get either an "identical" line or the
    prefix/suffix statistics and both differing windows. A member whose slot
    or body length is None is shown with "?" instead of failing to format.
    The returned text always ends with a newline.
    """
    lines = [
        f"Family diff: {diff['family_tag']}",
        f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}",
        f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}",
        "",
    ]
    for m in diff["members"]:
        tag = " [REF]" if m["is_reference"] else ""
        body_len = m["body_length"] if m["body_length"] is not None else "?"
        # Member slots come from try_parse_int() and may be None.
        slot_text = f"0x{m['slot']:02X}" if m["slot"] is not None else "?"
        lines.append(f" entry={m['entry_index']} class={m['class_name']} slot={slot_text} body_len={body_len}{tag}")
        d = m.get("diff_vs_reference")
        if not d or m["is_reference"]:
            continue
        if d["identical"]:
            lines.append(" identical to reference")
            continue
        lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}")
        lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}")
        lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}")
    return "\n".join(lines) + "\n"
def render_text(ir: dict[str, Any]) -> str:
    """Render an IR document from parse_body_ir() as a readable listing.

    Three header lines are followed by one line per decoded op. Integer
    operands are shown in hexadecimal with the sign placed before the ``0x``
    prefix (``-0x5``, not ``0x-5``); all other operand values use their
    ``str()`` form. Trailing undecoded bytes, if any, are appended at the end.
    The returned text always ends with a newline.
    """

    def _format_operand(key: str, value: Any) -> str:
        if isinstance(value, int):
            # Signed operands (value_signed, relative_signed) can be negative;
            # format the magnitude so the minus sign does not land after "0x".
            sign = "-" if value < 0 else ""
            return f"{key}={sign}0x{abs(value):X}"
        return f"{key}={value}"

    cls = ir["class"]
    event = ir["event"]
    body = ir["body"]
    lines = [
        f"Class {cls['class_name']} entry={cls['entry_index']} class_id=0x{cls['class_id']:X}",
        f"Slot 0x{event['slot']:02X} hint={event['event_name_hint']} body=0x{event['derived_body_start']:04X}..0x{event['derived_body_end']:04X}",
        f"End reason: {body['end_reason']} ops={body['decoded_op_count']} sha1={body['raw_body_sha1']}",
        "",
    ]
    for op in ir["ops"]:
        operand_text = " ".join(_format_operand(key, value) for key, value in op["operands"].items())
        lines.append(
            f"{op['absolute_body_offset']:04X}: {op['opcode']:02X} {op['mnemonic']} {operand_text} raw={op['raw_bytes']}"
        )
    if body["unknown_trailing_bytes"]:
        lines.extend(["", f"unknown_trailing_bytes={body['unknown_trailing_bytes']}"])
    return "\n".join(lines) + "\n"
def main() -> None:
    """Command-line entry point.

    Always decodes the requested class/slot body and emits the IR JSON (to
    --output or stdout). With --emit-text a readable listing is emitted as
    well, and with --family-diff both the JSON and text family reports are
    emitted. Each artifact goes to its own output file when one was given,
    otherwise to stdout.
    """
    parser = argparse.ArgumentParser(description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts")
    add = parser.add_argument
    add("--class", dest="class_name", required=True, help="Class name from class_event_index.tsv, for example NPCTRIG")
    add("--slot", required=True, help="Event slot, for example 0x0A")
    add("--output", help="Write IR JSON to this file instead of stdout")
    add("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON")
    add("--text-output", help="Write the text listing to this file")
    add("--family-diff", action="store_true", help="Emit repeated-body family diff report instead of (or alongside) the IR")
    add("--family-diff-output", help="Write the family diff JSON to this file")
    add("--family-diff-text-output", help="Write the family diff text report to this file")
    args = parser.parse_args()

    def deliver(file_target: str | None, file_payload: str, console_payload: str) -> None:
        # File and console payloads are passed separately because JSON gets a
        # trailing newline only when written to a file (print adds its own).
        if file_target:
            Path(file_target).write_text(file_payload, encoding="utf-8")
        else:
            print(console_payload)

    slot = parse_int(args.slot)
    event_row, layout_row = select_rows(args.class_name, slot)
    ir = parse_body_ir(event_row, layout_row)

    rendered_json = json.dumps(ir, indent=2)
    deliver(args.output, rendered_json + "\n", rendered_json)

    if args.emit_text:
        rendered_text = render_text(ir)
        deliver(args.text_output, rendered_text, rendered_text)

    if not args.family_diff:
        return
    diff = compute_family_diff(args.class_name, slot)
    diff_json = json.dumps(diff, indent=2)
    deliver(args.family_diff_output, diff_json + "\n", diff_json)
    diff_text = render_family_diff_text(diff)
    deliver(args.family_diff_text_output, diff_text, diff_text)


if __name__ == "__main__":
    main()