Crusader_Decomp/tools/poc_crusader_usecode_parser.py

3146 lines
No EOL
116 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import argparse
import ast
import csv
import hashlib
import json
import re
from functools import lru_cache
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# Repository root: the parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Default extraction directory, plus the index files and chunk directory inside it.
EXTRACTED_ROOT = REPO_ROOT / "USECODE" / "EUSECODE_extracted"
CLASS_EVENT_INDEX = EXTRACTED_ROOT / "class_event_index.tsv"
CLASS_LAYOUT_INDEX = EXTRACTED_ROOT / "class_layout_index.tsv"
RUNTIME_VM_IR_INDEX = EXTRACTED_ROOT / "runtime_vm_ir.tsv"
CHUNKS_DIR = EXTRACTED_ROOT / "chunks"
# Directory holding the per-variant intrinsic name files referenced below.
UNKCOFFS_DIR = REPO_ROOT / "tools" / "unkcoffs"
# Variant used when none is given explicitly and none can be inferred from a path.
DEFAULT_GAME_VARIANT = "regret"
# Supported variant name -> Python file expected to assign a list named
# ``intrinsics``; the keys double as the set of accepted variant names.
INTRINSIC_HINT_PATHS = {
    "regret": UNKCOFFS_DIR / "regret_ints.py",
    "remorse": UNKCOFFS_DIR / "remorse_ints.py",
}
def resolve_extracted_root(extracted_root: Path | str | None = None) -> Path:
if extracted_root is None:
return EXTRACTED_ROOT
return Path(extracted_root)
def extracted_root_paths(extracted_root: Path | str | None = None) -> tuple[Path, Path, Path, Path]:
    """Return the four well-known locations inside an extraction directory.

    The tuple order is: class event index, class layout index, runtime VM IR
    index, chunks directory.
    """
    base = resolve_extracted_root(extracted_root)
    entry_names = (
        "class_event_index.tsv",
        "class_layout_index.tsv",
        "runtime_vm_ir.tsv",
        "chunks",
    )
    event_index, layout_index, vm_ir_index, chunks_dir = (base / entry for entry in entry_names)
    return event_index, layout_index, vm_ir_index, chunks_dir
def repo_relative_path(path: Path) -> str:
    """Render ``path`` with forward slashes, relative to ``REPO_ROOT`` when possible.

    Paths outside the repository are rendered unchanged apart from the
    backslash-to-slash replacement.
    """
    try:
        shown = path.relative_to(REPO_ROOT)
    except ValueError:
        # Not located under the repository root; keep the path as given.
        shown = path
    return str(shown).replace("\\", "/")
def infer_flex_path(extracted_root: Path | str | None = None) -> str:
    """Build the display path of ``EUSECODE.FLX`` in the extraction root's parent directory.

    When that parent is the repository root the bare file name is returned;
    otherwise the name is prefixed with the parent's repo-relative path.
    """
    flex_dir = resolve_extracted_root(extracted_root).parent
    if flex_dir != REPO_ROOT:
        return f"{repo_relative_path(flex_dir)}/EUSECODE.FLX"
    return "EUSECODE.FLX"
# Event slot number -> event name hint. parse_one_op looks slots up here to
# fill ``target_event_name_hint`` on the call_class_event / spawn / spawn_inline
# records. The ``funcXX`` entries are presumably placeholders for slots whose
# purpose has not been identified yet -- TODO confirm.
EVENT_NAME_HINTS = {
    0x00: "look",
    0x01: "use",
    0x02: "anim",
    0x03: "setActivity",
    0x04: "cachein",
    0x05: "hit",
    0x06: "gotHit",
    0x07: "hatch",
    0x08: "schedule",
    0x09: "release",
    0x0A: "equip",
    0x0B: "unequip",
    0x0C: "combine",
    0x0D: "func0D",
    0x0E: "calledFromAnim",
    0x0F: "enterFastArea",
    0x10: "leaveFastArea",
    0x11: "cast",
    0x12: "justMoved",
    0x13: "avatarStoleSomething",
    0x14: "animGetHit",
    0x15: "func15",
    0x16: "func16",
    0x17: "func17",
    0x18: "func18",
    0x19: "func19",
    0x1A: "func1A",
    0x1B: "func1B",
    0x1C: "func1C",
    0x1D: "func1D",
    0x1E: "func1E",
    0x1F: "func1F",
}
# Intrinsic table extracted from Pentagram ConvertUsecodeCrusader.h
# Source note: "current discovered intrinsics are for regret1.21 only"
# This is used as a hint only; the ordinal mapping may differ between builds.
BASE_INTRINSIC_HINTS: dict[int, str] = {
0x0000: "Intrinsic0000()",
0x0001: "Item::getFrame(void)",
0x0002: "Item::setFrame(uint16)",
0x0003: "Item::getMapNum(void)",
0x0004: "Item::getStatus(void)",
0x0005: "Item::orStatus(sint16)",
0x0006: "Item::callEvent0A(sint16)",
0x0007: "Intrinsic0007()",
0x0008: "Item::isNpc(void)",
0x0009: "Item::getZ(void)",
0x000A: "Intrinsic000A()",
0x000B: "Item::getQLo(void)",
0x000C: "Item::destroy(void)",
0x000D: "Intrinsic000D()",
0x000E: "Item::getX(void)",
0x000F: "Item::getY(void)",
0x0010: "Intrinsic0010()",
0x0011: "Item::getType(void)",
0x0012: "Intrinsic0012()",
0x0013: "Intrinsic0013()",
0x0014: "Item::legal_create(uint16,uint16,uint16,uint16,uint16)",
0x0015: "Item::andStatus(void)",
0x0016: "Intrinsic0016()",
0x0017: "Intrinsic00C3()",
0x0018: "Intrinsic00DA()",
0x0019: "Intrinsic0019()",
0x001A: "Item::create(uint16,uint16)",
0x001B: "Item::pop(uint16,uint16,uint8)",
0x001C: "Intrinsic00FA()",
0x001D: "Item::push(void)",
0x001E: "Intrinsic001E()",
0x001F: "Item::getQLo(void)",
0x0020: "Item::setQLo(sint16)",
0x0021: "Item::getQHi(void)",
0x0022: "Item::setQHi(sint16)",
0x0023: "Intrinsic0023()",
0x0024: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0025: "Item::getCY(void)",
0x0026: "Item::getCX(void)",
0x0027: "Intrinsic0027()",
0x0028: "Item::setNpcNum(sint16)",
0x0029: "Intrinsic0029()",
0x002A: "Intrinsic002A()",
0x002B: "Item::pop(void)",
0x002C: "Intrinsic002C()",
0x002D: "Item::isCompletelyOn(uint16)",
0x002E: "Item::pop(uint16)",
0x002F: "Intrinsic002F()",
0x0030: "Intrinsic0030()",
0x0031: "Item::getFamily(void)",
0x0032: "Item::destroyContents(void)",
0x0033: "Intrinsic0033()",
0x0034: "Item::getDirToItem(uint16)",
0x0035: "Intrinsic0035()",
0x0036: "Intrinsic0036()",
0x0037: "Intrinsic0037()",
0x0038: "Item::andStatus(void)",
0x0039: "Kernel::resetRef(uint16,ProcessType)",
0x003A: "Item::touch(void)",
0x003B: "Egg::getEggId(void)",
0x003C: "Intrinsic003C()",
0x003D: "Intrinsic003D()",
0x003E: "Item::callEvent11(sint16)",
0x003F: "Intrinsic003F()",
0x0040: "Intrinsic0040()",
0x0041: "Item::isOn(uint16)",
0x0042: "Item::getQHi(void)",
0x0043: "Item::isOn(uint16)",
0x0044: "Item::getQHi(void)",
0x0045: "Item::isOn(uint16)",
0x0046: "Item::getQHi(void)",
0x0047: "Item::isOn(uint16)",
0x0048: "Item::getQHi(void)",
0x0049: "Item::isOn(uint16)",
0x004A: "Item::getQHi(void)",
0x004B: "Item::isOn(uint16)",
0x004C: "Item::getQHi(void)",
0x004D: "Intrinsic004D()",
0x004E: "Npc::isDead(void)",
0x004F: "Intrinsic009C()",
0x0050: "Intrinsic0050()",
0x0051: "Intrinsic0051()",
0x0052: "Intrinsic0052()",
0x0053: "Intrinsic00BD()",
0x0054: "Intrinsic0054()",
0x0055: "Intrinsic0055()",
0x0056: "Intrinsic0056()",
0x0057: "Intrinsic0057()",
0x0058: "Item::use(void)",
0x0059: "Item::setQuantity(sint16)",
0x005A: "Intrinsic005A()",
0x005B: "Item::getSurfaceWeight(void)",
0x005C: "Intrinsic005C()",
0x005D: "Item::setFrame(uint16)",
0x005E: "Intrinsic00DA()",
0x005F: "Intrinsic005F()",
0x0060: "Intrinsic0060()",
0x0061: "Intrinsic0061()",
0x0062: "Intrinsic0062()",
0x0063: "Item::legal_create(uint16,uint16,WorldPoint&)",
0x0064: "Item::getPoint(WorldPoint&)",
0x0065: "Item::legal_move(WorldPoint&,uint16,uint16)",
0x0066: "Item::fall(void)",
0x0067: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0068: "Kernel::getNumProcesses(uint16,ProcessType)",
0x0069: "Item::getCY(void)",
0x006A: "Intrinsic006A()",
0x006B: "Intrinsic006B()",
0x006C: "Intrinsic006C()",
0x006D: "Intrinsic006D()",
0x006E: "Intrinsic006E()",
0x006F: "Item::isInNpc(void)",
0x0070: "Intrinsic0070()",
0x0071: "Intrinsic0071()",
0x0072: "Intrinsic0072()",
0x0073: "Intrinsic0073()",
0x0074: "Npc::isDead(void)",
0x0075: "Item::getNpcNum(void)",
0x0076: "IntrinsicReturn0",
0x0077: "Intrinsic0077()",
0x0078: "Item::callEvent0B(sint16)",
0x0079: "Item::andStatus(void)",
0x007A: "Item::move(uint16,uint16,uint8)",
0x007B: "Intrinsic007B()",
0x007C: "Intrinsic007C()",
0x007D: "Intrinsic007D()",
0x007E: "Intrinsic007E()",
0x007F: "Intrinsic007F()",
0x0080: "Intrinsic0080()",
0x0081: "Intrinsic0081()",
0x0082: "Intrinsic0082()",
0x0083: "Intrinsic0083()",
0x0084: "Intrinsic0084()",
0x0085: "Intrinsic0085()",
0x0086: "teleportToEgg(sint16,int,uint8)",
0x0087: "Intrinsic0087()",
0x0088: "Intrinsic0088()",
0x0089: "Intrinsic00BD()",
0x008A: "Item::getQuality(void)",
0x008B: "Item::setQuality(sint16)",
0x008C: "Intrinsic008C()",
0x008D: "Intrinsic008D()",
0x008E: "Intrinsic008E()",
0x008F: "Camera::getX(void)",
0x0090: "Camera::getY(void)",
0x0091: "Item::setMapNum(sint16)",
0x0092: "Item::getNpcNum(void)",
0x0093: "Item::shoot(WorldPoint&,sint16,sint16)",
0x0094: "Intrinsic0094()",
0x0095: "Item::enterFastArea(void)",
0x0096: "Intrinsic00CA()",
0x0097: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0098: "Item::getNpcNum(void)",
0x0099: "Intrinsic0099()",
0x009A: "teleportToEgg(sint16,uint8)",
0x009B: "Intrinsic009B()",
0x009C: "Intrinsic009C()",
0x009D: "Intrinsic009D()",
0x009E: "Intrinsic009E()",
0x009F: "Intrinsic009F()",
0x00A0: "Item::andStatus(void)",
0x00A1: "Item::getUnkEggType(void)",
0x00A2: "Egg::setEggXRange(uint16)",
0x00A3: "Item::setFrame(uint16)",
0x00A4: "Item::overlaps(uint16)",
0x00A5: "Item::isOn(uint16)",
0x00A6: "Item::getQHi(void)",
0x00A7: "Intrinsic00DA()",
0x00A8: "Item::getCY(void)",
0x00A9: "Intrinsic00A9()",
0x00AA: "Item::isOn(uint16)",
0x00AB: "Npc::isDead(void)",
0x00AC: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00AD: "Intrinsic00AD()",
0x00AE: "Item::getQHi(void)",
0x00AF: "Item::andStatus(void)",
0x00B0: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00B1: "Item::andStatus(void)",
0x00B2: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00B3: "Item::andStatus(void)",
0x00B4: "Item::getDirToCoords(uint16,uint16)",
0x00B5: "Intrinsic00B5()",
0x00B6: "Intrinsic00B6()",
0x00B7: "Item::getNpcNum(void)",
0x00B8: "Item::getCY(void)",
0x00B9: "Item::isOn(uint16)",
0x00BA: "Item::getFootpad(sint16&,sint16&,sint16&)",
0x00BB: "Npc::isDead(void)",
0x00BC: "Intrinsic00BC()",
0x00BD: "Intrinsic00BD()",
0x00BE: "Intrinsic00BE()",
0x00BF: "Item::andStatus(void)",
0x00C0: "Intrinsic00C0()",
0x00C1: "Intrinsic00C1()",
0x00C2: "IntrinsicReturn0",
0x00C3: "Intrinsic00C3()",
0x00C4: "Item::getQHi(void)",
0x00C5: "Item::setQuality(sint16)",
0x00C6: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00C7: "Intrinsic00C7()",
0x00C8: "Intrinsic00C8()",
0x00C9: "Item::callEvent0A(sint16)",
0x00CA: "Intrinsic00CA()",
0x00CB: "Item::isOn(uint16)",
0x00CC: "Intrinsic00CC()",
0x00CD: "Intrinsic00CD()",
0x00CE: "Item::getQHi(void)",
0x00CF: "Item::isOn(uint16)",
0x00D0: "Intrinsic00D0()",
0x00D1: "Intrinsic00D1()",
0x00D2: "Intrinsic00D2()",
0x00D3: "Intrinsic00FA()",
0x00D4: "Camera::getY(void)",
0x00D5: "Intrinsic00D5()",
0x00D6: "Intrinsic00D6()",
0x00D7: "Intrinsic00D7()",
0x00D8: "Intrinsic00D8()",
0x00D9: "Intrinsic00D9()",
0x00DA: "Intrinsic00DA()",
0x00DB: "Intrinsic00DB()",
0x00DC: "Item::getQLo(void)",
0x00DD: "Item::getQHi(void)",
0x00DE: "Item::getNpcNum(void)",
0x00DF: "Intrinsic00DF()",
0x00E0: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E1: "Intrinsic00FA()",
0x00E2: "Item::getQLo(void)",
0x00E3: "Item::getCY(void)",
0x00E4: "Item::getNpcNum(void)",
0x00E5: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E6: "Item::getNpcNum(void)",
0x00E7: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00E8: "Item::getNpcNum(void)",
0x00E9: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EA: "Item::getNpcNum(void)",
0x00EB: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EC: "Item::getNpcNum(void)",
0x00ED: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00EE: "Item::getNpcNum(void)",
0x00EF: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F0: "Item::getNpcNum(void)",
0x00F1: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F2: "Item::getNpcNum(void)",
0x00F3: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F4: "Item::getNpcNum(void)",
0x00F5: "Item::hurl(sint16,sint16,sint16,sint16)",
0x00F6: "Item::getNpcNum(void)",
0x00F7: "Item::andStatus(void)",
0x00F8: "Intrinsic00FA()",
0x00F9: "Item::getQLo(void)",
0x00FA: "Intrinsic00FA()",
0x00FB: "Intrinsic00FB()",
0x00FC: "Intrinsic00FC()",
0x00FD: "Item::getQLo(void)",
0x00FE: "Intrinsic00FE()",
0x00FF: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0100: "Item::andStatus(void)",
0x0101: "Item::isOn(uint16)",
0x0102: "Npc::isDead(void)",
0x0103: "Intrinsic00BD()",
0x0104: "Item::getQHi(void)",
0x0105: "Intrinsic00DA()",
0x0106: "Intrinsic00FA()",
0x0107: "Item::getQLo(void)",
0x0108: "Item::isOn(uint16)",
0x0109: "Item::getQHi(void)",
0x010A: "Item::isOn(uint16)",
0x010B: "Item::getQHi(void)",
0x010C: "Item::hurl(sint16,sint16,sint16,sint16)",
0x010D: "Item::getNpcNum(void)",
0x010E: "Item::getCY(void)",
0x010F: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0110: "Item::isOn(uint16)",
0x0111: "Intrinsic0111()",
0x0112: "IntrinsicReturn0",
0x0113: "Npc::isDead(void)",
0x0114: "Intrinsic0088()",
0x0115: "Intrinsic00C1()",
0x0116: "Item::getQHi(void)",
0x0117: "Intrinsic00BD()",
0x0118: "Item::andStatus(void)",
0x0119: "Item::getNpcNum(void)",
0x011A: "Item::andStatus(void)",
0x011B: "Item::getNpcNum(void)",
0x011C: "Intrinsic011C()",
0x011D: "Item::andStatus(void)",
0x011E: "Item::getNpcNum(void)",
0x011F: "Item::AvatarStoleSomehting(uint16)",
0x0120: "Item::andStatus(void)",
0x0121: "Item::getNpcNum(void)",
0x0122: "Item::getQ(void)",
0x0123: "Item::setQ(uint)",
0x0124: "Item::andStatus(void)",
0x0125: "Item::getNpcNum(void)",
0x0126: "Item::andStatus(void)",
0x0127: "Item::getNpcNum(void)",
0x0128: "Item::andStatus(void)",
0x0129: "Item::getNpcNum(void)",
0x012A: "Item::andStatus(void)",
0x012B: "Item::getNpcNum(void)",
0x012C: "Item::andStatus(void)",
0x012D: "Item::getNpcNum(void)",
0x012E: "Intrinsic00C3()",
0x012F: "Item::andStatus(void)",
0x0130: "Item::getNpcNum(void)",
0x0131: "Intrinsic0131()",
0x0132: "Item::andStatus(void)",
0x0133: "Item::hurl(sint16,sint16,sint16,sint16)",
0x0134: "Item::andStatus(void)",
0x0135: "Camera::getY(void)",
0x0136: "Camera::getZ(void)",
0x0137: "Intrinsic0137()",
0x0138: "Intrinsic009C()",
0x0139: "Item::getTypeFlagCrusader(sint16)",
0x013A: "Item::getNpcNum(void)",
0x013B: "Item::hurl(sint16,sint16,sint16,sint16)",
0x013C: "Item::getCY(void)",
0x013D: "Item::getCZ(void)",
0x013E: "Item::setFrame(uint16)",
0x013F: "Intrinsic013F()",
0x0140: "Intrinsic0140()",
0x0141: "Intrinsic0141()",
0x0142: "Intrinsic0142()",
0x0143: "Npc::isDead(void)",
0x0144: "Intrinsic00FA()",
0x0145: "Intrinsic0145()",
0x0146: "Intrinsic0146()",
0x0147: "Intrinsic0147()",
0x0148: "Item::getNpcNum(void)",
0x0149: "Item::getQLo(void)",
0x014A: "Item::andStatus(void)",
0x014B: "Intrinsic014B()",
0x014C: "Intrinsic014C()",
0x014D: "Intrinsic014D()",
0x014E: "Intrinsic003C()",
0x014F: "Egg::getEggXRange(void)",
0x0150: "Intrinsic009C()",
0x0151: "Intrinsic0072()",
0x0152: "Item::setFrame(uint16)",
0x0153: "Intrinsic00C1()",
0x0154: "Intrinsic00C3()",
0x0155: "Intrinsic00C1()",
0x0156: "Item::isOn(uint16)",
0x0157: "Intrinsic00C3()",
0x0158: "Intrinsic00FA()",
0x0159: "Item::getQHi(void)",
0x015A: "Item::getQLo(void)",
0x015B: "Intrinsic00C1()",
0x015C: "Intrinsic00C3()",
0x015D: "Intrinsic015D()",
}
# Per-variant overrides keyed by (intrinsic ordinal, argument byte count).
# parse_one_op consults this table before the plain per-ordinal hint table.
VARIANT_INTRINSIC_CALLSITE_HINTS: dict[str, dict[tuple[int, int], str]] = {
    "regret": {
        (0x001E, 0x10): "Item::I_fireWeapon(Item *, x, y, z, byte, int, byte)",
    },
    "remorse": {},
}
# NOTE(review): presumably keyed by (class id, event slot); the consumer is
# outside this view -- confirm.
CLASS_EVENT_NAME_HINTS: dict[tuple[int, int], str] = {
    (0x0A0C, 0x32): "waitNTimerTicks",
}
# NOTE(review): numeric selector -> field name; the consumer is outside this
# view -- confirm what the keys represent.
LOOP_SELECTOR_FIELD_HINTS = {
    0x3A: "family",
    0x40: "shape",
}
# Fallback catalog file name used when no variant-specific name applies.
SHAPE_CATALOG_FILENAME = "usecode_shape_catalog.csv"
# Variant name -> shape catalog CSV file name.
SHAPE_CATALOG_FILENAMES = {
    "remorse": "usecode_shape_catalog_remorse.csv",
    "regret": "usecode_shape_catalog_regret.csv",
}
SHAPE_CATALOG_FIELDNAMES = ["shape_code", "human_readable_id", "description"]
# Regex fragment matching a hexadecimal (0x...) or decimal integer literal.
NUMERIC_SHAPE_LITERAL_PATTERN = r"(?:0x[0-9A-Fa-f]+|\d+)"
# Patterns that locate shape literals in pseudocode text. Each one captures
# the text to keep in group ``prefix`` and the numeric literal in group ``value``.
SHAPE_REFERENCE_PATTERNS = (
    re.compile(rf"(?P<prefix>\bshape=)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
    re.compile(
        rf"(?P<prefix>\bItem\.(?:getShape|getType)\([^\)\n]*\)\s*(?:==|!=|<=|>=|<|>)\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"
    ),
    re.compile(rf"(?P<prefix>\bItem\.create\(\s*[^,\n]+,\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
    re.compile(rf"(?P<prefix>\bItem\.legal_create\(\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
)
# Shape code -> catalog row (string keys mapped to string values).
ShapeCatalog = dict[int, dict[str, str]]
def infer_shape_catalog_variant(extracted_root: Path | str | None = None, game_variant: str | None = None) -> str | None:
    """Work out which game variant's shape catalog applies.

    Resolution order: an explicit ``game_variant``; a variant name appearing
    in the extraction path; a known repo-relative directory layout. Returns
    ``None`` when nothing matches.

    Raises:
        ValueError: if ``game_variant`` names an unsupported variant.
    """
    explicit = normalize_game_variant(game_variant)
    if explicit is not None:
        return explicit

    base = resolve_extracted_root(extracted_root)
    from_path = infer_game_variant_from_path(base)
    if from_path is not None:
        return from_path

    try:
        inside_repo = base.resolve().relative_to(REPO_ROOT.resolve())
    except ValueError:
        # The extraction directory lives outside the repository.
        return None

    lowered = tuple(piece.lower() for piece in inside_repo.parts)
    known_layouts = (
        (("usecode", "eusecode_extracted"), "remorse"),
        (("usecode", "regret", "regret_usecode_extracted"), "regret"),
    )
    for layout, variant in known_layouts:
        if lowered[:len(layout)] == layout:
            return variant
    return None
def default_shape_catalog_path(
    extracted_root: Path | str | None = None,
    game_variant: str | None = None,
) -> Path:
    """Return the shape catalog CSV path inside the extraction directory.

    The file name is variant specific when a variant can be determined and
    falls back to ``SHAPE_CATALOG_FILENAME`` otherwise.
    """
    base = resolve_extracted_root(extracted_root)
    detected = infer_shape_catalog_variant(base, game_variant)
    return base / SHAPE_CATALOG_FILENAMES.get(detected, SHAPE_CATALOG_FILENAME)
def format_shape_code(shape_code: int) -> str:
    """Format a shape code as ``0x`` followed by at least four uppercase hex digits."""
    return "0x" + format(shape_code, "04X")
def load_shape_catalog(path: Path | str | None) -> ShapeCatalog:
    """Read a shape catalog CSV into a mapping keyed by integer shape code.

    A ``None`` path or a missing file yields an empty catalog. Rows whose
    ``shape_code`` cell does not parse as an integer are skipped; when a code
    appears more than once the last row wins.
    """
    catalog: ShapeCatalog = {}
    if path is None:
        return catalog
    csv_path = Path(path)
    if not csv_path.exists():
        return catalog

    with csv_path.open("r", encoding="utf-8", newline="") as handle:
        for record in csv.DictReader(handle):
            code = try_parse_int((record.get("shape_code") or "").strip())
            if code is None:
                continue
            catalog[code] = {
                "shape_code": format_shape_code(code),
                "human_readable_id": (record.get("human_readable_id") or "").strip(),
                # The description is kept verbatim (not stripped).
                "description": record.get("description") or "",
            }
    return catalog
def shape_catalog_identifier(shape_code: int, shape_catalog: ShapeCatalog | None = None) -> str | None:
    """Return the sanitized catalog identifier for ``shape_code``.

    ``None`` is returned when there is no catalog, no row for the code, or the
    row's ``human_readable_id`` is blank.
    """
    if not shape_catalog:
        return None
    entry = shape_catalog.get(shape_code)
    if entry is None:
        return None
    label = (entry.get("human_readable_id") or "").strip()
    return sanitize_identifier(label) if label else None
def format_shape_reference(
    shape_code: int,
    shape_catalog: ShapeCatalog | None = None,
    frame_expr: str | None = None,
) -> str:
    """Render a shape reference, optionally suffixed with ``[frame_expr]``.

    The catalog identifier is used when one exists, otherwise the hex shape
    code. A missing or whitespace-only ``frame_expr`` adds no suffix.
    """
    label = shape_catalog_identifier(shape_code, shape_catalog) or format_shape_code(shape_code)
    has_frame = frame_expr is not None and bool(frame_expr.strip())
    return f"{label}[{frame_expr}]" if has_frame else label
def iter_shape_code_matches(text: str):
    """Yield every shape code found in ``text`` by ``SHAPE_REFERENCE_PATTERNS``.

    Codes are produced pattern by pattern, in match order; literals that do
    not parse as integers are dropped. Duplicates are not removed.
    """
    for pattern in SHAPE_REFERENCE_PATTERNS:
        parsed = (try_parse_int(found.group("value")) for found in pattern.finditer(text))
        yield from (code for code in parsed if code is not None)
def collect_shape_codes_from_pseudocode(text: str) -> set[int]:
    """Return the distinct shape codes referenced in pseudocode ``text``."""
    return {code for code in iter_shape_code_matches(text)}
def apply_shape_catalog_to_pseudocode(text: str, shape_catalog: ShapeCatalog | None = None) -> str:
    """Replace numeric shape literals in ``text`` with catalog identifiers.

    Only literals matched by ``SHAPE_REFERENCE_PATTERNS`` and present in the
    catalog with a usable identifier are rewritten; everything else is left
    untouched. With no catalog the text is returned as is.
    """
    if not shape_catalog:
        return text

    def substitute(found: re.Match[str]) -> str:
        code = try_parse_int(found.group("value"))
        name = None if code is None else shape_catalog_identifier(code, shape_catalog)
        if name is None:
            # Nothing to substitute: keep the matched text verbatim.
            return found.group(0)
        return f"{found.group('prefix')}{name}"

    result = text
    for pattern in SHAPE_REFERENCE_PATTERNS:
        result = pattern.sub(substitute, result)
    return result
def generic_loop_selector_call(name: str, arguments: list[tuple[str, str]]) -> str:
    """Render ``name(label=expr, ...)`` from ``(label, expr)`` pairs."""
    pieces = []
    for label, expr in arguments:
        pieces.append(f"{label}={expr}")
    joined = ", ".join(pieces)
    return f"{name}({joined})"
def normalize_game_variant(value: str | None) -> str | None:
    """Canonicalize a user-supplied variant name.

    Returns the stripped, lower-cased name, or ``None`` for ``None``, a blank
    string, or ``"auto"``.

    Raises:
        ValueError: if the name is not a key of ``INTRINSIC_HINT_PATHS``.
    """
    if value is None:
        return None
    candidate = value.strip().lower()
    if candidate in ("", "auto"):
        return None
    if candidate in INTRINSIC_HINT_PATHS:
        return candidate
    raise ValueError(f"Unsupported Crusader variant: {value}")
def infer_game_variant_from_path(path: Path | None) -> str | None:
if path is None:
return None
lowered_parts = [part.lower() for part in path.parts]
if any("regret" in part for part in lowered_parts):
return "regret"
if any("remorse" in part for part in lowered_parts):
return "remorse"
return None
def resolve_game_variant(game_variant: str | None = None, source_root: Path | None = None) -> str:
    """Pick the game variant to use.

    Preference order: the explicit ``game_variant``, a variant inferred from
    ``source_root``, then ``DEFAULT_GAME_VARIANT``.

    Raises:
        ValueError: if ``game_variant`` names an unsupported variant.
    """
    explicit = normalize_game_variant(game_variant)
    if explicit is not None:
        return explicit
    return infer_game_variant_from_path(source_root) or DEFAULT_GAME_VARIANT
def load_intrinsic_hints_from_file(path: Path) -> dict[int, str]:
    """Load intrinsic name hints from a Python source file without executing it.

    The file is parsed with ``ast`` and the first top-level single-target
    assignment to the name ``intrinsics`` is evaluated as a literal. The value
    must be a list; each non-blank string entry is returned keyed by its list
    index (so the index acts as the intrinsic ordinal).

    This loader is best effort: a missing, unreadable, undecodable or
    malformed file yields an empty mapping instead of raising.

    Args:
        path: Location of the hint file.

    Returns:
        Mapping of list index to hint string, or ``{}`` when nothing usable
        was found.
    """
    if not path.exists():
        return {}
    try:
        module = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
    except (OSError, SyntaxError, ValueError):
        # ValueError covers UnicodeDecodeError from read_text (file is not
        # valid UTF-8) and the null-byte error ast.parse raises on some
        # Python versions.
        return {}
    for node in module.body:
        if not isinstance(node, ast.Assign):
            continue
        if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
            continue
        if node.targets[0].id != "intrinsics":
            continue
        try:
            values = ast.literal_eval(node.value)
        except (SyntaxError, ValueError, TypeError):
            # literal_eval raises TypeError for literals such as a dict or
            # set containing an unhashable element.
            return {}
        if not isinstance(values, list):
            return {}
        # Blank and non-string entries are skipped but still consume an index.
        return {
            index: str(value)
            for index, value in enumerate(values)
            if isinstance(value, str) and value.strip()
        }
    return {}
def normalize_intrinsic_hint(name: str) -> str:
    """Clean up an intrinsic signature string taken from a hint table.

    Strips surrounding whitespace and one leading return-type word, then
    repairs a handful of known typos in the source tables.
    """
    cleaned = re.sub(
        r"^(?:unsigned|signed|void|byte|char|short|long|int\d+|uint\d+|sint\d+)\s+(?=[A-Za-z_])",
        "",
        name.strip(),
    )
    # Restore the missing leading "A" without touching correct "AudioProcess::".
    cleaned = re.sub(r"(?<![A-Za-z])udioProcess::", "AudioProcess::", cleaned)
    literal_fixes = (
        ("MusicProcess:I_", "MusicProcess::I_"),
        ("Somehting", "Something"),
        ("Actor::I_setDead())", "Actor::I_setDead()"),
    )
    for broken, fixed in literal_fixes:
        cleaned = cleaned.replace(broken, fixed)
    return cleaned
def build_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]:
    """Build the ordinal-to-name hint table for a game variant.

    Starts from the normalized ``BASE_INTRINSIC_HINTS`` and overlays the
    variant's hint file. A file entry replaces the base entry unless that
    would swap a real name for an ``Intrinsic...`` placeholder.
    """
    variant = resolve_game_variant(game_variant, source_root)
    merged = {
        ordinal: normalize_intrinsic_hint(label)
        for ordinal, label in BASE_INTRINSIC_HINTS.items()
    }
    file_hints = load_intrinsic_hints_from_file(INTRINSIC_HINT_PATHS[variant])
    for ordinal, label in file_hints.items():
        candidate = normalize_intrinsic_hint(label)
        current = merged.get(ordinal)
        would_lose_real_name = (
            current is not None
            and candidate.startswith("Intrinsic")
            and not current.startswith("Intrinsic")
        )
        if would_lose_real_name:
            continue
        merged[ordinal] = candidate
    return merged
# Hint tables already built, keyed by resolved variant name.
_INTRINSIC_HINTS_CACHE: dict[str, dict[int, str]] = {}


def get_intrinsic_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[int, str]:
    """Return the hint table for a variant, building it on first use.

    The same dict object is returned on every call for a given variant.
    """
    variant = resolve_game_variant(game_variant, source_root)
    if _INTRINSIC_HINTS_CACHE.get(variant) is None:
        _INTRINSIC_HINTS_CACHE[variant] = build_intrinsic_hints(variant)
    return _INTRINSIC_HINTS_CACHE[variant]
def get_intrinsic_callsite_hints(game_variant: str | None = None, source_root: Path | None = None) -> dict[tuple[int, int], str]:
    """Return the call-site specific hint table for the resolved variant.

    A variant with no table yields a fresh empty dict.
    """
    variant = resolve_game_variant(game_variant, source_root)
    try:
        return VARIANT_INTRINSIC_CALLSITE_HINTS[variant]
    except KeyError:
        return {}
# Hint table for the default variant, built when this module is imported.
# parse_one_op falls back to it when the caller supplies no table.
INTRINSIC_HINTS = get_intrinsic_hints(DEFAULT_GAME_VARIANT)
# Opcode -> mnemonic for operations that carry no operand bytes: parse_one_op
# takes the mnemonic from here and reads nothing beyond the opcode byte.
NO_ARG_MNEMONICS = {
    0x08: "pop_result",
    0x12: "pop_temp",
    0x13: "pop_temp_dword",
    0x14: "add",
    0x15: "add_dword",
    0x16: "concat",
    0x17: "append_list",
    0x1C: "sub",
    0x1D: "sub_dword",
    0x1E: "mul",
    0x1F: "mul_dword",
    0x20: "div",
    0x21: "div_dword",
    0x22: "mod",
    0x23: "mod_dword",
    0x24: "cmp",
    0x25: "cmp_dword",
    0x26: "strcmp",
    0x27: "cmp_huge",
    0x28: "lt",
    0x29: "lt_dword",
    0x2A: "le",
    0x2B: "le_dword",
    0x2C: "gt",
    0x2D: "gt_dword",
    0x2E: "ge",
    0x2F: "ge_dword",
    0x30: "not",
    0x31: "not_dword",
    0x32: "and",
    0x33: "and_dword",
    0x34: "or",
    0x35: "or_dword",
    0x36: "ne",
    0x37: "ne_dword",
    0x39: "bit_and",
    0x3A: "bit_or",
    0x3B: "bit_not",
    0x3C: "lsh",
    0x3D: "rsh",
    0x50: "ret",
    0x53: "suspend",
    0x59: "push_pid",
    0x5D: "push_retval_byte",
    0x5E: "push_retval_word",
    0x5F: "push_retval_dword",
    0x60: "word_to_dword",
    0x61: "dword_to_word",
    0x68: "copy_string",
    0x6A: "ptr_to_string",
    0x6B: "str_to_ptr",
    0x6D: "push_process_result",
    0x73: "loopnext",
    0x77: "set_info",
    0x78: "process_exclude",
    0x7A: "end",
}
def parse_int(value: str) -> int:
    """Parse an integer literal, honouring ``0x``/``0o``/``0b`` prefixes.

    Raises:
        ValueError: if ``value`` is not a valid integer literal.
    """
    return int(value, base=0)
def try_parse_int(value: str) -> int | None:
    """Parse like ``parse_int`` but return ``None`` instead of raising.

    Both ``ValueError`` (bad literal) and ``TypeError`` (non-string input such
    as ``None``) are treated as "no value".
    """
    try:
        parsed = parse_int(value)
    except (TypeError, ValueError):
        parsed = None
    return parsed
def signed_byte(value: int) -> int:
    """Interpret ``value`` as a two's-complement byte when bit 7 is set."""
    if value & 0x80:
        return value - 0x100
    return value
def bp_repr(value: int) -> str:
    """Format a byte displacement as ``[BP+XXh]`` or ``[BP-XXh]``.

    The byte is treated as signed; the magnitude is printed as at least two
    uppercase hex digits.
    """
    disp = signed_byte(value)
    if disp < 0:
        return f"[BP-{-disp:02X}h]"
    return f"[BP+{disp:02X}h]"
def sp_repr(value: int) -> str:
    """Format a byte displacement as ``[SP+XXh]`` or ``[SP-XXh]``.

    The byte is treated as signed; the magnitude is printed as at least two
    uppercase hex digits.
    """
    disp = signed_byte(value)
    if disp < 0:
        return f"[SP-{-disp:02X}h]"
    return f"[SP+{disp:02X}h]"
@dataclass
class ParseResult:
    """Outcome of decoding one operation (field notes describe what parse_one_op stores)."""

    # Decoded operation record, or None when the opcode was not recognised.
    op: dict[str, Any] | None
    # Offset just past the decoded operation; the start offset for an unknown opcode.
    next_offset: int
    # "end_opcode" for opcode 0x7A, "unknown_opcode" for an unrecognised opcode, else None.
    end_reason: str | None = None
    # Bytes from the unrecognised opcode to the end of the body; None otherwise.
    unknown_tail: bytes | None = None
@dataclass
class DebugSymbolRecord:
    """One debug symbol entry.

    NOTE(review): the code that fills these records is outside this view;
    field meanings are presumed from their names -- confirm against the parser.
    """

    index: int
    unknown1: int
    type_id: int
    type_char: str
    bp_offset: int
    # Presumably the formatted form of bp_offset -- TODO confirm.
    bp_repr: str
    unknown3: int
    name: str
@dataclass
class DebugSymbolParseResult:
    """Result container for a debug symbol table parse.

    NOTE(review): the producer is outside this view; field meanings are
    presumed from their names -- confirm against the parser.
    """

    debug_symbols: list[DebugSymbolRecord]
    end_offset: int
    has_end_opcode: bool
    trailing_bytes: bytes
@dataclass
class FieldTagRecord:
    """One field tag entry.

    NOTE(review): the producer is outside this view; field meanings are
    presumed from their names -- confirm against the parser.
    """

    tag_id: int
    bp_offset: int
    value_kind: int
    name: str
@dataclass
class FieldTagParseResult:
    """Result container for a field tag table parse.

    NOTE(review): the producer is outside this view; field meanings are
    presumed from their names -- confirm against the parser.
    """

    field_tags: list[FieldTagRecord]
    end_offset: int
    trailing_bytes: bytes
class BodyReader:
    """Sequential little-endian reader over a bytes buffer.

    ``offset`` is the position of the next unread byte; every read advances it.
    """

    def __init__(self, data: bytes, offset: int = 0) -> None:
        self.data = data
        self.offset = offset

    def _take(self, count: int) -> bytes:
        """Slice ``count`` bytes at the cursor and advance past them.

        Slicing never raises, so a read that runs off the end yields fewer
        bytes while the cursor still moves by ``count``.
        """
        begin = self.offset
        self.offset = begin + count
        return self.data[begin:begin + count]

    def read_u8(self) -> int:
        """Return the byte at the cursor; indexing raises IndexError past the end."""
        position = self.offset
        byte = self.data[position]
        self.offset = position + 1
        return byte

    def read_u16(self) -> int:
        """Return the next two bytes as an unsigned little-endian integer."""
        return int.from_bytes(self._take(2), "little")

    def read_u32(self) -> int:
        """Return the next four bytes as an unsigned little-endian integer."""
        return int.from_bytes(self._take(4), "little")

    def read_cstring(self) -> str:
        """Read bytes up to a NUL terminator (consumed) or the end of the data.

        Each byte becomes the character with the same code point.
        """
        collected = bytearray()
        limit = len(self.data)
        while self.offset < limit:
            current = self.read_u8()
            if current == 0:
                break
            collected.append(current)
        return collected.decode("latin-1")

    def read_fixed_string(self, length: int) -> str:
        """Read a ``length``-byte field and return it without trailing NULs."""
        field = self._take(length)
        return field.decode("latin-1", errors="replace").rstrip("\x00")
def op_record(start: int, absolute_start: int, opcode: int, raw_bytes: bytes, mnemonic: str, operands: dict[str, Any]) -> dict[str, Any]:
    """Assemble the plain-dict record describing one decoded operation.

    ``raw_bytes`` is stored as a lowercase hex string; ``operands`` is stored
    by reference, not copied.
    """
    record: dict[str, Any] = {}
    record["offset"] = start
    record["absolute_body_offset"] = absolute_start
    record["opcode"] = opcode
    record["mnemonic"] = mnemonic
    record["raw_bytes"] = raw_bytes.hex()
    record["operands"] = operands
    return record
def parse_one_op(
    body: bytes,
    start: int,
    intrinsic_hints: dict[int, str] | None = None,
    intrinsic_callsite_hints: dict[tuple[int, int], str] | None = None,
) -> ParseResult:
    """Decode the single usecode operation that begins at ``body[start]``.

    Args:
        body: Raw bytes of the routine body.
        start: Offset of the opcode byte inside ``body``.
        intrinsic_hints: Ordinal -> name table for ``call_intrinsic``. ``None``
            selects the module default ``INTRINSIC_HINTS``. An explicitly
            passed table (even an empty one) is used as given.
        intrinsic_callsite_hints: (ordinal, arg byte count) -> name overrides
            consulted before ``intrinsic_hints``. ``None`` selects the default
            variant's table. An explicitly passed empty table (for example the
            one of a variant without call-site hints) is used as given rather
            than being replaced by the default variant's names.

    Returns:
        A ``ParseResult``. For a recognised opcode ``op`` holds the record and
        ``next_offset`` points just past it; ``end_reason`` is ``"end_opcode"``
        for opcode 0x7A. For an unrecognised opcode ``op`` is ``None``,
        ``next_offset`` equals ``start``, ``end_reason`` is
        ``"unknown_opcode"`` and ``unknown_tail`` holds ``body[start:]``.

    Raises:
        IndexError: if a single-byte read (including the opcode itself) falls
            past the end of ``body``. Multi-byte reads are slice based and do
            not raise on truncated input.
    """
    reader = BodyReader(body, start)
    opcode = reader.read_u8()
    operands: dict[str, Any] = {}
    mnemonic = NO_ARG_MNEMONICS.get(opcode)
    # Fall back to the defaults only when no table was supplied at all; a
    # truthiness test would wrongly discard a deliberately empty table.
    active_intrinsic_hints = INTRINSIC_HINTS if intrinsic_hints is None else intrinsic_hints
    active_callsite_hints = (
        get_intrinsic_callsite_hints(DEFAULT_GAME_VARIANT)
        if intrinsic_callsite_hints is None
        else intrinsic_callsite_hints
    )
    if opcode in {0x00, 0x01, 0x02, 0x04, 0x05, 0x06}:
        bp_offset = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
        mnemonic = {
            0x00: "pop_local_byte",
            0x01: "pop_local_word",
            0x02: "pop_local_dword",
            0x04: "pop_member_byte",
            0x05: "pop_member_word",
            0x06: "pop_member_dword",
        }[opcode]
    elif opcode in {0x03, 0x07}:
        bp_offset = reader.read_u8()
        size = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "size": size}
        mnemonic = {0x03: "pop_local_blob", 0x07: "pop_member_blob"}[opcode]
    elif opcode == 0x09:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {
            "bp_offset": bp_offset,
            "target": bp_repr(bp_offset),
            "element_size": element_size,
            "slist_flag": slist_flag,
        }
        mnemonic = "pop_list_element"
    elif opcode == 0x0A:
        value = reader.read_u8()
        operands = {"value_u8": value, "value_signed": signed_byte(value)}
        mnemonic = "push_byte_immediate"
    elif opcode == 0x0B:
        operands = {"value_u16": reader.read_u16()}
        mnemonic = "push_word_immediate"
    elif opcode == 0x0C:
        operands = {"value_u32": reader.read_u32()}
        mnemonic = "push_dword_immediate"
    elif opcode == 0x0D:
        declared_length = reader.read_u16()
        # The text is read up to its NUL terminator; declared_length is only recorded.
        text = reader.read_cstring()
        operands = {"declared_length": declared_length, "string": text}
        mnemonic = "push_string_immediate"
    elif opcode == 0x0E:
        element_size = reader.read_u8()
        count = reader.read_u8()
        operands = {"element_size": element_size, "count": count}
        mnemonic = "create_list"
    elif opcode == 0x0F:
        arg_bytes = reader.read_u8()
        intrinsic_ordinal = reader.read_u16()
        operands = {
            "intrinsic_ordinal": intrinsic_ordinal,
            "arg_bytes": arg_bytes,
            # A call-site specific name wins over the plain per-ordinal name.
            "intrinsic_name_hint": active_callsite_hints.get(
                (intrinsic_ordinal, arg_bytes),
                active_intrinsic_hints.get(intrinsic_ordinal),
            ),
        }
        mnemonic = "call_intrinsic"
    elif opcode == 0x10:
        operands = {"target_offset": reader.read_u16()}
        mnemonic = "call_near"
    elif opcode == 0x11:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        operands = {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
        mnemonic = "call_class_event"
    elif opcode in {0x18, 0x19, 0x1A, 0x1B}:
        element_size = reader.read_u8()
        operands = {"element_size": element_size}
        mnemonic = {
            0x18: "append_unique_inline",
            0x19: "append_unique_indirect",
            0x1A: "remove_matching_indirect",
            0x1B: "remove_matching_inline",
        }[opcode]
    elif opcode == 0x38:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {"element_size": element_size, "slist_flag": slist_flag}
        mnemonic = "in_list"
    elif opcode in {0x3E, 0x3F, 0x40, 0x41, 0x43, 0x46, 0x47, 0x48, 0x49, 0x4B, 0x62, 0x63, 0x64, 0x69}:
        bp_offset = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset)}
        mnemonic = {
            0x3E: "push_local_byte",
            0x3F: "push_local_word",
            0x40: "push_local_dword",
            0x41: "push_local_string",
            0x43: "push_local_slist",
            0x46: "push_member_byte",
            0x47: "push_member_word",
            0x48: "push_member_dword",
            0x49: "push_member_huge",
            0x4B: "push_local_addr",
            0x62: "free_local_string",
            0x63: "free_local_slist",
            0x64: "free_local_list",
            0x69: "push_string_ptr",
        }[opcode]
    elif opcode == 0x42:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "element_size": element_size}
        mnemonic = "push_local_list"
    elif opcode == 0x44:
        element_size = reader.read_u8()
        slist_flag = reader.read_u8()
        operands = {"element_size": element_size, "slist_flag": slist_flag}
        mnemonic = "push_list_element"
    elif opcode == 0x45:
        byte0 = reader.read_u8()
        byte1 = reader.read_u8()
        operands = {"value_a": byte0, "value_b": byte1}
        mnemonic = "push_huge"
    elif opcode in {0x4C, 0x4D}:
        size = reader.read_u8()
        operands = {"size": size}
        mnemonic = {0x4C: "push_indirect", 0x4D: "pop_indirect"}[opcode]
    elif opcode in {0x4E, 0x4F}:
        global_id = reader.read_u16()
        size = reader.read_u8()
        operands = {"global_id": global_id, "size": size}
        mnemonic = {0x4E: "push_global", 0x4F: "pop_global"}[opcode]
    elif opcode in {0x51, 0x52}:
        relative = reader.read_u16()
        signed_relative = relative - 0x10000 if relative & 0x8000 else relative
        # The displacement is relative to the byte after the operand.
        target = reader.offset + signed_relative
        operands = {"relative_u16": relative, "relative_signed": signed_relative, "target_offset": target}
        mnemonic = {0x51: "jne", 0x52: "jmp"}[opcode]
    elif opcode == 0x54:
        arg0 = reader.read_u8()
        arg1 = reader.read_u8()
        operands = {"arg0": arg0, "arg1": arg1}
        mnemonic = "implies"
    elif opcode == 0x57:
        arg_bytes = reader.read_u8()
        this_size = reader.read_u8()
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        operands = {
            "arg_bytes": arg_bytes,
            "this_size": this_size,
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
        }
        mnemonic = "spawn"
    elif opcode == 0x58:
        target_class_id = reader.read_u16()
        target_slot = reader.read_u16()
        inline_offset = reader.read_u16()
        this_size = reader.read_u8()
        unknown = reader.read_u8()
        operands = {
            "target_class_id": target_class_id,
            "target_event_slot": target_slot,
            "target_event_name_hint": EVENT_NAME_HINTS.get(target_slot),
            "inline_offset": inline_offset,
            "this_size": this_size,
            "unknown": unknown,
        }
        mnemonic = "spawn_inline"
    elif opcode == 0x5A:
        operands = {"local_bytes": reader.read_u8()}
        mnemonic = "init"
    elif opcode == 0x5B:
        operands = {"line_number": reader.read_u16()}
        mnemonic = "line_number"
    elif opcode == 0x5C:
        relative = reader.read_u16()
        # Computed before the name is read, so it is relative to the byte
        # right after the 16-bit displacement.
        symbol_offset = reader.offset + (relative - 0x10000 if relative & 0x8000 else relative)
        symbol = reader.read_fixed_string(8)
        trailing_zero = reader.read_u8()
        operands = {
            "symbol_offset": symbol_offset,
            "symbol": symbol,
            "trailing_zero": trailing_zero,
        }
        mnemonic = "symbol_info"
    elif opcode in {0x65, 0x66, 0x67, 0x6E, 0x6F, 0x74}:
        value = reader.read_u8()
        operands = {"value_u8": value}
        if opcode in {0x65, 0x66, 0x67}:
            operands["target"] = sp_repr(value)
        mnemonic = {
            0x65: "free_stack_string",
            0x66: "free_stack_list",
            0x67: "free_stack_slist",
            0x6E: "add_sp",
            0x6F: "push_stack_addr",
            0x74: "loopscr",
        }[opcode]
    elif opcode == 0x6C:
        bp_offset = reader.read_u8()
        copy_type = reader.read_u8()
        operands = {"bp_offset": bp_offset, "target": bp_repr(bp_offset), "copy_type": copy_type}
        mnemonic = "param_pid_chg"
    elif opcode == 0x70:
        current_var = reader.read_u8()
        string_bytes = reader.read_u8()
        loop_type = reader.read_u8()
        operands = {"current_var": current_var, "string_bytes": string_bytes, "loop_type": loop_type}
        mnemonic = "loop"
    elif opcode in {0x75, 0x76}:
        bp_offset = reader.read_u8()
        element_size = reader.read_u8()
        branch = reader.read_u16()
        signed_branch = branch - 0x10000 if branch & 0x8000 else branch
        target = reader.offset + signed_branch
        operands = {
            "bp_offset": bp_offset,
            "target_var": bp_repr(bp_offset),
            "element_size": element_size,
            "relative_u16": branch,
            "relative_signed": signed_branch,
            "target_offset": target,
        }
        mnemonic = {0x75: "foreach_list", 0x76: "foreach_slist"}[opcode]
    elif opcode == 0x79:
        operands = {"global_id": reader.read_u16()}
        mnemonic = "global_address"
    elif mnemonic is None:
        # Neither an operand-carrying opcode nor a known no-operand one.
        return ParseResult(op=None, next_offset=start, end_reason="unknown_opcode", unknown_tail=body[start:])
    raw = body[start:reader.offset]
    op = op_record(start, start, opcode, raw, mnemonic, operands)
    end_reason = "end_opcode" if opcode == 0x7A else None
    return ParseResult(op=op, next_offset=reader.offset, end_reason=end_reason)
def load_tsv_rows(path: Path) -> list[dict[str, str]]:
    """Read a tab-separated file with a header row into a list of row dicts.

    The file is opened as UTF-8 with ``newline=""`` so the csv module handles
    line endings itself. Every data row becomes one dict keyed by the header
    names.
    """
    with path.open("r", encoding="utf-8", newline="") as stream:
        reader = csv.DictReader(stream, delimiter="\t")
        rows = [row for row in reader]
    return rows
@lru_cache(maxsize=None)
def load_class_name_hints_by_id(extracted_root_key: str) -> dict[int, str]:
    """Build a ``class_id -> class_name_hint`` map from the class layout index.

    Rows whose ``class_id`` does not parse, or whose name hint is empty after
    stripping, are ignored. When a class id occurs more than once the last row
    wins. Results are memoised per root string without a size limit, so every
    caller for the same root receives the same dict object.
    """
    layout_index_path = extracted_root_paths(Path(extracted_root_key))[1]
    names_by_id: dict[int, str] = {}
    for layout_row in load_tsv_rows(layout_index_path):
        parsed_id = try_parse_int(layout_row.get("class_id", ""))
        label = (layout_row.get("class_name_hint") or "").strip()
        if parsed_id is not None and label:
            names_by_id[parsed_id] = label
    return names_by_id
def class_name_hints_by_id(extracted_root: Path | str | None = None) -> dict[int, str]:
    """Return the cached class-id to class-name map for ``extracted_root``.

    The root is resolved first and its string form is used as the cache key
    for ``load_class_name_hints_by_id``.
    """
    cache_key = str(resolve_extracted_root(extracted_root))
    return load_class_name_hints_by_id(cache_key)
def find_chunk_file(entry_index: int, extracted_root: Path | str | None = None) -> Path:
    """Locate the extracted chunk file for ``entry_index``.

    The zero-padded three-digit name (``chunk_007_*.bin``) is tried first and
    the unpadded name (``chunk_7_*.bin``) second. Within a pattern the
    lexicographically first match is returned.

    Raises:
        FileNotFoundError: if neither pattern matches any file.
    """
    chunks_dir = extracted_root_paths(extracted_root)[3]
    patterns = (
        f"chunk_{entry_index:03d}_*.bin",
        f"chunk_{entry_index}_*.bin",
    )
    for pattern in patterns:
        candidates = sorted(chunks_dir.glob(pattern))
        if candidates:
            return candidates[0]
    raise FileNotFoundError(f"No chunk file found for entry_index={entry_index}")
def select_rows(class_name: str, slot: int, extracted_root: Path | str | None = None) -> tuple[dict[str, str], dict[str, str]]:
    """Find the event row for ``class_name``/``slot`` and its matching layout row.

    The class name is compared case-insensitively. The first event row with
    the requested slot and a parseable ``entry_index`` is selected, and the
    layout row is the first one with the same ``entry_index``.

    Raises:
        KeyError: if no event row matches, or no layout row exists for the
            selected entry index.
        ValueError: if the selected event row has an empty derived body range.
    """
    event_index_path, layout_index_path, _, _ = extracted_root_paths(extracted_root)
    event_rows = load_tsv_rows(event_index_path)
    layout_rows = load_tsv_rows(layout_index_path)

    event_row = None
    for candidate in event_rows:
        if candidate["class_name_hint"].upper() != class_name.upper():
            continue
        if try_parse_int(candidate.get("slot", "")) != slot:
            continue
        if try_parse_int(candidate.get("entry_index", "")) is None:
            continue
        event_row = candidate
        break
    if event_row is None:
        raise KeyError(f"No class_event_index row found for class={class_name} slot=0x{slot:02X}")

    has_body_range = bool(event_row["derived_body_start"]) and bool(event_row["derived_body_end"])
    if not has_body_range:
        raise ValueError(f"Selected row has no derived body range for class={class_name} slot=0x{slot:02X}")

    entry_index = parse_int(event_row["entry_index"])
    for candidate in layout_rows:
        if try_parse_int(candidate.get("entry_index", "")) == entry_index:
            return event_row, candidate
    raise KeyError(f"No class_layout_index row found for entry_index={entry_index}")
def load_runtime_ir_rows(extracted_root: Path | str | None = None) -> list[dict[str, str]]:
    """Load every row of the runtime VM IR index under ``extracted_root``."""
    runtime_ir_path = extracted_root_paths(extracted_root)[2]
    return load_tsv_rows(runtime_ir_path)
def runtime_stage_hints(ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> list[dict[str, str]]:
    """Select the runtime VM IR rows that are relevant to the decoded ``ops``.

    A row is kept when either:

    * its ``opcode_or_lane`` column starts with ``"opcode 0x"`` (compared
      case-insensitively) and the opcode it names occurs in ``ops``; or
    * it is not an opcode row and its ``stage_address`` is one of a fixed set
      of stage addresses.

    Rows are returned in file order.
    """
    always_included_stages = {"000d:177c", "000d:1acb", "000d:2104", "000d:21ed", "000d:22bc"}
    opcode_values = {op["opcode"] for op in ops}
    hints: list[dict[str, str]] = []
    for row in load_runtime_ir_rows(extracted_root):
        # csv.DictReader fills columns missing from a short row with None, so
        # a plain .get(key, "") default is not enough to guarantee a string.
        opcode_or_lane = row.get("opcode_or_lane") or ""
        if opcode_or_lane.lower().startswith("opcode 0x"):
            opcode_value = try_parse_int(opcode_or_lane.split()[1])
            if opcode_value is None or opcode_value not in opcode_values:
                continue
        elif row.get("stage_address") not in always_included_stages:
            continue
        hints.append(row)
    return hints
def annotation_hints(event_row: dict[str, str], payload_shape_hint: str, ops: list[dict[str, Any]], extracted_root: Path | str | None = None) -> dict[str, Any]:
    """Assemble the ``annotation_hints`` section of the IR for one event body.

    The result holds a fixed runtime family tag, the caller-supplied payload
    shape hint, a fixed table of compiled anchor addresses with their roles,
    the runtime stage rows selected for ``ops``, and the slot together with
    its event name (the row's own hint, else the ``EVENT_NAME_HINTS`` entry).
    """
    slot = parse_int(event_row["slot"])
    anchor_table = (
        ("000d:46ec", "context_create_from_slot"),
        ("000d:0988", "referent_chain_mutator"),
        ("000d:177c", "push_frame_word_literal"),
        ("000d:1acb", "compare_stream_dword_and_push_bool"),
        ("000d:208b", "materialize_or_forward_value"),
        ("000d:21ed", "prepend_inline_payload"),
        ("000d:22bc", "matrix_pushback_stage"),
        ("000d:2104", "finalize_to_outptr"),
        ("000d:ebe3", "opcode_sequence_run"),
    )
    hints: dict[str, Any] = {
        "runtime_family": "slot-backed-owner-loaded-body",
        "payload_shape_hint": payload_shape_hint,
        "compiled_anchors": [{"address": address, "role": role} for address, role in anchor_table],
    }
    hints["runtime_stage_hints"] = runtime_stage_hints(ops, extracted_root)
    event_name = event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot)
    hints["slot_taxonomy"] = {"slot": slot, "event_name_hint": event_name}
    return hints
def infer_payload_shape(slot: int) -> str | None:
    """Map an event slot number to its payload shape hint.

    Returns ``"none"``, ``"word"`` or ``"signed_word"`` for the slots listed
    in the table below and ``None`` for every other slot.
    """
    shape_by_slot = {
        0x10: "none",
        0x12: "none",
        0x0A: "word",
        0x0B: "word",
        0x11: "word",
        0x14: "word",
        0x13: "signed_word",
    }
    return shape_by_slot.get(slot)
def parse_debug_symbols(body: bytes, start: int) -> DebugSymbolParseResult | None:
    """Try to read a debug-symbol table from ``body`` beginning at ``start``.

    A lone ``0x7A`` byte at ``start`` is accepted as an empty table. Otherwise
    the byte at ``start`` is a record count, and each record is four single
    bytes followed by a C string. The table must be followed immediately by a
    ``0x7A`` byte.

    Returns:
        The parsed table with ``end_offset`` just past the ``0x7A`` byte and
        the remaining bytes as ``trailing_bytes``. ``None`` when ``start`` is
        at or past the end of ``body``, when a read raises ``IndexError``, or
        when the terminating ``0x7A`` byte is absent.
    """
    if start >= len(body):
        return None
    if body[start] == 0x7A:
        return DebugSymbolParseResult(
            debug_symbols=[],
            end_offset=start + 1,
            has_end_opcode=True,
            trailing_bytes=body[start + 1:],
        )

    reader = BodyReader(body, start)
    count = reader.read_u8()
    records: list[DebugSymbolRecord] = []
    try:
        for ordinal in range(count):
            unknown1 = reader.read_u8()
            type_id = reader.read_u8()
            bp_offset = reader.read_u8()
            unknown3 = reader.read_u8()
            name = reader.read_cstring()
            # Show the type byte as a character only when it is printable ASCII.
            printable = 0x20 <= type_id <= 0x7E
            records.append(
                DebugSymbolRecord(
                    index=ordinal,
                    unknown1=unknown1,
                    type_id=type_id,
                    type_char=chr(type_id) if printable else ".",
                    bp_offset=bp_offset,
                    bp_repr=bp_repr(bp_offset),
                    unknown3=unknown3,
                    name=name,
                )
            )
    except IndexError:
        return None

    terminator_at = reader.offset
    if terminator_at >= len(body) or body[terminator_at] != 0x7A:
        return None
    end_offset = terminator_at + 1
    return DebugSymbolParseResult(
        debug_symbols=records,
        end_offset=end_offset,
        has_end_opcode=True,
        trailing_bytes=body[end_offset:],
    )
def parse_field_tags(body: bytes, start: int) -> FieldTagParseResult | None:
    """Try to read a run of field-tag records from ``body`` beginning at ``start``.

    Each record is three single bytes (tag id, BP offset, value kind) followed
    by a non-empty C string. One ``0x00`` byte directly after a record is
    skipped. The run ends at a ``0x7A`` byte, which must be present.

    Returns:
        The parsed tags with ``end_offset`` just past the ``0x7A`` byte, or
        ``None`` when ``start`` is out of range, a name is empty, a read
        raises ``IndexError``, no tag was read, or the ``0x7A`` byte is absent.
    """
    body_len = len(body)
    if start >= body_len:
        return None

    reader = BodyReader(body, start)
    tags: list[FieldTagRecord] = []
    try:
        while True:
            if reader.offset >= body_len or body[reader.offset] == 0x7A:
                break
            tag_id = reader.read_u8()
            bp_offset = reader.read_u8()
            value_kind = reader.read_u8()
            name = reader.read_cstring()
            if not name:
                return None
            tags.append(FieldTagRecord(tag_id=tag_id, bp_offset=bp_offset, value_kind=value_kind, name=name))
            if reader.offset < body_len and body[reader.offset] == 0x00:
                reader.offset += 1
    except IndexError:
        return None

    if not tags:
        return None
    if reader.offset >= body_len or body[reader.offset] != 0x7A:
        return None
    end_offset = reader.offset + 1
    return FieldTagParseResult(field_tags=tags, end_offset=end_offset, trailing_bytes=body[end_offset:])
def parse_body_ir(
    event_row: dict[str, str],
    layout_row: dict[str, str],
    game_variant: str | None = None,
    extracted_root: Path | str | None = None,
) -> dict[str, Any]:
    """Decode one event body into the IR dictionary.

    The body is the ``derived_body_start``..``derived_body_end`` slice of the
    entry's chunk file. Ops are decoded one after another until the body is
    exhausted or ``parse_one_op`` reports an end reason. If decoding stopped
    on an unknown opcode, three recoveries are tried in order and the first
    one that succeeds wins:

    1. a debug-symbol table directly after the last ``ret`` op;
    2. a debug-symbol table at the stop offset, when a ``symbol_info`` op
       pointed at exactly that offset;
    3. a field-tag run directly after the last ``ret`` op.

    Recoveries 1 and 3 also drop every op decoded after that ``ret``.

    Returns:
        A dict with ``schema_version``, ``source``, ``class``, ``event``,
        ``body``, ``ops``, ``debug_symbols``, ``field_tags`` and
        ``annotation_hints`` entries.
    """

    def symbols_as_dicts(parsed: Any) -> list[dict[str, Any]]:
        # Flatten the parsed symbol records into plain dicts for the IR.
        return [
            {
                "index": symbol.index,
                "unknown1": symbol.unknown1,
                "type_id": symbol.type_id,
                "type_char": symbol.type_char,
                "bp_offset": symbol.bp_offset,
                "bp_repr": symbol.bp_repr,
                "unknown3": symbol.unknown3,
                "name": symbol.name,
            }
            for symbol in parsed.debug_symbols
        ]

    resolved_extracted_root = resolve_extracted_root(extracted_root)
    entry_index = parse_int(event_row["entry_index"])
    chunk_file = find_chunk_file(entry_index, resolved_extracted_root)
    chunk_bytes = chunk_file.read_bytes()
    resolved_game_variant = resolve_game_variant(game_variant, chunk_file)
    intrinsic_hints = get_intrinsic_hints(resolved_game_variant, chunk_file)
    intrinsic_callsite_hints = get_intrinsic_callsite_hints(resolved_game_variant, chunk_file)
    target_class_name_hints = class_name_hints_by_id(resolved_extracted_root)
    body_start = parse_int(event_row["derived_body_start"])
    body_end = parse_int(event_row["derived_body_end"])
    body = chunk_bytes[body_start:body_end]
    body_len = len(body)

    # --- Linear decode -----------------------------------------------------
    ops: list[dict[str, Any]] = []
    offset = 0
    end_reason = "body_exhausted"
    unknown_tail = b""
    while offset < body_len:
        result = parse_one_op(body, offset, intrinsic_hints, intrinsic_callsite_hints)
        decoded = result.op
        if decoded is not None:
            operands = decoded["operands"]
            if "target_class_id" in operands:
                operands["target_class_name_hint"] = target_class_name_hints.get(operands["target_class_id"])
            decoded["absolute_body_offset"] = body_start + decoded["offset"]
            ops.append(decoded)
        offset = result.next_offset
        if result.end_reason is None:
            continue
        end_reason = result.end_reason
        unknown_tail = result.unknown_tail or b""
        if end_reason == "end_opcode":
            # Whatever follows the end opcode is reported as the tail.
            unknown_tail = body[offset:]
        break
    if offset >= body_len and end_reason == "body_exhausted":
        unknown_tail = b""

    # In-range offsets that symbol_info ops point at.
    symbol_targets: set[int] = set()
    for op in ops:
        symbol_offset = op["operands"].get("symbol_offset")
        if op["mnemonic"] != "symbol_info":
            continue
        if isinstance(symbol_offset, int) and 0 <= symbol_offset < body_len:
            symbol_targets.add(symbol_offset)
    candidate_debug_offsets = sorted(symbol_targets)

    last_ret_index: int | None = None
    for index in reversed(range(len(ops))):
        if ops[index]["mnemonic"] == "ret":
            last_ret_index = index
            break

    debug_symbols: list[dict[str, Any]] = []
    debug_symbol_offset: int | None = None
    field_tags: list[dict[str, Any]] = []

    # --- Recovery 1: debug symbols right after the last ret ----------------
    if end_reason == "unknown_opcode" and last_ret_index is not None:
        ret_op = ops[last_ret_index]
        # raw_bytes is treated as two characters per byte.
        ret_end = ret_op["offset"] + (len(ret_op["raw_bytes"]) // 2)
        ret_debug_result = parse_debug_symbols(body, ret_end)
        if ret_debug_result is not None:
            ops = ops[:last_ret_index + 1]
            debug_symbol_offset = ret_end
            debug_symbols = symbols_as_dicts(ret_debug_result)
            end_reason = "debug_symbols_then_end"
            unknown_tail = ret_debug_result.trailing_bytes
            offset = ret_debug_result.end_offset

    # --- Recovery 2: debug symbols at the stop offset ----------------------
    # Only a symbol_info target equal to the stop offset is ever tried, and
    # the candidate offsets are unique, so one membership test is enough.
    if end_reason == "unknown_opcode" and candidate_debug_offsets and offset in candidate_debug_offsets:
        candidate_offset = offset
        debug_result = parse_debug_symbols(body, candidate_offset)
        if debug_result is not None:
            debug_symbol_offset = candidate_offset
            debug_symbols = symbols_as_dicts(debug_result)
            end_reason = "debug_symbols_then_end"
            unknown_tail = debug_result.trailing_bytes
            offset = debug_result.end_offset

    # --- Recovery 3: field tags right after the last ret -------------------
    if end_reason == "unknown_opcode" and last_ret_index is not None:
        ret_op = ops[last_ret_index]
        ret_end = ret_op["offset"] + (len(ret_op["raw_bytes"]) // 2)
        field_tag_result = parse_field_tags(body, ret_end)
        if field_tag_result is not None:
            ops = ops[:last_ret_index + 1]
            field_tags = [
                {
                    "tag_id": tag.tag_id,
                    "bp_offset": tag.bp_offset,
                    "bp_repr": bp_repr(tag.bp_offset),
                    "value_kind": tag.value_kind,
                    "name": tag.name,
                    "tag_label": f"{tag.tag_id:02X}:{tag.bp_offset:02X}{tag.value_kind:02X}->{tag.name}",
                }
                for tag in field_tag_result.field_tags
            ]
            end_reason = "field_tags_then_end"
            unknown_tail = field_tag_result.trailing_bytes
            offset = field_tag_result.end_offset

    slot = parse_int(event_row["slot"])
    payload_shape = infer_payload_shape(slot)

    source_section = {
        "game_variant": resolved_game_variant,
        "flex_path": infer_flex_path(resolved_extracted_root),
        "extracted_root": repo_relative_path(resolved_extracted_root),
        "chunk_file": repo_relative_path(chunk_file),
    }
    class_section = {
        "entry_index": entry_index,
        "object_index": parse_int(layout_row["object_index"]),
        "class_id": parse_int(layout_row["class_id"]),
        "class_name": layout_row["class_name_hint"],
        "raw_code_base_u32": parse_int(layout_row["raw_code_base_u32"]),
        "code_base_minus_one": parse_int(layout_row["code_base_minus_one"]),
        "conservative_event_count": parse_int(layout_row["conservative_event_count"]),
    }
    event_section = {
        "slot": slot,
        "event_name_hint": event_row["event_name_hint"] or EVENT_NAME_HINTS.get(slot),
        "raw_event_entry_word": parse_int(event_row["raw_event_entry_word"]),
        "raw_code_offset": parse_int(event_row["raw_code_offset"]),
        "derived_body_start": body_start,
        "derived_body_end": body_end,
        "derived_body_length": parse_int(event_row["derived_body_length"]),
        "repeated_template_status": event_row["repeated_template_status"],
    }
    body_section = {
        "end_reason": end_reason,
        "raw_body_sha1": hashlib.sha1(body).hexdigest(),
        "unknown_trailing_bytes": unknown_tail.hex(),
        "decoded_op_count": len(ops),
        "debug_symbol_offset": debug_symbol_offset,
        "debug_symbol_count": len(debug_symbols),
        "field_tag_count": len(field_tags),
    }
    return {
        "schema_version": "crusader-usecode-ir-v1-poc",
        "source": source_section,
        "class": class_section,
        "event": event_section,
        "body": body_section,
        "ops": ops,
        "debug_symbols": debug_symbols,
        "field_tags": field_tags,
        "annotation_hints": annotation_hints(event_row, payload_shape, ops, resolved_extracted_root),
    }
# ---------------------------------------------------------------------------
# Family diff helpers
# ---------------------------------------------------------------------------
def _common_prefix_len(a: bytes, b: bytes) -> int:
limit = min(len(a), len(b))
for i in range(limit):
if a[i] != b[i]:
return i
return limit
def _common_suffix_len(a: bytes, b: bytes, prefix_len: int) -> int:
la, lb = len(a), len(b)
limit = min(la - prefix_len, lb - prefix_len)
for i in range(1, limit + 1):
if a[la - i] != b[lb - i]:
return i - 1
return limit
def compute_family_diff(class_name: str, slot: int, extracted_root: Path | str | None = None) -> dict[str, Any]:
    """
    Compare the body of one class/slot event against the rest of its family.

    The family is every event row sharing the reference row's
    ``repeated_template_status`` tag. When that tag is empty or ``"unique"``
    the family falls back to every row with the same slot number. Each
    member's body bytes are loaded and compared with the reference body by
    common prefix and common suffix.

    Returns a dict with:
    reference_entry entry_index for the named class/slot
    reference_class the class name that was asked for
    slot the slot that was asked for
    family_tag repeated_template_status value used for grouping
    member_count number of rows in the family, reference included
    sibling_count number of additional rows in the same family
    members list of per-member records (entry, class, body stats, diff vs ref)

    Raises:
        KeyError: if no event row matches ``class_name``/``slot``.
        ValueError: if the reference row's body cannot be loaded.
    """
    class_event_index, _, _, _ = extracted_root_paths(extracted_root)
    event_rows = load_tsv_rows(class_event_index)

    def _template_status(row: dict[str, str]) -> str:
        # csv.DictReader stores None for columns missing from a short row.
        return (row.get("repeated_template_status") or "").strip()

    # Locate the reference row
    ref_row = next(
        (
            row for row in event_rows
            if row["class_name_hint"].upper() == class_name.upper()
            and try_parse_int(row.get("slot", "")) == slot
            and try_parse_int(row.get("entry_index", "")) is not None
        ),
        None,
    )
    if ref_row is None:
        raise KeyError(f"No class_event_index row for class={class_name} slot=0x{slot:02X}")
    family_tag = _template_status(ref_row)
    ref_entry = parse_int(ref_row["entry_index"])

    # Collect family members: same family_tag when it is a real tag,
    # otherwise the same slot across all classes.
    if family_tag and family_tag != "unique":
        family_rows = [
            row for row in event_rows
            if _template_status(row) == family_tag
            and try_parse_int(row.get("entry_index", "")) is not None
        ]
    else:
        family_rows = [
            row for row in event_rows
            if try_parse_int(row.get("slot", "")) == slot
            and try_parse_int(row.get("entry_index", "")) is not None
        ]

    def _load_body(row: dict[str, str]) -> bytes | None:
        # Return the row's body bytes, or None when the row has no body range
        # or its chunk file cannot be found or parsed.
        body_start_str = row.get("derived_body_start", "")
        body_end_str = row.get("derived_body_end", "")
        if not body_start_str or not body_end_str:
            return None
        try:
            chunk = find_chunk_file(parse_int(row["entry_index"]), extracted_root)
            data = chunk.read_bytes()
            return data[parse_int(body_start_str):parse_int(body_end_str)]
        except (FileNotFoundError, ValueError):
            return None

    ref_body = _load_body(ref_row)
    if ref_body is None:
        raise ValueError(f"Cannot load reference body for class={class_name} slot=0x{slot:02X}")

    members: list[dict[str, Any]] = []
    for row in family_rows:
        entry_idx = parse_int(row["entry_index"])
        body = _load_body(row)
        # A template family can hold several slots of the same entry, so the
        # reference is identified by the row itself, not by its entry index.
        is_ref = row is ref_row
        member: dict[str, Any] = {
            "entry_index": entry_idx,
            "class_name": row["class_name_hint"],
            "slot": try_parse_int(row.get("slot", "")),
            "body_length": len(body) if body is not None else None,
            "is_reference": is_ref,
        }
        if is_ref:
            member["diff_vs_reference"] = {"identical": True, "note": "reference"}
        elif body is not None:
            prefix = _common_prefix_len(ref_body, body)
            suffix = _common_suffix_len(ref_body, body, prefix)
            # The window is whatever lies between the shared prefix and the
            # shared suffix; with suffix == 0 the slice runs to the end.
            ref_diff_window = ref_body[prefix:len(ref_body) - suffix]
            member_diff_window = body[prefix:len(body) - suffix]
            member["diff_vs_reference"] = {
                "common_prefix_bytes": prefix,
                "common_suffix_bytes": suffix,
                "ref_diff_window_hex": ref_diff_window.hex(),
                "member_diff_window_hex": member_diff_window.hex(),
                "diff_window_length_ref": len(ref_diff_window),
                "diff_window_length_member": len(member_diff_window),
                "identical": ref_body == body,
            }
        members.append(member)

    # Reference first, then the remaining members by entry index.
    members.sort(key=lambda m: (0 if m["is_reference"] else 1, m["entry_index"]))
    return {
        "reference_entry": ref_entry,
        "reference_class": class_name,
        "slot": slot,
        "family_tag": family_tag or f"slot_0x{slot:02X}_all",
        "member_count": len(members),
        "sibling_count": len(members) - 1,
        "members": members,
    }
def render_family_diff_text(diff: dict[str, Any]) -> str:
    """Render the result of ``compute_family_diff`` as a plain-text report.

    The report has a three-line header, a blank line, then one line per
    member. Non-reference members that carry a diff get either an
    "identical" line or the prefix/suffix sizes and both diff windows.
    A member whose slot or body length is ``None`` is shown with ``?``
    instead of failing to format. The text ends with a newline.
    """
    lines = [
        f"Family diff: {diff['family_tag']}",
        f"Reference entry={diff['reference_entry']} class={diff['reference_class']} slot=0x{diff['slot']:02X}",
        f"Members: {diff['member_count']} Siblings: {diff['sibling_count']}",
        "",
    ]
    for m in diff["members"]:
        tag = " [REF]" if m["is_reference"] else ""
        body_len = m["body_length"] if m["body_length"] is not None else "?"
        # The slot comes from a tolerant integer parse and may be None.
        slot_text = f"0x{m['slot']:02X}" if m["slot"] is not None else "?"
        lines.append(f" entry={m['entry_index']} class={m['class_name']} slot={slot_text} body_len={body_len}{tag}")
        d = m.get("diff_vs_reference")
        if d and not m["is_reference"]:
            if d["identical"]:
                lines.append(" identical to reference")
            else:
                lines.append(f" prefix={d['common_prefix_bytes']} suffix={d['common_suffix_bytes']}")
                lines.append(f" ref_diff_window ({d['diff_window_length_ref']}B): {d['ref_diff_window_hex']}")
                lines.append(f" mem_diff_window ({d['diff_window_length_member']}B): {d['member_diff_window_hex']}")
    return "\n".join(lines) + "\n"
def build_listing_labels(ir: dict[str, Any]) -> dict[int, str]:
    """Map each branch target's absolute offset to an ``L_XXXX`` label.

    Only ops whose ``target_offset`` operand is an int contribute. The
    absolute offset is that operand plus the event's ``derived_body_start``.
    """
    labels: dict[int, str] = {}
    for op in ir["ops"]:
        relative_target = op["operands"].get("target_offset")
        if not isinstance(relative_target, int):
            continue
        absolute = relative_target + ir["event"]["derived_body_start"]
        labels[absolute] = f"L_{absolute:04X}"
    return labels
def build_script_blocks(ir: dict[str, Any]) -> tuple[dict[int, str], list[tuple[str, list[dict[str, Any]]]]]:
    """Split the decoded ops into labelled basic blocks.

    Block leaders are the first op, every absolute offset named by an int
    ``target_offset`` operand, and the op following a ``jne``, ``jmp``,
    ``foreach_list`` or ``foreach_slist``. The lowest leader offset is
    labelled ``entry`` and every other leader ``block_XXXX``.

    Returns:
        ``(label_map, blocks)``: ``label_map`` maps absolute offsets to
        labels, and ``blocks`` lists ``(label, ops)`` pairs in op order.
        Both are empty when the IR has no ops.
    """
    ops = ir["ops"]
    if not ops:
        return {}, []

    ends_block = ("jne", "jmp", "foreach_list", "foreach_slist")
    first_offset = ops[0]["absolute_body_offset"]
    last_position = len(ops) - 1
    leader_offsets = {first_offset}
    for position, op in enumerate(ops):
        relative_target = op["operands"].get("target_offset")
        if isinstance(relative_target, int):
            leader_offsets.add(ir["event"]["derived_body_start"] + relative_target)
        if op["mnemonic"] in ends_block and position < last_position:
            leader_offsets.add(ops[position + 1]["absolute_body_offset"])

    label_map = {
        leader: ("entry" if rank == 0 else f"block_{leader:04X}")
        for rank, leader in enumerate(sorted(leader_offsets))
    }

    blocks: list[tuple[str, list[dict[str, Any]]]] = []
    active_label = label_map[first_offset]
    active_ops: list[dict[str, Any]] = []
    for op in ops:
        label_here = label_map.get(op["absolute_body_offset"])
        # Start a new block only once the current one holds an op and the
        # label really changes.
        if label_here is not None and active_ops and label_here != active_label:
            blocks.append((active_label, active_ops))
            active_label, active_ops = label_here, []
        active_ops.append(op)
    # ops is non-empty here, so the final block always has content.
    blocks.append((active_label, active_ops))
    return label_map, blocks
def format_script_string(value: str) -> str:
    """Return ``value`` as a double-quoted literal.

    Backslashes and double quotes are escaped with a backslash; every other
    character is copied unchanged.
    """
    escapes = str.maketrans({"\\": "\\\\", '"': '\\"'})
    return '"' + value.translate(escapes) + '"'
def format_generic_operands(operands: dict[str, Any], label_map: dict[int, str], body_start: int) -> str:
    """Render an operand dict as space-separated ``key=value`` pairs.

    ``None`` and empty-string values are omitted. An int ``target_offset`` is
    shown as ``target=<label>``, or as the absolute hex offset when it has no
    label. The two name-hint keys are shortened to ``event`` and ``hint``.
    Other ints are printed in hex unless the key ends in ``_signed``.
    """
    short_keys = {"target_event_name_hint": "event", "intrinsic_name_hint": "hint"}
    rendered: list[str] = []
    for key, value in operands.items():
        if value is None or value == "":
            continue
        if key == "target_offset" and isinstance(value, int):
            absolute = body_start + value
            rendered.append(f"target={label_map.get(absolute, f'0x{absolute:04X}')}")
        elif key in short_keys:
            rendered.append(f"{short_keys[key]}={value}")
        elif isinstance(value, int) and not key.endswith("_signed"):
            rendered.append(f"{key}=0x{value:X}")
        else:
            rendered.append(f"{key}={value}")
    return " ".join(rendered)
def format_script_statement(op: dict[str, Any], label_map: dict[int, str], body_start: int) -> str:
    """Render one decoded op as a line of the script listing.

    Known mnemonics get a dedicated rendering. A mnemonic listed in
    ``NO_ARG_MNEMONICS`` is printed bare, and anything else is printed as the
    mnemonic followed by its operands via ``format_generic_operands``.

    Exact-name cases (``push_local_list``, ``push_local_addr``,
    ``pop_local_blob``, ``pop_member_blob``) are tested before the
    ``push_local_``/``push_member_``/``pop_local_``/``pop_member_`` prefix
    cases; otherwise the prefix cases would swallow them.

    Args:
        op: decoded op with ``mnemonic`` and ``operands`` entries.
        label_map: absolute body offset to block label.
        body_start: absolute offset of the body, added to relative targets.
    """
    mnemonic = op["mnemonic"]
    operands = op["operands"]

    def branch_target() -> str:
        # Prefer the block label; fall back to the absolute hex offset.
        absolute = body_start + operands["target_offset"]
        return label_map.get(absolute, f"0x{absolute:04X}")

    if mnemonic == "init":
        return f"init locals 0x{operands['local_bytes']:02X}"
    if mnemonic == "push_byte_immediate":
        return f"push 0x{operands['value_u8']:02X} ; signed {operands['value_signed']}"
    if mnemonic == "push_word_immediate":
        return f"push 0x{operands['value_u16']:04X}"
    if mnemonic == "push_dword_immediate":
        return f"push 0x{operands['value_u32']:08X}"
    if mnemonic == "push_string_immediate":
        return f"push {format_script_string(operands['string'])}"
    # Exact names first: both start with "push_local_".
    if mnemonic == "push_local_list":
        return f"push {operands['target']}<list:{operands['element_size']}>"
    if mnemonic == "push_local_addr":
        return f"push &{operands['target']}"
    if mnemonic.startswith(("push_local_", "push_member_")):
        return f"push {operands['target']}"
    if mnemonic == "push_list_element":
        return f"push list_element size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "push_huge":
        return f"push huge 0x{operands['value_a']:02X}:0x{operands['value_b']:02X}"
    if mnemonic == "push_global":
        return f"push global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}"
    if mnemonic == "push_string_ptr":
        return f"push string_ptr {operands['target']}"
    # Blob pops carry a size; without one, fall through to the plain pop form.
    if mnemonic in {"pop_local_blob", "pop_member_blob"} and operands.get("size") is not None:
        return f"pop_blob -> {operands['target']} size=0x{operands['size']:X}"
    if mnemonic.startswith(("pop_local_", "pop_member_")):
        return f"pop -> {operands['target']}"
    if mnemonic == "pop_list_element":
        return f"pop_list_element -> {operands['target']} elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "pop_global":
        return f"pop -> global[0x{operands['global_id']:04X}] size=0x{operands['size']:X}"
    if mnemonic == "call_intrinsic":
        hint = operands.get("intrinsic_name_hint") or "unknown_intrinsic"
        return f"call intrinsic 0x{operands['intrinsic_ordinal']:04X} {hint} args=0x{operands['arg_bytes']:02X}"
    if mnemonic == "call_near":
        return f"call {branch_target()}"
    if mnemonic == "call_class_event":
        return f"call {format_target_event_reference(operands)}"
    if mnemonic in {"append_unique_inline", "append_unique_indirect", "remove_matching_indirect", "remove_matching_inline"}:
        return f"{mnemonic} size=0x{operands['element_size']:X}"
    if mnemonic == "create_list":
        return f"create_list elem_size=0x{operands['element_size']:X} count=0x{operands['count']:X}"
    if mnemonic == "in_list":
        return f"in_list elem_size=0x{operands['element_size']:X} slist=0x{operands['slist_flag']:X}"
    if mnemonic == "jne":
        return f"if pop() != 0 goto {branch_target()}"
    if mnemonic == "jmp":
        return f"goto {branch_target()}"
    if mnemonic in {"foreach_list", "foreach_slist"}:
        return f"{mnemonic} {operands['target_var']} elem_size=0x{operands['element_size']:X} -> {branch_target()}"
    if mnemonic == "spawn":
        return (
            f"spawn {format_target_event_reference(operands)} "
            f"args=0x{operands['arg_bytes']:02X} this_size=0x{operands['this_size']:02X}"
        )
    if mnemonic == "spawn_inline":
        return (
            f"spawn_inline {format_target_event_reference(operands)} "
            f"inline=0x{operands['inline_offset']:04X} this_size=0x{operands['this_size']:02X} unk=0x{operands['unknown']:02X}"
        )
    if mnemonic == "line_number":
        return f"line {operands['line_number']}"
    if mnemonic == "symbol_info":
        return f"symbol_info {operands['symbol']} -> 0x{operands['symbol_offset']:04X}"
    if mnemonic == "global_address":
        return f"push &global[0x{operands['global_id']:04X}]"
    if mnemonic == "ret":
        return "return"
    if mnemonic in NO_ARG_MNEMONICS.values():
        return mnemonic
    rendered_operands = format_generic_operands(operands, label_map, body_start)
    return f"{mnemonic} {rendered_operands}".rstrip()
def render_script(ir: dict[str, Any]) -> str:
    """Render the IR of one event body as a script listing.

    The listing is a header line and an opening brace, then body-range and
    end-reason metadata, an optional ``locals`` section built from the debug
    symbols, one labelled section per basic block, an optional ``field_tags``
    section, an optional ``unknown_trailing_bytes`` line, and a closing brace.
    The text ends with a newline.
    """
    label_map, blocks = build_script_blocks(ir)
    event = ir["event"]
    class_info = ir["class"]
    body_start = event["derived_body_start"]
    slot_name = event["event_name_hint"] or f"slot_{event['slot']:02X}"

    header = (
        f"script {class_info['class_name']}.{slot_name} "
        f"(entry={class_info['entry_index']}, class_id=0x{class_info['class_id']:04X}, slot=0x{event['slot']:02X})"
    )
    lines = [
        header,
        "{",
        f" body_range 0x{body_start:04X}..0x{event['derived_body_end']:04X}",
        f" raw_event_word 0x{event['raw_event_entry_word']:04X}",
        f" raw_code_offset 0x{event['raw_code_offset']:08X}",
        f" end_reason {ir['body']['end_reason']}",
    ]

    debug_symbols = ir["debug_symbols"]
    if debug_symbols:
        lines.extend([" locals", " {"])
        lines.extend(
            f" {symbol['bp_repr']} {symbol['name']} ; type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') unk1=0x{symbol['unknown1']:02X} unk3=0x{symbol['unknown3']:02X}"
            for symbol in debug_symbols
        )
        lines.append(" }")
    lines.append("")

    for label, block_ops in blocks:
        lines.append(f" {label}:")
        for op in block_ops:
            statement = format_script_statement(op, label_map, body_start)
            lines.append(f" {statement} ; {op['absolute_body_offset']:04X}: {op['raw_bytes']}")
        lines.append("")

    field_tags = ir["field_tags"]
    if field_tags:
        lines.extend([" field_tags", " {"])
        lines.extend(
            f" {tag['bp_repr']} {tag['name']} ; tag=0x{tag['tag_id']:02X} kind=0x{tag['value_kind']:02X}"
            for tag in field_tags
        )
        lines.append(" }")

    trailing_hex = ir["body"]["unknown_trailing_bytes"]
    if trailing_hex:
        lines.append(f" unknown_trailing_bytes {trailing_hex}")
    lines.append("}")
    return "\n".join(lines) + "\n"
def sanitize_identifier(name: str) -> str:
    """Turn an arbitrary name into an identifier-like token.

    Surrounding whitespace is trimmed, every character that is neither
    alphanumeric nor an underscore becomes an underscore, and leading and
    trailing underscores are then removed. An empty result becomes ``var``,
    and a result starting with a digit is prefixed with ``v_``.
    """
    trimmed = name.strip()
    mapped = "".join(ch if (ch == "_" or ch.isalnum()) else "_" for ch in trimmed)
    identifier = mapped.strip("_")
    if not identifier:
        identifier = "var"
    if identifier[0].isdigit():
        return f"v_{identifier}"
    return identifier
def target_event_display_name(operands: dict[str, Any]) -> str:
    """Choose the display name for the event a call or spawn op targets.

    Preference order: the ``CLASS_EVENT_NAME_HINTS`` entry for the
    (class id, slot) pair, then the op's own ``target_event_name_hint``,
    then ``slot_XX`` built from the slot number.
    """
    class_id = operands["target_class_id"]
    slot = operands["target_event_slot"]
    class_specific = CLASS_EVENT_NAME_HINTS.get((class_id, slot))
    if class_specific:
        return class_specific
    generic_hint = operands.get("target_event_name_hint")
    if generic_hint:
        return generic_hint
    return f"slot_{slot:02X}"
def format_target_event_reference(operands: dict[str, Any]) -> str:
    """Format the class/event an op targets as ``Class.event``.

    When the op has no class name hint, ``class_XXXX_event`` is built from the
    numeric class id instead. Both name parts are sanitised into identifiers.
    """
    class_hint = operands.get("target_class_name_hint")
    event_part = sanitize_identifier(target_event_display_name(operands))
    if not class_hint:
        return f"class_{operands['target_class_id']:04X}_{event_part}"
    return f"{sanitize_identifier(class_hint)}.{event_part}"
def build_local_name_map(ir: dict[str, Any]) -> dict[int, str]:
    """Map each debug symbol's ``bp_offset`` to its sanitised name.

    When two symbols share a ``bp_offset`` the later one wins.
    """
    names: dict[int, str] = {}
    for symbol in ir["debug_symbols"]:
        names[symbol["bp_offset"]] = sanitize_identifier(symbol["name"])
    return names
def format_bp_name(bp_offset: int, local_name_map: dict[int, str]) -> str:
    """Name a BP-relative slot.

    A name from ``local_name_map`` is used when one exists. Otherwise the
    offset is converted with ``signed_byte``: a non-negative displacement
    gives ``arg_XX`` and a negative one gives ``local_XX`` (absolute value).
    """
    try:
        return local_name_map[bp_offset]
    except KeyError:
        pass
    displacement = signed_byte(bp_offset)
    if displacement < 0:
        return f"local_{abs(displacement):02X}"
    return f"arg_{displacement:02X}"
def intrinsic_display_name(name_hint: str | None, ordinal: int) -> str:
    """Return a short call name for an intrinsic.

    Without a hint the result is ``intrinsic_XXXX`` built from the ordinal.
    With one, ``::`` becomes ``.``, an ``I_`` directly after a dot or at the
    very start is dropped, and everything from the first ``(`` on is cut off.
    """
    if not name_hint:
        return f"intrinsic_{ordinal:04X}"
    dotted = name_hint.replace("::", ".")
    dotted = re.sub(r"(?<=\.)I_", "", dotted)
    dotted = dotted.removeprefix("I_")
    # partition leaves the whole string in slot 0 when there is no "(".
    return dotted.partition("(")[0]
def push_expr_from_op(op: dict[str, Any], local_name_map: dict[int, str]) -> tuple[str, int] | None:
    """Describe the value a push op puts on the stack.

    Returns:
        ``(expression_text, width)`` for the push mnemonics handled below,
        where ``width`` is the byte count this function records for the
        pushed value. ``None`` for every other op.
    """
    mnemonic = op["mnemonic"]
    operands = op["operands"]

    constant_pushes = {"push_pid": ("pid", 2), "push_process_result": ("process_result", 2)}
    if mnemonic in constant_pushes:
        return constant_pushes[mnemonic]

    if mnemonic == "push_byte_immediate":
        return (str(operands["value_signed"]), 1)
    if mnemonic == "push_word_immediate":
        return (f"0x{operands['value_u16']:04X}", 2)
    if mnemonic == "push_dword_immediate":
        return (f"0x{operands['value_u32']:08X}", 4)
    if mnemonic == "push_string_immediate":
        return (format_script_string(operands["string"]), max(2, operands["declared_length"]))

    wide_locals = {"push_local_dword", "push_local_addr", "push_string_ptr"}
    narrow_locals = {"push_local_byte", "push_local_word", "push_local_string", "push_local_slist"}
    if mnemonic in wide_locals or mnemonic in narrow_locals:
        width = 4 if mnemonic in wide_locals else 2
        return (format_bp_name(operands["bp_offset"], local_name_map), width)

    wide_members = {"push_member_dword", "push_member_huge"}
    narrow_members = {"push_member_byte", "push_member_word"}
    if mnemonic in wide_members or mnemonic in narrow_members:
        width = 4 if mnemonic in wide_members else 2
        return (f"member.{format_bp_name(operands['bp_offset'], local_name_map)}", width)

    if mnemonic == "push_local_list":
        return (format_bp_name(operands["bp_offset"], local_name_map), max(2, operands["element_size"]))
    if mnemonic == "push_list_element":
        return (f"list_element(size=0x{operands['element_size']:X})", max(1, operands["element_size"]))
    if mnemonic == "push_huge":
        return (f"0x{operands['value_a']:02X}{operands['value_b']:02X}", 4)
    if mnemonic == "push_global":
        return (f"global[0x{operands['global_id']:04X}]", max(1, operands["size"]))
    return None
def pop_stack_bytes(stack: list[tuple[str, int]], byte_count: int) -> list[str]:
    """Pop entries off ``stack`` until ``byte_count`` bytes are covered.

    Every popped entry counts for at least one byte. Popping stops early when
    the stack runs out. The expressions are returned in their original stack
    order (deepest first), and ``stack`` is modified in place.
    """
    popped: list[str] = []
    if byte_count <= 0:
        return popped
    remaining = byte_count
    while remaining > 0 and stack:
        expr, width = stack.pop()
        popped.insert(0, expr)
        remaining -= max(1, width)
    return popped
def combine_binary(stack: list[tuple[str, int]], operator: str, result_width: int = 2) -> None:
    """Replace the top two stack entries with ``(left operator right)``.

    The entry pushed first is the left operand. The stack is left untouched
    when it holds fewer than two entries.
    """
    if len(stack) >= 2:
        (left_expr, _), (right_expr, _) = stack[-2:]
        del stack[-2:]
        stack.append((f"({left_expr} {operator} {right_expr})", result_width))
def evaluate_loop_setup_op(
    op: dict[str, Any],
    stack: list[tuple[str, int]],
    local_name_map: dict[int, str],
) -> bool:
    """Apply one op to the symbolic stack used while decoding a loop selector.

    Pushes, ``push_indirect``, the word and dword forms of add/sub/mul/div,
    and ``line_number`` are understood.

    Returns:
        ``True`` when the op was understood (even if the stack was too
        shallow for it to have any effect), ``False`` otherwise.
    """
    direct_push = push_expr_from_op(op, local_name_map)
    if direct_push is not None:
        stack.append(direct_push)
        return True

    mnemonic = op["mnemonic"]
    operands = op["operands"]
    if mnemonic == "push_indirect":
        if stack:
            inner, _ = stack.pop()
            stack.append((f"*({inner})", max(1, operands["size"])))
        return True

    # mnemonic -> (infix operator, result width in bytes)
    arithmetic = {
        "add": ("+", 2),
        "add_dword": ("+", 4),
        "sub": ("-", 2),
        "sub_dword": ("-", 4),
        "mul": ("*", 2),
        "mul_dword": ("*", 4),
        "div": ("/", 2),
        "div_dword": ("/", 4),
    }
    if mnemonic in arithmetic:
        symbol, width = arithmetic[mnemonic]
        combine_binary(stack, symbol, width)
        return True
    return mnemonic == "line_number"
def normalize_loop_origin(expr: str) -> str:
    """Strip whitespace and one outer ``*( ... )`` wrapper from ``expr``."""
    trimmed = expr.strip()
    is_dereference = trimmed.startswith("*(") and trimmed.endswith(")")
    return trimmed[2:-1] if is_dereference else trimmed
def try_decode_loop_selector(
    ops: list[dict[str, Any]],
    start_index: int,
    local_name_map: dict[int, str],
) -> tuple[str, int] | None:
    """Try to summarise a ``loopscr ... loop`` sequence as one selector expression.

    Starting at ``start_index``, ``loopscr`` token bytes are collected and the
    remaining setup ops are evaluated on a symbolic stack until a ``loop`` op
    is found. That op must have ``string_bytes == 6`` and ``loop_type == 2``.
    Two token patterns are recognised:

    * ``24 3D <field> 25`` with at least three stack entries, rendered as
      ``var in nearby_items(<field>=..., origin=...)``;
    * ``24 42`` with at least four stack entries, rendered through
      ``generic_loop_selector_call('selector_0x42', ...)``.

    Returns:
        ``(selector_text, index_after_loop_op)``, or ``None`` when the
        sequence does not match.
    """
    tokens: list[int] = []
    setup_stack: list[tuple[str, int]] = []
    cursor = start_index
    while cursor < len(ops):
        op = ops[cursor]
        kind = op["mnemonic"]
        if kind == "loop":
            break
        if kind == "loopscr":
            tokens.append(op["operands"]["value_u8"])
        elif not evaluate_loop_setup_op(op, setup_stack, local_name_map):
            return None
        cursor += 1
    else:
        # Ran off the end of the ops without meeting a loop op.
        return None

    loop_operands = ops[cursor]["operands"]
    if loop_operands.get("string_bytes") != 0x6 or loop_operands.get("loop_type") != 0x2:
        return None
    resume_index = cursor + 1

    is_field_selector = (
        len(tokens) == 4
        and tokens[0] == 0x24
        and tokens[1] == 0x3D
        and tokens[3] == 0x25
    )
    if not is_field_selector:
        if tokens == [0x24, 0x42] and len(setup_stack) >= 4:
            current_var = format_bp_name(loop_operands["current_var"], local_name_map)
            call_text = generic_loop_selector_call(
                "selector_0x42",
                [
                    ("arg0", setup_stack[-4][0]),
                    ("arg1", setup_stack[-3][0]),
                    ("arg2", setup_stack[-2][0]),
                    ("origin", normalize_loop_origin(setup_stack[-1][0])),
                ],
            )
            return (f"{current_var} in {call_text}", resume_index)
        return None

    selector_field = LOOP_SELECTOR_FIELD_HINTS.get(tokens[2])
    if selector_field is None or len(setup_stack) < 3:
        return None
    current_var = format_bp_name(loop_operands["current_var"], local_name_map)
    selector_value = setup_stack[-3][0]
    origin_expr = normalize_loop_origin(setup_stack[-1][0])
    return (
        f"{current_var} in nearby_items({selector_field}={selector_value}, origin={origin_expr})",
        resume_index,
    )
def loop_selector_statement(selector_text: str) -> str:
    """Wrap *selector_text* in the ``/* loop_selector ... */`` marker comment.

    ``parse_loop_selector_statement`` recognises this exact shape again when
    the structured renderer looks for loop headers.
    """
    return "/* loop_selector {} */".format(selector_text)
def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]]]:
    """Translate every script block of *ir* into flat pseudocode statements.

    Each block produced by ``build_script_blocks`` is replayed against a
    symbolic expression stack of ``(expression_text, width)`` pairs. Ops that
    push values extend the stack, operators combine its top entries, and ops
    with a visible effect (assignments, calls, jumps, ...) append one text
    statement to the block. Unrecognised ops are kept as ``/* mnemonic ... */``
    comments.

    An intrinsic call is held back until a ``push_retval_*`` op consumes its
    result so it can be inlined into the consuming expression. A call whose
    result is never consumed is emitted as a standalone ``name(args);``
    statement instead of being dropped.

    Returns a list of ``(label, statements)`` pairs, one per block, in block
    order.
    """
    label_map, blocks = build_script_blocks(ir)
    local_name_map = build_local_name_map(ir)
    body_start = ir["event"]["derived_body_start"]
    skip_mnemonics = {"line_number", "symbol_info", "add_sp", "init"}

    # mnemonic -> (operator text, explicit result width); ``None`` means
    # combine_binary is called without a width so its own default applies.
    # NOTE(review): "cmp" and "ne" both render as "!="; this looks like it
    # folds the inversion of a following jne into cmp -- confirm against the
    # VM semantics before changing either entry.
    binary_operators: dict[str, tuple[str, int | None]] = {
        "add": ("+", 2),
        "add_dword": ("+", 4),
        "sub": ("-", 2),
        "sub_dword": ("-", 4),
        "mul": ("*", 2),
        "mul_dword": ("*", 4),
        "div": ("/", 2),
        "div_dword": ("/", 4),
        "bit_and": ("&", None),
        "bit_or": ("|", None),
        "and": ("&&", None),
        "or": ("||", None),
        "cmp": ("!=", None),
        "ne": ("!=", None),
        "lt": ("<", None),
        "le": ("<=", None),
        "gt": (">", None),
        "ge": (">=", None),
    }
    retval_widths = {"push_retval_byte": 1, "push_retval_word": 2, "push_retval_dword": 4}
    call_prefixes = {"call_class_event": "", "spawn": "spawn ", "spawn_inline": "spawn_inline "}

    def resolve_jump_target(jump_operands: dict[str, Any]) -> str:
        # Jump offsets are relative to the body start; fall back to a
        # synthetic block name when no label exists for the target.
        absolute = body_start + jump_operands["target_offset"]
        return label_map.get(absolute, f"block_{absolute:04X}")

    rendered_blocks: list[tuple[str, list[str]]] = []
    for label, ops in blocks:
        stack: list[tuple[str, int]] = []
        block_lines: list[str] = []
        pending_result: str | None = None
        index = 0
        while index < len(ops):
            op = ops[index]
            mnemonic = op["mnemonic"]
            operands = op["operands"]
            next_index = index + 1
            statement: str | None = None
            ends_block = False

            decoded_loop = (
                try_decode_loop_selector(ops, index, local_name_map)
                if mnemonic == "loopscr"
                else None
            )
            if decoded_loop is not None:
                selector_text, next_index = decoded_loop
                statement = loop_selector_statement(selector_text)
                stack.clear()
            elif (pushed := push_expr_from_op(op, local_name_map)) is not None:
                stack.append(pushed)
            elif mnemonic in skip_mnemonics:
                pass
            elif mnemonic == "push_indirect":
                if stack:
                    expr, _ = stack.pop()
                    stack.append((f"*({expr})", max(1, operands["size"])))
            elif mnemonic == "set_info":
                arg_text = ", ".join(expr for expr, _ in stack)
                stack.clear()
                statement = f"set_info({arg_text});"
            elif mnemonic == "process_exclude":
                statement = "process_exclude();"
            elif mnemonic == "call_intrinsic":
                if pending_result is not None:
                    # The previous call's result was never pushed: keep the
                    # call visible instead of overwriting it.
                    block_lines.append(f"{pending_result};")
                arg_exprs = pop_stack_bytes(stack, operands["arg_bytes"])
                callee = intrinsic_display_name(
                    operands.get("intrinsic_name_hint"),
                    operands["intrinsic_ordinal"],
                )
                pending_result = f"{callee}({', '.join(arg_exprs)})"
            elif mnemonic in retval_widths:
                stack.append((pending_result or "retval", retval_widths[mnemonic]))
                pending_result = None
            elif mnemonic in call_prefixes:
                arg_text = ", ".join(expr for expr, _ in stack)
                stack.clear()
                statement = f"{call_prefixes[mnemonic]}{format_target_event_reference(operands)}({arg_text})"
                if mnemonic == "spawn_inline":
                    statement += f" /* inline=0x{operands['inline_offset']:04X} */"
                statement += ";"
            elif mnemonic in binary_operators:
                symbol, width = binary_operators[mnemonic]
                if width is None:
                    combine_binary(stack, symbol)
                else:
                    combine_binary(stack, symbol, width)
            elif mnemonic == "not":
                if stack:
                    expr, width = stack.pop()
                    stack.append((f"(!{expr})", width))
            elif mnemonic == "implies":
                expr = stack.pop()[0] if stack else "retval"
                stack.append((f"implies({expr}, 0x{operands['arg0']:X}, 0x{operands['arg1']:X})", 1))
            elif mnemonic == "pop_temp":
                if stack:
                    stack.pop()
            elif mnemonic == "suspend":
                statement = "suspend;"
                stack.clear()
            elif mnemonic == "jne":
                target = resolve_jump_target(operands)
                condition = stack.pop()[0] if stack else "condition"
                statement = f"if {condition} goto {target};"
            elif mnemonic == "jmp":
                statement = f"goto {resolve_jump_target(operands)};"
                stack.clear()
            elif mnemonic in {"foreach_list", "foreach_slist"}:
                target = resolve_jump_target(operands)
                statement = f"{mnemonic} {format_bp_name(operands['bp_offset'], local_name_map)} -> {target};"
            elif mnemonic == "ret":
                statement = "return;"
                stack.clear()
                ends_block = True  # nothing after ret is rendered for this block
            elif mnemonic.startswith(("pop_local_", "pop_member_")):
                expr = stack.pop()[0] if stack else "value"
                target_name = format_bp_name(operands["bp_offset"], local_name_map)
                statement = f"{target_name} = {expr};"
            else:
                rendered_operands = format_generic_operands(operands, label_map, body_start)
                statement = f"/* {mnemonic} {rendered_operands} */"

            if statement is not None:
                if pending_result is not None:
                    # Flush an unconsumed intrinsic call first so statements
                    # stay in execution order.
                    block_lines.append(f"{pending_result};")
                    pending_result = None
                block_lines.append(statement)
            if ends_block:
                break
            index = next_index

        if pending_result is not None:
            block_lines.append(f"{pending_result};")
        rendered_blocks.append((label, block_lines))
    return rendered_blocks
@dataclass(frozen=True)
class TerminalStatement:
kind: str
condition: str | None = None
target: str | None = None
def parse_terminal_statement(statement: str) -> TerminalStatement | None:
    """Classify *statement* as a control-flow terminal, if it is one.

    Recognises exactly three shapes: ``return;``, ``goto LABEL;`` and
    ``if CONDITION goto LABEL;``. Anything else yields ``None``.
    """
    if statement == "return;":
        return TerminalStatement("return")
    if (jump := re.fullmatch(r"goto ([A-Za-z0-9_]+);", statement)) is not None:
        return TerminalStatement("goto", target=jump.group(1))
    if (branch := re.fullmatch(r"if (.+) goto ([A-Za-z0-9_]+);", statement)) is not None:
        condition, target = branch.groups()
        return TerminalStatement("if", condition=condition, target=target)
    return None
def strip_outer_parens(expr: str) -> str:
    """Strip whitespace and any parentheses that enclose the whole of *expr*.

    A leading ``(`` is only removed when its matching ``)`` is the final
    character, so ``(a) + (b)`` is left alone while ``((a + b))`` becomes
    ``a + b``.
    """
    text = expr.strip()
    while text.startswith("(") and text.endswith(")"):
        depth = 0
        wraps_everything = False
        for position, char in enumerate(text):
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    # The opening parenthesis closes here; it wraps the whole
                    # text only if this is the last character.
                    wraps_everything = position == len(text) - 1
                    break
        if not wraps_everything:
            break
        text = text[1:-1].strip()
    return text


def _mask_nested_parens(expr: str) -> str:
    """Return *expr* with every character inside parentheses replaced by NUL.

    The result has the same length as *expr*, so offsets found in it can be
    used to slice the original text. Top-level parentheses themselves are kept.
    """
    masked: list[str] = []
    depth = 0
    for char in expr:
        if char == ")" and depth > 0:
            depth -= 1
        masked.append(char if depth == 0 else "\0")
        if char == "(":
            depth += 1
    return "".join(masked)


def invert_condition_text(condition: str) -> str:
    """Return pseudocode text for the logical negation of *condition*.

    Only operators at parenthesis depth 0 are considered:

    * an expression joined by a top-level ``&&`` or ``||`` is wrapped as
      ``!(...)`` rather than having one of its operands rewritten;
    * a top-level comparison has its operator flipped (``!=`` <-> ``==``,
      ``<`` <-> ``>=``, ``>`` <-> ``<=``);
    * a leading ``!`` is removed;
    * a bare name or call gets a ``!`` prefix;
    * anything else is wrapped as ``!(...)``.
    """
    expr = strip_outer_parens(condition)
    top_level = _mask_nested_parens(expr)

    if "&&" in top_level or "||" in top_level:
        # Flipping a single comparison inside a compound condition would not
        # negate the condition as a whole.
        return f"!({expr})"

    comparisons = (
        (" != ", " == "),
        (" == ", " != "),
        (" <= ", " > "),
        (" >= ", " < "),
        (" < ", " >= "),
        (" > ", " <= "),
    )
    for source, replacement in comparisons:
        position = top_level.find(source)
        if position != -1:
            return expr[:position] + replacement + expr[position + len(source):]

    if expr.startswith("!"):
        return strip_outer_parens(expr[1:])
    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_:.]*(\(.*\))?", expr):
        return f"!{expr}"
    return f"!({expr})"
def indent_lines(lines: list[str], prefix: str = " ") -> list[str]:
    """Return a copy of *lines* with *prefix* prepended to each non-empty line.

    Empty lines stay empty so no trailing whitespace is introduced.
    """
    indented: list[str] = []
    for line in lines:
        indented.append(prefix + line if line else "")
    return indented
def detect_noop_compare_chain(
blocks: list[tuple[str, list[str]]],
label_to_index: dict[str, int],
start_index: int,
end_index: int,
) -> int | None:
cursor = start_index
common_target: str | None = None
while cursor + 1 < end_index:
_, compare_statements = blocks[cursor]
_, goto_statements = blocks[cursor + 1]
if len(compare_statements) != 1 or len(goto_statements) != 1:
return None
compare_terminal = parse_terminal_statement(compare_statements[0])
goto_terminal = parse_terminal_statement(goto_statements[0])
if compare_terminal is None or compare_terminal.kind != "if":
return None
if goto_terminal is None or goto_terminal.kind != "goto":
return None
if common_target is None:
common_target = goto_terminal.target
elif goto_terminal.target != common_target:
return None
if compare_terminal.target == common_target:
body_index = label_to_index.get(common_target or "")
if body_index is None or body_index != cursor + 2 or body_index >= end_index:
return None
return body_index
next_index = label_to_index.get(compare_terminal.target or "")
if next_index is None or next_index != cursor + 2 or next_index >= end_index:
return None
cursor += 2
return None
def last_nonempty_block_index(
blocks: list[tuple[str, list[str]]],
start_index: int,
end_index: int,
) -> int | None:
for index in range(end_index - 1, start_index - 1, -1):
if blocks[index][1]:
return index
return None
def parse_selector_condition(condition: str) -> tuple[str, str] | None:
    """Split a ``LEFT != RIGHT`` condition into its two stripped operands.

    Enclosing parentheses are removed first. The split happens at the first
    ``!=`` in the text. Returns ``None`` when the condition has no ``!=`` with
    text on both sides.
    """
    expr = strip_outer_parens(condition)
    parts = re.fullmatch(r"(.+?)\s*!=\s*(.+)", expr)
    if parts is None:
        return None
    left, right = parts.groups()
    return left.strip(), right.strip()
def parse_loop_selector_statement(statement: str) -> str | None:
match = re.fullmatch(r"/\* loop_selector (.+) \*/", statement)
if match is None:
return None
return match.group(1)
def is_loop_selector_only_block(statements: list[str]) -> bool:
    """Tell whether a block consists of exactly one loop-selector marker."""
    if len(statements) != 1:
        return False
    return parse_loop_selector_statement(statements[0]) is not None
def render_selector_chain(
blocks: list[tuple[str, list[str]]],
label_to_index: dict[str, int],
start_index: int,
end_index: int,
return_labels: set[str],
active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], int] | None:
if not blocks[start_index][1]:
return None
base_terminal = parse_terminal_statement(blocks[start_index][1][-1])
if base_terminal is None or base_terminal.kind != "if":
return None
selector = parse_selector_condition(base_terminal.condition or "")
if selector is None:
return None
selector_expr, _ = selector
cursor = start_index
join_label: str | None = None
branches: list[tuple[str, list[str]]] = []
while cursor < end_index:
_, statements = blocks[cursor]
if not statements:
return None
terminal = parse_terminal_statement(statements[-1])
if terminal is None or terminal.kind != "if":
return None
parsed = parse_selector_condition(terminal.condition or "")
if parsed is None or parsed[0] != selector_expr:
return None
target_label = terminal.target or ""
target_index = label_to_index.get(target_label)
if target_index is None or target_index <= cursor + 1 or target_index > end_index:
return None
body_tail_index = last_nonempty_block_index(blocks, cursor + 1, target_index)
if body_tail_index is None:
return None
body_tail_terminal = parse_terminal_statement(blocks[body_tail_index][1][-1])
if body_tail_terminal is None or body_tail_terminal.kind != "goto":
return None
current_join = body_tail_terminal.target or ""
current_join_index = label_to_index.get(current_join)
if current_join_index is None or current_join_index > end_index:
return None
if current_join_index < target_index:
return None
if current_join_index == target_index and target_label != current_join:
return None
if join_label is None:
join_label = current_join
elif current_join != join_label:
return None
body_result = render_structured_region(
blocks,
label_to_index,
cursor + 1,
target_index,
return_labels,
{join_label},
active_regions,
render_cache,
)
if body_result is None:
return None
body_lines, _ = body_result
branches.append((invert_condition_text(terminal.condition or "condition"), body_lines))
if target_label == join_label:
break
cursor = target_index
if join_label is None:
return None
rendered: list[str] = []
for index, (condition, body_lines) in enumerate(branches):
branch_head = "if" if index == 0 else "else if"
rendered.append(f"{branch_head} ({condition}) {{")
rendered.extend(indent_lines(body_lines))
rendered.append("}")
return rendered, label_to_index[join_label]
def render_loop_construct(
blocks: list[tuple[str, list[str]]],
label_to_index: dict[str, int],
index: int,
end_index: int,
return_labels: set[str],
active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], int] | None:
_, statements = blocks[index]
if not statements:
return None
terminal = parse_terminal_statement(statements[-1])
if terminal is None or terminal.kind != "if":
return None
target_label = terminal.target or ""
target_index = label_to_index.get(target_label)
if target_index is None or target_index <= index or target_index > end_index:
return None
loop_tail_index = last_nonempty_block_index(blocks, index + 1, target_index)
if loop_tail_index is None:
return None
loop_tail_terminal = parse_terminal_statement(blocks[loop_tail_index][1][-1])
if loop_tail_terminal is None or loop_tail_terminal.kind != "goto" or loop_tail_terminal.target != blocks[index][0]:
return None
loop_body = render_structured_region(
blocks,
label_to_index,
index + 1,
target_index,
return_labels,
{blocks[index][0]},
active_regions,
render_cache,
)
if loop_body is None:
return None
loop_lines, _ = loop_body
loop_selector = None
if index > 0 and is_loop_selector_only_block(blocks[index - 1][1]):
loop_selector = parse_loop_selector_statement(blocks[index - 1][1][0])
rendered: list[str] = []
if loop_selector is not None:
rendered.append(f"for {loop_selector} {{")
else:
rendered.append(f"while ({invert_condition_text(terminal.condition or 'condition')}) {{")
rendered.extend(indent_lines(loop_lines))
rendered.append("}")
return rendered, target_index
def render_infinite_loop_construct(
blocks: list[tuple[str, list[str]]],
label_to_index: dict[str, int],
index: int,
end_index: int,
return_labels: set[str],
active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], int] | None:
if index + 1 >= end_index:
return None
loop_label = blocks[index][0]
loop_tail_index: int | None = None
for candidate in range(end_index - 1, index, -1):
statements = blocks[candidate][1]
if not statements:
continue
terminal = parse_terminal_statement(statements[-1])
if terminal is not None and terminal.kind == "goto" and terminal.target == loop_label:
loop_tail_index = candidate
break
if loop_tail_index is None:
return None
loop_body = render_structured_region(
blocks,
label_to_index,
index,
loop_tail_index + 1,
return_labels,
{loop_label},
active_regions,
render_cache,
)
if loop_body is None:
return None
loop_lines, _ = loop_body
rendered = ["while (true) {"]
rendered.extend(indent_lines(loop_lines))
rendered.append("}")
return rendered, loop_tail_index + 1
def render_structured_region(
blocks: list[tuple[str, list[str]]],
label_to_index: dict[str, int],
start_index: int,
end_index: int,
return_labels: set[str],
exit_labels: set[str] | None = None,
active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
) -> tuple[list[str], bool] | None:
region_key = (start_index, end_index, tuple(sorted(exit_labels or ())))
if render_cache is not None and region_key in render_cache:
return render_cache[region_key]
if active_regions is None:
active_regions = set()
elif region_key in active_regions:
return None
active_regions = set(active_regions)
active_regions.add(region_key)
allowed_exit_labels = set(exit_labels or ())
lines: list[str] = []
index = start_index
while index < end_index:
skipped_index = detect_noop_compare_chain(blocks, label_to_index, index, end_index)
if skipped_index is not None:
index = skipped_index
continue
_, statements = blocks[index]
if not statements:
index += 1
continue
if is_loop_selector_only_block(statements):
index += 1
continue
terminal = parse_terminal_statement(statements[-1])
if terminal is None:
lines.extend(statements)
index += 1
continue
lines.extend(statements[:-1])
if terminal.kind == "return":
lines.append("return;")
return lines, False
if terminal.kind == "goto":
target_label = terminal.target or ""
target_index = label_to_index.get(target_label)
if target_label in return_labels:
lines.append("return;")
return lines, False
if target_label in allowed_exit_labels:
return lines, False
if target_index is None:
return None
if target_index == index + 1:
index += 1
continue
if index < target_index < end_index:
index = target_index
continue
return None
target_label = terminal.target or ""
target_index = label_to_index.get(target_label)
if target_index is None or target_index <= index or target_index > end_index:
return None
if target_index == index + 1:
index += 1
continue
selector_chain = render_selector_chain(
blocks,
label_to_index,
index,
end_index,
return_labels,
active_regions,
render_cache,
)
if selector_chain is not None:
selector_lines, selector_join_index = selector_chain
lines.extend(selector_lines)
index = selector_join_index
continue
loop_construct = render_loop_construct(
blocks,
label_to_index,
index,
end_index,
return_labels,
active_regions,
render_cache,
)
if loop_construct is not None:
loop_lines, loop_join_index = loop_construct
lines.extend(loop_lines)
index = loop_join_index
continue
true_tail_index = last_nonempty_block_index(blocks, index + 1, target_index)
if true_tail_index is not None:
true_tail_terminal = parse_terminal_statement(blocks[true_tail_index][1][-1])
if true_tail_terminal is not None and true_tail_terminal.kind == "goto":
join_label = true_tail_terminal.target or ""
join_index = label_to_index.get(join_label)
if join_index is not None and join_index > target_index and join_index <= end_index:
true_result = render_structured_region(
blocks,
label_to_index,
index + 1,
target_index,
return_labels,
{join_label},
active_regions,
render_cache,
)
false_result = render_structured_region(
blocks,
label_to_index,
target_index,
join_index,
return_labels,
{join_label},
active_regions,
render_cache,
)
if true_result is not None and false_result is not None:
true_lines, _ = true_result
false_lines, _ = false_result
lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{")
lines.extend(indent_lines(true_lines))
lines.append("}")
if false_lines:
if false_lines[0].startswith("if "):
lines.append(f"else {false_lines[0]}")
lines.extend(false_lines[1:])
else:
lines.append("else {")
lines.extend(indent_lines(false_lines))
lines.append("}")
index = join_index
continue
inner_result = render_structured_region(
blocks,
label_to_index,
index + 1,
target_index,
return_labels,
None,
active_regions,
render_cache,
)
if inner_result is None:
if render_cache is not None:
render_cache[region_key] = None
return None
inner_lines, inner_falls_through = inner_result
if inner_lines:
lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{")
lines.extend(indent_lines(inner_lines))
lines.append("}")
elif not inner_falls_through:
lines.append(f"if ({invert_condition_text(terminal.condition or 'condition')}) {{")
lines.append("}")
index = target_index
result = (lines, True)
if render_cache is not None:
render_cache[region_key] = result
return result
def render_structured_pseudocode(blocks: list[tuple[str, list[str]]]) -> list[str] | None:
if not blocks:
return []
label_to_index = {label: index for index, (label, _) in enumerate(blocks)}
return_labels = {
label
for label, statements in blocks
if len(statements) == 1 and statements[0] == "return;"
}
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] = {}
structured = render_structured_region(blocks, label_to_index, 0, len(blocks), return_labels, None, None, render_cache)
if structured is None:
return None
return structured[0]
def render_partially_structured_blocks(blocks: list[tuple[str, list[str]]]) -> list[str]:
if not blocks:
return []
label_to_index = {label: index for index, (label, _) in enumerate(blocks)}
return_labels = {
label
for label, statements in blocks
if len(statements) == 1 and statements[0] == "return;"
}
lines: list[str] = []
index = 0
while index < len(blocks):
label, statements = blocks[index]
if is_loop_selector_only_block(statements):
loop_selector = parse_loop_selector_statement(statements[0])
if loop_selector is not None and index + 1 < len(blocks):
next_label, next_statements = blocks[index + 1]
next_terminal = parse_terminal_statement(next_statements[-1]) if next_statements else None
if next_terminal is not None and next_terminal.kind == "if":
target_index = label_to_index.get(next_terminal.target or "")
if target_index is not None and target_index > index + 1:
loop_tail_index = last_nonempty_block_index(blocks, index + 2, target_index)
if loop_tail_index is not None:
loop_tail_terminal = parse_terminal_statement(blocks[loop_tail_index][1][-1])
if loop_tail_terminal is not None and loop_tail_terminal.kind == "goto" and loop_tail_terminal.target == next_label:
loop_body = render_structured_region(
blocks,
label_to_index,
index + 2,
target_index,
return_labels,
{next_label},
)
if loop_body is not None:
loop_lines, _ = loop_body
lines.append(f" {label}:")
lines.append(f" for {loop_selector} {{")
lines.extend(f" {line}" for line in indent_lines(loop_lines))
lines.append(" }")
lines.append("")
index = target_index
continue
lines.append(f" {label}:")
lines.append(f" {statements[0]}")
lines.append("")
index += 1
continue
selector_chain = render_selector_chain(blocks, label_to_index, index, len(blocks), return_labels)
if selector_chain is not None:
selector_lines, selector_join_index = selector_chain
lines.append(f" {label}:")
for statement in selector_lines:
lines.append(f" {statement}" if statement else "")
lines.append("")
index = selector_join_index
continue
loop_construct = render_loop_construct(
blocks,
label_to_index,
index,
len(blocks),
return_labels,
)
if loop_construct is not None:
loop_lines, loop_join_index = loop_construct
lines.append(f" {label}:")
for statement in loop_lines:
lines.append(f" {statement}" if statement else "")
lines.append("")
index = loop_join_index
continue
infinite_loop_construct = render_infinite_loop_construct(
blocks,
label_to_index,
index,
len(blocks),
return_labels,
)
if infinite_loop_construct is not None:
loop_lines, loop_join_index = infinite_loop_construct
lines.append(f" {label}:")
for statement in loop_lines:
lines.append(f" {statement}" if statement else "")
lines.append("")
index = loop_join_index
continue
lines.append(f" {label}:")
for statement in statements:
lines.append(f" {statement}")
lines.append("")
index += 1
return lines
def render_pseudocode(ir: dict[str, Any], shape_catalog: ShapeCatalog | None = None) -> str:
    """Render *ir* as a complete pseudocode function.

    The output consists of a ``function <class>_<slot>()`` header carrying the
    entry, class id and slot as a comment, an optional ``var`` section listing
    the debug symbols, and the body. The body is fully structured when
    ``render_structured_pseudocode`` succeeds and falls back to labelled,
    partially structured blocks otherwise. The finished text is passed through
    ``apply_shape_catalog_to_pseudocode`` together with *shape_catalog*.
    """
    event = ir["event"]
    class_info = ir["class"]
    slot_name = sanitize_identifier(event["event_name_hint"] or f"slot_{event['slot']:02X}")
    function_name = f"{sanitize_identifier(class_info['class_name'].lower())}_{slot_name}"
    header = (
        f"function {function_name}() "
        f"/* entry={class_info['entry_index']} class_id=0x{class_info['class_id']:04X} slot=0x{event['slot']:02X} */"
    )
    lines = [header, "{"]

    symbols = ir["debug_symbols"]
    if symbols:
        lines.append(" var")
        last_position = len(symbols) - 1
        for position, symbol in enumerate(symbols):
            # Comma between declarations, semicolon after the final one.
            separator = ";" if position == last_position else ","
            lines.append(
                f" {sanitize_identifier(symbol['name'])}{separator} /* {symbol['bp_repr']} type=0x{symbol['type_id']:02X} */"
            )
        lines.append("")

    rendered_blocks = decompile_pseudocode_blocks(ir)
    structured_lines = render_structured_pseudocode(rendered_blocks)
    if structured_lines is None:
        lines.extend(render_partially_structured_blocks(rendered_blocks))
    else:
        lines.extend(f" {statement}" if statement else "" for statement in structured_lines)
    lines.append("}")
    return apply_shape_catalog_to_pseudocode("\n".join(lines) + "\n", shape_catalog)
def validate_pseudocode_text(text: str) -> list[str]:
    """Run simple consistency checks over rendered pseudocode *text*.

    Checks performed, line by line:

    * a line ending in ``{`` opens a brace level and a line that is exactly
      ``}`` closes one; closing with nothing open is reported;
    * a line that is exactly ``NAME:`` declares a label; repeats are reported;
    * every ``goto NAME;`` is recorded and must name a declared label.

    Returns the error messages: per-line problems first, then an unbalanced
    brace summary, then unresolved ``goto`` targets. An empty list means no
    problems were found.
    """
    label_pattern = re.compile(r"([A-Za-z_][A-Za-z0-9_]*):")
    goto_pattern = re.compile(r"\bgoto ([A-Za-z_][A-Za-z0-9_]*)\s*;")

    problems: list[str] = []
    first_seen: dict[str, int] = {}
    jumps: list[tuple[str, int]] = []
    depth = 0
    for line_number, raw_line in enumerate(text.splitlines(), start=1):
        content = raw_line.strip()
        if content == "":
            continue

        if content.endswith("{"):
            depth += 1
        elif content == "}":
            if depth == 0:
                problems.append(f"line {line_number}: unexpected closing brace")
            else:
                depth -= 1

        labelled = label_pattern.fullmatch(content)
        if labelled is not None:
            name = labelled.group(1)
            if name in first_seen:
                problems.append(
                    f"line {line_number}: duplicate label {name} (first at line {first_seen[name]})"
                )
            else:
                first_seen[name] = line_number

        jumps.extend((found.group(1), line_number) for found in goto_pattern.finditer(content))

    if depth != 0:
        problems.append(f"unbalanced braces: final depth {depth}")
    problems.extend(
        f"line {line_number}: goto target {target} has no label"
        for target, line_number in jumps
        if target not in first_seen
    )
    return problems
def render_text(ir: dict[str, Any]) -> str:
    """Render *ir* as a readable text listing.

    The listing starts with class, slot, body and SHA1 header lines, followed
    by one line per decoded op (absolute offset, opcode, mnemonic, formatted
    operands, raw bytes) with listing labels inserted before labelled offsets.
    Debug symbols, field tags and unknown trailing bytes are appended when
    present.
    """
    labels = build_listing_labels(ir)
    body_start = ir["event"]["derived_body_start"]

    def format_operand(key: str, value: Any) -> str:
        """Format one operand; an empty string means "omit this operand"."""
        if value is None or value == "":
            return ""
        if key == "intrinsic_name_hint" and value:
            return f"hint={value}"
        if key == "target_event_name_hint" and value:
            return f"event={value}"
        if key == "target_offset" and isinstance(value, int):
            # Jump offsets are relative to the body start. The fallback text
            # is built outside the f-string because re-using the same quote
            # character in a nested f-string needs Python 3.12+ (PEP 701).
            absolute = value + body_start
            destination = labels.get(absolute) or f"0x{absolute:04X}"
            return f"->{destination}"
        if isinstance(value, int):
            if key.endswith("_signed"):
                return f"{key}={value}"
            return f"{key}=0x{value:X}"
        return f"{key}={value}"

    lines = [
        f"Class {ir['class']['class_name']} entry={ir['class']['entry_index']} class_id=0x{ir['class']['class_id']:X}",
        f"Slot 0x{ir['event']['slot']:02X} hint={ir['event']['event_name_hint']} raw_word=0x{ir['event']['raw_event_entry_word']:04X} raw_code_off=0x{ir['event']['raw_code_offset']:08X}",
        f"Body 0x{ir['event']['derived_body_start']:04X}..0x{ir['event']['derived_body_end']:04X} len={ir['event']['derived_body_length']} end={ir['body']['end_reason']} ops={ir['body']['decoded_op_count']}",
        f"SHA1 {ir['body']['raw_body_sha1']}",
        "",
    ]
    for op in ir["ops"]:
        absolute_offset = op["absolute_body_offset"]
        label = labels.get(absolute_offset)
        if label is not None:
            lines.extend(["", f"{label}:"])
        operand_items = [formatted for key, value in op["operands"].items() if (formatted := format_operand(key, value))]
        lines.append(f"{absolute_offset:04X}: {op['opcode']:02X} {op['mnemonic']:<24} {' '.join(operand_items)} raw={op['raw_bytes']}")
    if ir["debug_symbols"]:
        lines.extend(["", f"Debug symbols @ 0x{ir['body']['debug_symbol_offset']:04X}:"])
        for symbol in ir["debug_symbols"]:
            lines.append(
                f" {symbol['index']:02X}: unk1=0x{symbol['unknown1']:02X} type=0x{symbol['type_id']:02X} ('{symbol['type_char']}') {symbol['bp_repr']} unk3=0x{symbol['unknown3']:02X} name={symbol['name']}"
            )
    if ir["field_tags"]:
        lines.extend(["", "Field tags:"])
        for tag in ir["field_tags"]:
            lines.append(
                f" {tag['tag_label']} ({tag['bp_repr']})"
            )
    if ir["body"]["unknown_trailing_bytes"]:
        lines.extend(["", f"unknown_trailing_bytes={ir['body']['unknown_trailing_bytes']}"])
    return "\n".join(lines) + "\n"
def main() -> None:
    """Command-line entry point: parse one class/slot body and emit the requested views.

    The IR JSON is always produced. The text listing, script view, pseudocode
    view and family diff are produced only when their ``--emit-*`` /
    ``--family-diff`` flag is given. Each view goes to its ``--*-output`` file
    when one is supplied and to stdout otherwise.
    """
    parser = argparse.ArgumentParser(description="Proof-of-concept Crusader USECODE parser over extracted owner-loaded artifacts")
    parser.add_argument("--class", dest="class_name", required=True, help="Class name from class_event_index.tsv, for example NPCTRIG")
    parser.add_argument("--slot", required=True, help="Event slot, for example 0x0A")
    parser.add_argument("--extracted-root", default=str(EXTRACTED_ROOT), help="Extracted USECODE root containing class_event_index.tsv and chunks/")
    parser.add_argument("--variant", choices=["auto", "regret", "remorse"], default="auto", help="Crusader intrinsic numbering to apply (default: auto, fallback regret)")
    parser.add_argument(
        "--shape-csv",
        help=(
            "Shape catalog CSV to apply to pseudocode output "
            "(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
            "Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
        ),
    )
    parser.add_argument("--output", help="Write IR JSON to this file instead of stdout")
    parser.add_argument("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON")
    parser.add_argument("--text-output", help="Write the text listing to this file")
    parser.add_argument("--emit-script", action="store_true", help="Emit a decompiled script-style view beside the JSON")
    parser.add_argument("--script-output", help="Write the script-style decompilation to this file")
    parser.add_argument("--emit-pseudocode", action="store_true", help="Emit a higher-level pseudocode view beside the JSON")
    parser.add_argument("--pseudocode-output", help="Write the pseudocode view to this file")
    parser.add_argument("--family-diff", action="store_true", help="Emit repeated-body family diff report instead of (or alongside) the IR")
    parser.add_argument("--family-diff-output", help="Write the family diff JSON to this file")
    parser.add_argument("--family-diff-text-output", help="Write the family diff text report to this file")
    args = parser.parse_args()

    def deliver(payload: str, destination: str | None, file_suffix: str = "") -> None:
        # Write to the named file when one was given, otherwise print.
        # *file_suffix* is appended only to the file copy (JSON files get a
        # trailing newline that print() already supplies on stdout).
        if destination:
            Path(destination).write_text(payload + file_suffix, encoding="utf-8")
        else:
            print(payload)

    slot = parse_int(args.slot)
    extracted_root = Path(args.extracted_root)
    if args.shape_csv:
        shape_csv = Path(args.shape_csv)
    else:
        shape_csv = default_shape_catalog_path(extracted_root, args.variant)
    shape_catalog = load_shape_catalog(shape_csv)
    event_row, layout_row = select_rows(args.class_name, slot, extracted_root)
    variant = None if args.variant == "auto" else args.variant
    ir = parse_body_ir(event_row, layout_row, variant, extracted_root)

    deliver(json.dumps(ir, indent=2), args.output, "\n")
    if args.emit_text:
        deliver(render_text(ir), args.text_output)
    if args.emit_script:
        deliver(render_script(ir), args.script_output)
    if args.emit_pseudocode:
        deliver(render_pseudocode(ir, shape_catalog=shape_catalog), args.pseudocode_output)
    if args.family_diff:
        diff = compute_family_diff(args.class_name, slot, extracted_root)
        deliver(json.dumps(diff, indent=2), args.family_diff_output, "\n")
        deliver(render_family_diff_text(diff), args.family_diff_text_output)


if __name__ == "__main__":
    main()