#!/usr/bin/env python3 """Extractor for Crusader's EUSECODE.FLX container. Current validated layout: - 0x80-byte header area - little-endian entry count at file offset 0x54 - entry table begins at 0x80 - each entry is 8 bytes: The exact semantics of the payload records are still under RE, so the extractor dumps all non-zero entries and emits human-readable sidecars (.strings.txt and index files) to support the next decoding pass. """ from __future__ import annotations import argparse import hashlib import json import pathlib import struct from dataclasses import asdict, dataclass DEFAULT_INPUT = pathlib.Path(r"k:\ghidra\Crusader_Decomp\USECODE\EUSECODE.FLX") DEFAULT_OUTPUT = pathlib.Path(r"k:\ghidra\Crusader_Decomp\USECODE\EUSECODE_extracted") DEFAULT_VALIDATION_PROFILE = "auto" @dataclass(frozen=True) class CandidateEntry: table_offset: int data_offset: int declared_size: int @dataclass class ExtractedChunk: index: int table_offset: int object_index: int | None data_offset: int declared_size: int next_offset: int | None extracted_size: int overlap_with_next: bool text_like: bool printable_ratio: float zero_ratio: float preview: str raw_path: str strings_path: str text_path: str | None primary_label: str | None field_names: list[str] field_tags: list[str] class_id: int | None = None class_name_hint: str | None = None raw_code_base_u32: int | None = None code_base_minus_one: int | None = None conservative_event_count: int | None = None event_table_end: int | None = None class_parse_status: str | None = None @dataclass(frozen=True) class ClassEventRow: entry_index: int object_index: int class_id: int class_name_hint: str slot: int event_name_hint: str | None raw_event_entry_word: int raw_code_offset: int derived_body_start: int | None derived_body_end: int | None derived_body_length: int | None @dataclass(frozen=True) class FlxTable: entry_count: int table_offset: int table_end: int entries: list[CandidateEntry] @dataclass(frozen=True) class FamilyArtifactSpec: output_stem: str title: str labels: tuple[str, ...] @dataclass(frozen=True) class RepeatedFamilyRowExpectation: class_name: str slot: int raw_event_entry_word: int raw_code_offset: int derived_body_start: int derived_body_end: int derived_body_length: int repeated_template_status: str def read_u32_le(data: bytes, offset: int) -> int: return struct.unpack_from(" int: return struct.unpack_from(" str: preview = [] for byte in data[:limit]: if 0x20 <= byte <= 0x7E: preview.append(chr(byte)) else: preview.append(".") return "".join(preview) def printable_ratio(data: bytes) -> float: if not data: return 0.0 printable = sum(1 for byte in data if byte in (0x09, 0x0A, 0x0D) or 0x20 <= byte <= 0x7E) return printable / len(data) def zero_ratio(data: bytes) -> float: if not data: return 0.0 return data.count(0) / len(data) def iter_printable_runs(data: bytes, min_len: int = 4) -> list[str]: runs: list[str] = [] current = bytearray() for byte in data: if byte in (0x09, 0x0A, 0x0D) or 0x20 <= byte <= 0x7E: current.append(byte) continue if len(current) >= min_len: runs.append(current.decode("latin-1")) current.clear() if len(current) >= min_len: runs.append(current.decode("latin-1")) return runs def summarize_descriptor(strings: list[str]) -> tuple[str | None, list[str]]: label_counts: dict[str, int] = {} field_names: list[str] = [] seen_fields: set[str] = set() for value in strings: if value.isupper() and any(ch.isalpha() for ch in value): label_counts[value] = label_counts.get(value, 0) + 1 continue if value and value[0].islower() and value.replace("_", "").isalnum() and value not in seen_fields: seen_fields.add(value) field_names.append(value) primary_label = None if label_counts: primary_label = sorted(label_counts.items(), key=lambda item: (-item[1], item[0]))[0][0] return primary_label, field_names def extract_field_tag_records(data: bytes, field_names: list[str]) -> list[str]: tags: list[str] = [] seen: set[str] = set() for field_name in field_names: needle = field_name.encode("latin-1") start = 0 while True: pos = data.find(needle, start) if pos < 3: break tag = f"{data[pos - 3]:02X}:{data[pos - 2]:02X}{data[pos - 1]:02X}->{field_name}" if tag not in seen: seen.add(tag) tags.append(tag) start = pos + 1 tags.sort() return tags def has_referent_field(chunk: ExtractedChunk) -> bool: if "referent" in chunk.field_names: return True return any(tag.endswith("->referent") for tag in chunk.field_tags) def get_event_evidence(chunk: ExtractedChunk) -> list[str]: evidence: list[str] = [] seen: set[str] = set() for field_name in chunk.field_names: if "event" not in field_name.lower(): continue marker = f"field:{field_name}" if marker not in seen: seen.add(marker) evidence.append(marker) for field_tag in chunk.field_tags: if "->event" not in field_tag.lower(): continue marker = f"tag:{field_tag}" if marker not in seen: seen.add(marker) evidence.append(marker) return evidence def chunk_role(chunk: ExtractedChunk) -> str: if chunk.primary_label in {"JELYHACK", "JELYH2"}: return "referent-anchor" if get_event_evidence(chunk): return "event-bearing" if has_referent_field(chunk): return "referent-neighbor" return "neighbor" def has_event_trigger_field(chunk: ExtractedChunk) -> bool: if any("eventtrigger" == field_name.lower() for field_name in chunk.field_names): return True return any("->eventtrigger" in field_tag.lower() for field_tag in chunk.field_tags) def event_tag_kind(chunk: ExtractedChunk) -> str: if any("->eventtrigger" in field_tag.lower() for field_tag in chunk.field_tags): return "eventTrigger" if any(field_tag.lower().endswith("->event") for field_tag in chunk.field_tags): return "event" return "" def classify_event_family(chunk: ExtractedChunk) -> str: if event_tag_kind(chunk) == "eventTrigger": return "callback-eventtrigger" if event_tag_kind(chunk) != "event": return "" if chunk.primary_label == "EVENT": return "event-hub" if chunk.primary_label and chunk.primary_label.endswith("_BOOT"): return "boot-event-core" if chunk.field_names == ["referent", "event"]: return "minimal-event-core" if any(name in chunk.field_names for name in ("flame", "flame2", "fire", "fire2", "steam", "steam2")): return "environmental-event" if "typeNpc" in chunk.field_names: return "npc-trigger" def normalize_validation_profile(value: str | None) -> str: if value is None: return DEFAULT_VALIDATION_PROFILE normalized = value.strip().lower() if normalized not in {"auto", "remorse", "regret", "none"}: raise ValueError(f"unsupported validation profile: {value}") return normalized def infer_validation_profile(input_path: pathlib.Path, output_path: pathlib.Path) -> str: combined_parts = [part.upper() for part in (*input_path.parts, *output_path.parts)] if "REGRET" in combined_parts: return "regret" return "remorse" def resolve_validation_profile(input_path: pathlib.Path, output_path: pathlib.Path, requested: str | None) -> str: normalized = normalize_validation_profile(requested) if normalized == "auto": return infer_validation_profile(input_path, output_path) return normalized return "specialized-event" def readable_role(chunk: ExtractedChunk) -> str: family = classify_event_family(chunk) if family == "event-hub": return "hub" if chunk_role(chunk) == "referent-anchor": return "anchor" if event_tag_kind(chunk) == "event": return "attach" if event_tag_kind(chunk) == "eventTrigger": return "callback" if has_referent_field(chunk): return "neighbor" return "data" def clean_token(value: str) -> str: return " ".join(value.split()) def readable_subject(chunk: ExtractedChunk) -> str: label = clean_token(chunk.primary_label or f"chunk_{chunk.index}") fields = ",".join(clean_token(field_name) for field_name in chunk.field_names) or "?" return f"{label}({fields})" def readable_signature(chunk: ExtractedChunk) -> str: return f"{readable_role(chunk)} {readable_subject(chunk)}" VERIFIED_VM_IR_ROWS: tuple[dict[str, str], ...] = ( { "stage_address": "000d:0988", "ir_name": "APPEND_UNIQUE_INLINE", "opcode_or_lane": "opcode 0x18 (implied sibling)", "payload_shape": "inline referent-chain payload", "evidence": "0x19/0x1a/0x1b compares in 000d:0988 imply 0x18 as append-unique inline sibling", }, { "stage_address": "000d:0988", "ir_name": "APPEND_UNIQUE_INDIRECT", "opcode_or_lane": "opcode 0x19", "payload_shape": "indirect/string-like referent-chain payload", "evidence": "[BP-0x32] == 0x19 path in 000d:0988 with indirect-mode flag", }, { "stage_address": "000d:0988", "ir_name": "REMOVE_MATCHING_INDIRECT", "opcode_or_lane": "opcode 0x1a", "payload_shape": "indirect/string-like referent-chain payload", "evidence": "[BP-0x32] == 0x1a path in 000d:0988 reaches entity_vm_referent_chain_remove_matching_from", }, { "stage_address": "000d:0988", "ir_name": "REMOVE_MATCHING_INLINE", "opcode_or_lane": "opcode 0x1b", "payload_shape": "inline referent-chain payload", "evidence": "[BP-0x32] == 0x1b path in 000d:0988 reaches entity_vm_referent_chain_remove_matching_from without indirect-mode flag", }, { "stage_address": "000d:177c", "ir_name": "PUSH_FRAME_WORD_LITERAL", "opcode_or_lane": "same FUN_000d_ebe3 sequencer family", "payload_shape": "word scalar pushed to stream stack", "evidence": "000d:177c subtracts 2 from [context+0xcc] and stores frame-local word before entity_vm_opcode_finish", }, { "stage_address": "000d:1acb", "ir_name": "COMPARE_STREAM_DWORD_AND_PUSH_BOOL", "opcode_or_lane": "same FUN_000d_ebe3 sequencer family", "payload_shape": "stream dword pair consumed, predicate word emitted", "evidence": "000d:1acb reads one 32-bit pair from stream, compares against AX:DX, pushes boolean word result", }, { "stage_address": "000d:208b", "ir_name": "MATERIALIZE_OR_FORWARD_VALUE", "opcode_or_lane": "slot-backed context consumer", "payload_shape": "materialized slot value or forwarded object result", "evidence": "000d:208b builds one VM context then forwards immediate or object-backed value through shared epilogue", }, { "stage_address": "000d:21ed", "ir_name": "PREPEND_INLINE_PAYLOAD", "opcode_or_lane": "inline payload substage", "payload_shape": "caller-owned blob copied into context +0x102 buffer", "evidence": "000d:21ed prepends caller bytes into backward-growing context buffer before metadata-driven follow-on work", }, { "stage_address": "000d:22bc", "ir_name": "BUILD_ENTITY_LINK_MATRIX", "opcode_or_lane": "inline payload follow-on stage", "payload_shape": "two signed metadata bytes plus streamed entity/link ids", "evidence": "000d:22bc consumes two signed bytes from +0xd6/+0xd8 and streamed words for repeated entity_link calls", }, { "stage_address": "000d:22bc", "ir_name": "EMIT_OR_PUSHBACK_RESULT", "opcode_or_lane": "inline payload follow-on stage", "payload_shape": "stream writeback filter", "evidence": "000d:23da..2421 pushes back only results without 0x0400 list flag before opcode finish", }, { "stage_address": "000d:2104", "ir_name": "FINALIZE_MIXED_VALUE_TO_OUTPTR", "opcode_or_lane": "same FUN_000d_ebe3 sequencer family", "payload_shape": "mixed immediate/object scalar return", "evidence": "000d:2104 writes either frame-local dword or object word with high word cleared to caller out-ptr", }, ) VERIFIED_MASK_LADDER_ROWS: tuple[dict[str, str], ...] = ( { "wrapper_address": "0005:27a4", "mask_pair": "0x0001:0000", "caller_anchor": "000c:a09e entity +0x5b bit 0x0004 branch", "descriptor_bias": "generic active-event-biased lane; no direct class-id bridge", }, { "wrapper_address": "0005:2867", "mask_pair": "0x0002:0001", "caller_anchor": "stores result into entity field +0x39", "descriptor_bias": "active-event ecosystem bias stronger than referent-anchor bias", }, { "wrapper_address": "0005:2ae2", "mask_pair": "0x0004:0002", "caller_anchor": "same verified local mask ladder around entity_vm_context_try_create_masked_for_entity", "descriptor_bias": "active-event ecosystem bias stronger than referent-anchor bias", }, { "wrapper_address": "0005:2c9b", "mask_pair": "0x0010:0004", "caller_anchor": "direct callers at 0005:5946 and 0005:59e9", "descriptor_bias": "active-event ecosystem bias stronger than referent-anchor bias", }, { "wrapper_address": "0005:2918", "mask_pair": "0x0020:0005", "caller_anchor": "+0x3c == 0x20b object lane, carries caller fields +0x36/+0x38", "descriptor_bias": "candidate active-event field bridge; still not descriptor-specific", }, { "wrapper_address": "0005:2c06", "mask_pair": "0x0200:0009", "caller_anchor": "direct caller anchor at 0005:0292", "descriptor_bias": "active-event ecosystem bias stronger than referent-anchor bias", }, { "wrapper_address": "0005:2c35", "mask_pair": "0x0400:000a", "caller_anchor": "xref-dark signed slot-offset wrapper forwarding context +0x34", "descriptor_bias": "offset-specialized masked context creation; descriptor family unresolved", }, { "wrapper_address": "0005:2c68", "mask_pair": "0x0800:000b", "caller_anchor": "xref-dark signed slot-offset wrapper forwarding context +0x34", "descriptor_bias": "offset-specialized masked context creation; descriptor family unresolved", }, { "wrapper_address": "0005:2cd2", "mask_pair": "0x1000:000c", "caller_anchor": "direct caller anchor at 0005:0fee", "descriptor_bias": "active-event ecosystem bias stronger than referent-anchor bias", }, { "wrapper_address": "0004:f05c", "mask_pair": "0x2000:0015", "caller_anchor": "0004:f2b3 overlap/proximity branch with entity byte +0x32 state checks", "descriptor_bias": "gameplay-state lane feeding runtime materialization, not direct descriptor switch", }, { "wrapper_address": "0005:2d01", "mask_pair": "0x4000:000e", "caller_anchor": "direct callers at 0007:814e and 0007:822e", "descriptor_bias": "active-event ecosystem bias stronger than referent-anchor bias", }, { "wrapper_address": "0004:f033", "mask_pair": "0x8000:0007", "caller_anchor": "local wrapper seed recovered from direct instruction evidence", "descriptor_bias": "gameplay-side materialization lane; still descriptor-agnostic", }, { "wrapper_address": "0005:2d30", "mask_pair": "0x8000:000f", "caller_anchor": "entity id/class flag gate plus 0x0f16 / 0x20f dispatch-entry emission path", "descriptor_bias": "strongest current active-event ecosystem candidate in the ladder", }, ) SCUMMVM_EVENT_NAME_HINTS: tuple[str, ...] = ( "look", "use", "anim", "setActivity", "cachein", "hit", "gotHit", "hatch", "schedule", "release", "equip", "unequip", "combine", "func0D", "calledFromAnim", "enterFastArea", "leaveFastArea", "cast", "justMoved", "avatarStoleSomething", "animGetHit", "unhatch", "func16", "func17", "func18", "func19", "func1A", "func1B", "func1C", "func1D", "func1E", "func1F", ) VERIFIED_REPEATED_TEMPLATE_FAMILIES: tuple[tuple[str, tuple[str, ...]], ...] = ( ("referent-anchor-twin", ("JELYHACK", "JELYH2")), ("boot-event-core", ("AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT")), ("callback-eventtrigger", ("SURCAMNS", "SURCAMEW")), ("environmental-event", ("FLAMEBOX", "NOSTRIL", "STEAMBOX")), ) FAMILY_ARTIFACT_SPECS: tuple[FamilyArtifactSpec, ...] = ( FamilyArtifactSpec( output_stem="boot_family_decompile", title="_BOOT Family Decompiled Event Sketches", labels=("AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT"), ), FamilyArtifactSpec( output_stem="callback_family_decompile", title="SURCAM Callback Family Decompiled Event Sketches", labels=("SURCAMNS", "SURCAMEW"), ), FamilyArtifactSpec( output_stem="environmental_family_decompile", title="Environmental Family Decompiled Event Sketches", labels=("FLAMEBOX", "NOSTRIL", "STEAMBOX"), ), ) IMMORTALITY_TARGET_LABELS: tuple[str, ...] = ( "EVENT", "NPCTRIG", "COR_BOOT", "REE_BOOT", "SFXTRIG", "SPECIAL", "TRIGPAD", ) IMMORTALITY_TEMPLATE_COMPARE_LABELS: frozenset[str] = frozenset( {"NPCTRIG", "COR_BOOT", "REE_BOOT", "SFXTRIG"} ) IMMORTALITY_STRUCTURAL_TARGET_LABELS: frozenset[str] = frozenset({"EVENT", "NPCTRIG"}) IMMORTALITY_BODY_MOTIFS: tuple[tuple[str, bytes], ...] = ( ("call_40_06_4c_02", bytes.fromhex("40 06 4c 02")), ("call_40_06_0f_04", bytes.fromhex("40 06 0f 04")), ("subheader_53_5c", bytes.fromhex("53 5c")), ("writeback_57_02", bytes.fromhex("57 02")), ("branch_59_0a", bytes.fromhex("59 0a")), ("branch_3f_0a", bytes.fromhex("3f 0a")), ("field_4b_fe_0f", bytes.fromhex("4b fe 0f")), ("field_4b_fc_0f", bytes.fromhex("4b fc 0f")), ("push_24_51", bytes.fromhex("24 51")), ("event_field_69_0a_00", bytes.fromhex("69 0a 00")), ) VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS_BY_PROFILE: dict[str, tuple[RepeatedFamilyRowExpectation, ...]] = { "remorse": ( RepeatedFamilyRowExpectation("JELYHACK", 0x01, 0x002A, 0x00000001, 0x00D4, 0x00FE, 42, "referent-anchor-twin/shared-slot-0x01/same-length-template"), RepeatedFamilyRowExpectation("JELYH2", 0x01, 0x002A, 0x00000001, 0x00D4, 0x00FE, 42, "referent-anchor-twin/shared-slot-0x01/same-length-template"), RepeatedFamilyRowExpectation("AND_BOOT", 0x0A, 0x0253, 0x00000001, 0x00D4, 0x0327, 595, "boot-event-core/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("AND_BOOT", 0x0F, 0x0237, 0x00000254, 0x0327, 0x055E, 567, "boot-event-core/shared-slot-0x0F/shared-slot-template"), RepeatedFamilyRowExpectation("AND_BOOT", 0x10, 0x003B, 0x0000048B, 0x055E, 0x0599, 59, "boot-event-core/shared-slot-0x10/same-length-template"), RepeatedFamilyRowExpectation("BRO_BOOT", 0x0A, 0x02D5, 0x00000001, 0x00D4, 0x03A9, 725, "boot-event-core/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("BRO_BOOT", 0x0F, 0x024C, 0x000002D6, 0x03A9, 0x05F5, 588, "boot-event-core/shared-slot-0x0F/shared-slot-template"), RepeatedFamilyRowExpectation("BRO_BOOT", 0x10, 0x003B, 0x00000522, 0x05F5, 0x0630, 59, "boot-event-core/shared-slot-0x10/same-length-template"), RepeatedFamilyRowExpectation("COR_BOOT", 0x0A, 0x0227, 0x00000001, 0x00D4, 0x02FB, 551, "boot-event-core/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("COR_BOOT", 0x0F, 0x0234, 0x00000228, 0x02FB, 0x052F, 564, "boot-event-core/shared-slot-0x0F/shared-slot-template"), RepeatedFamilyRowExpectation("COR_BOOT", 0x10, 0x003B, 0x0000045C, 0x052F, 0x056A, 59, "boot-event-core/shared-slot-0x10/same-length-template"), RepeatedFamilyRowExpectation("REE_BOOT", 0x0A, 0x034B, 0x00000001, 0x00D4, 0x041F, 843, "boot-event-core/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("REE_BOOT", 0x0F, 0x025C, 0x0000034C, 0x041F, 0x067B, 604, "boot-event-core/shared-slot-0x0F/shared-slot-template"), RepeatedFamilyRowExpectation("REE_BOOT", 0x10, 0x003B, 0x000005A8, 0x067B, 0x06B6, 59, "boot-event-core/shared-slot-0x10/same-length-template"), RepeatedFamilyRowExpectation("VAR_BOOT", 0x0A, 0x029A, 0x00000001, 0x00D4, 0x036E, 666, "boot-event-core/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("VAR_BOOT", 0x0F, 0x0244, 0x0000029B, 0x036E, 0x05B2, 580, "boot-event-core/shared-slot-0x0F/shared-slot-template"), RepeatedFamilyRowExpectation("VAR_BOOT", 0x10, 0x003B, 0x000004DF, 0x05B2, 0x05ED, 59, "boot-event-core/shared-slot-0x10/same-length-template"), RepeatedFamilyRowExpectation("SURCAMNS", 0x01, 0x0051, 0x000000D2, 0x01B7, 0x0208, 81, "callback-eventtrigger/shared-slot-0x01/shared-slot-template"), RepeatedFamilyRowExpectation("SURCAMNS", 0x0A, 0x00D1, 0x00000001, 0x00E6, 0x01B7, 209, "callback-eventtrigger/shared-slot-0x0A/same-length-template"), RepeatedFamilyRowExpectation("SURCAMNS", 0x20, 0x02BA, 0x00000123, 0x0208, 0x04C2, 698, "callback-eventtrigger/shared-slot-0x20/same-length-template"), RepeatedFamilyRowExpectation("SURCAMNS", 0x21, 0x0709, 0x000003DD, 0x04C2, 0x0BCB, 1801, "callback-eventtrigger/shared-slot-0x21/shared-slot-template"), RepeatedFamilyRowExpectation("SURCAMNS", 0x22, 0x01A3, 0x00000AE6, 0x0BCB, 0x0D6E, 419, "callback-eventtrigger/shared-slot-0x22/same-length-template"), RepeatedFamilyRowExpectation("SURCAMEW", 0x01, 0x00F7, 0x000000D2, 0x01B7, 0x02AE, 247, "callback-eventtrigger/shared-slot-0x01/shared-slot-template"), RepeatedFamilyRowExpectation("SURCAMEW", 0x0A, 0x00D1, 0x00000001, 0x00E6, 0x01B7, 209, "callback-eventtrigger/shared-slot-0x0A/same-length-template"), RepeatedFamilyRowExpectation("SURCAMEW", 0x20, 0x02BA, 0x000001C9, 0x02AE, 0x0568, 698, "callback-eventtrigger/shared-slot-0x20/same-length-template"), RepeatedFamilyRowExpectation("SURCAMEW", 0x21, 0x0655, 0x00000483, 0x0568, 0x0BBD, 1621, "callback-eventtrigger/shared-slot-0x21/shared-slot-template"), RepeatedFamilyRowExpectation("SURCAMEW", 0x22, 0x01A3, 0x00000AD8, 0x0BBD, 0x0D60, 419, "callback-eventtrigger/shared-slot-0x22/same-length-template"), RepeatedFamilyRowExpectation("FLAMEBOX", 0x0A, 0x026A, 0x00000001, 0x00E0, 0x034A, 618, "environmental-event/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("FLAMEBOX", 0x20, 0x01AC, 0x0000026B, 0x034A, 0x04F6, 428, "environmental-event/shared-slot-0x20/shared-slot-template"), RepeatedFamilyRowExpectation("FLAMEBOX", 0x21, 0x029A, 0x00000417, 0x04F6, 0x0790, 666, "environmental-event/shared-slot-0x21/shared-slot-template"), RepeatedFamilyRowExpectation("NOSTRIL", 0x0A, 0x00C0, 0x00000001, 0x00E0, 0x01A0, 192, "environmental-event/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("NOSTRIL", 0x20, 0x0129, 0x000000C1, 0x01A0, 0x02C9, 297, "environmental-event/shared-slot-0x20/shared-slot-template"), RepeatedFamilyRowExpectation("NOSTRIL", 0x21, 0x01BE, 0x000001EA, 0x02C9, 0x0487, 446, "environmental-event/shared-slot-0x21/shared-slot-template"), RepeatedFamilyRowExpectation("STEAMBOX", 0x0A, 0x0266, 0x00000001, 0x00E0, 0x0346, 614, "environmental-event/shared-slot-0x0A/shared-slot-template"), RepeatedFamilyRowExpectation("STEAMBOX", 0x20, 0x01F6, 0x00000267, 0x0346, 0x053C, 502, "environmental-event/shared-slot-0x20/shared-slot-template"), RepeatedFamilyRowExpectation("STEAMBOX", 0x21, 0x02A7, 0x0000045D, 0x053C, 0x07E3, 679, "environmental-event/shared-slot-0x21/shared-slot-template"), ), "regret": ( RepeatedFamilyRowExpectation("JELYHACK", 0x01, 0x000D, 0x00000001, 0x00D4, 0x00E1, 13, ""), RepeatedFamilyRowExpectation("JELYH2", 0x01, 0x000D, 0x00000001, 0x00D4, 0x00E1, 13, ""), RepeatedFamilyRowExpectation("FLAMEBOX", 0x0A, 0x01D8, 0x00000001, 0x00E0, 0x02B8, 472, ""), RepeatedFamilyRowExpectation("FLAMEBOX", 0x20, 0x011D, 0x000001D9, 0x02B8, 0x03D5, 285, ""), RepeatedFamilyRowExpectation("FLAMEBOX", 0x21, 0x01BD, 0x000002F6, 0x03D5, 0x0592, 445, ""), RepeatedFamilyRowExpectation("NOSTRIL", 0x0A, 0x007B, 0x00000001, 0x00E0, 0x015B, 123, ""), RepeatedFamilyRowExpectation("NOSTRIL", 0x20, 0x00BE, 0x0000007C, 0x015B, 0x0219, 190, ""), RepeatedFamilyRowExpectation("NOSTRIL", 0x21, 0x013B, 0x0000013A, 0x0219, 0x0354, 315, ""), RepeatedFamilyRowExpectation("STEAMBOX", 0x0A, 0x01D7, 0x00000001, 0x00E0, 0x02B7, 471, ""), RepeatedFamilyRowExpectation("STEAMBOX", 0x20, 0x014F, 0x000001D8, 0x02B7, 0x0406, 335, ""), RepeatedFamilyRowExpectation("STEAMBOX", 0x21, 0x01CD, 0x00000327, 0x0406, 0x05D3, 461, ""), ), } def scummvm_event_name_hint(slot: int) -> str | None: if 0 <= slot < len(SCUMMVM_EVENT_NAME_HINTS): return SCUMMVM_EVENT_NAME_HINTS[slot] return None def object_index_from_table_offset(table_offset: int) -> int | None: if table_offset < 0x80: return None relative = table_offset - 0x80 if relative % 8 != 0: return None return relative // 8 def decode_name_table_entry(name_table_data: bytes, class_id: int) -> str | None: if class_id < 0: return None name_offset = 4 + 13 * class_id if name_offset + 13 > len(name_table_data): return None raw_name = name_table_data[name_offset:name_offset + 13] raw_name = raw_name.split(b"\x00", 1)[0].rstrip(b"\x00 ") if not raw_name: return None return raw_name.decode("latin-1", errors="replace") def annotate_class_layout(chunks: list[ExtractedChunk]) -> None: name_table_data: bytes | None = None for chunk in chunks: chunk.object_index = object_index_from_table_offset(chunk.table_offset) if chunk.object_index == 1: name_table_data = pathlib.Path(chunk.raw_path).read_bytes() chunk.class_parse_status = "name-table" for chunk in chunks: if chunk.object_index is None: chunk.class_parse_status = chunk.class_parse_status or "unaligned-table-offset" continue if chunk.object_index < 2: chunk.class_parse_status = chunk.class_parse_status or "non-class-object" continue chunk.class_id = chunk.object_index - 2 if name_table_data is not None: chunk.class_name_hint = decode_name_table_entry(name_table_data, chunk.class_id) raw_data = pathlib.Path(chunk.raw_path).read_bytes() if len(raw_data) < 20: chunk.class_parse_status = "too-small-for-class-header" continue raw_code_base_u32 = read_u32_le(raw_data, 8) chunk.raw_code_base_u32 = raw_code_base_u32 if raw_code_base_u32 > 0: chunk.code_base_minus_one = raw_code_base_u32 - 1 event_region = raw_code_base_u32 - 20 if event_region < 0: chunk.class_parse_status = "header-before-event-table" continue if event_region % 6 != 0: chunk.class_parse_status = "event-region-not-divisible-by-6" continue event_count = event_region // 6 event_table_end = 20 + event_count * 6 if event_table_end > len(raw_data): chunk.class_parse_status = "event-table-past-object-end" continue chunk.conservative_event_count = event_count chunk.event_table_end = event_table_end chunk.class_parse_status = "parsed-class-layout" def derive_class_event_rows(chunk: ExtractedChunk, raw_data: bytes) -> list[ClassEventRow]: if chunk.class_parse_status != "parsed-class-layout": return [] if chunk.object_index is None or chunk.class_id is None or chunk.conservative_event_count is None: return [] provisional_rows: list[tuple[int, int, int]] = [] for slot in range(chunk.conservative_event_count): entry_offset = 20 + 6 * slot raw_word = read_u16_le(raw_data, entry_offset) raw_code_offset = read_u32_le(raw_data, entry_offset + 2) provisional_rows.append((slot, raw_word, raw_code_offset)) non_zero_offsets = sorted( { raw_code_offset for _, _, raw_code_offset in provisional_rows if raw_code_offset != 0 } ) rows: list[ClassEventRow] = [] for slot, raw_word, raw_code_offset in provisional_rows: derived_body_start: int | None = None derived_body_end: int | None = None derived_body_length: int | None = None if raw_code_offset != 0 and chunk.code_base_minus_one is not None: body_start = chunk.code_base_minus_one + raw_code_offset next_offsets = [offset for offset in non_zero_offsets if offset > raw_code_offset] body_end = chunk.code_base_minus_one + next_offsets[0] if next_offsets else len(raw_data) if 0 <= body_start <= body_end <= len(raw_data): derived_body_start = body_start derived_body_end = body_end derived_body_length = body_end - body_start rows.append( ClassEventRow( entry_index=chunk.index, object_index=chunk.object_index, class_id=chunk.class_id, class_name_hint=chunk.class_name_hint or "", slot=slot, event_name_hint=scummvm_event_name_hint(slot), raw_event_entry_word=raw_word, raw_code_offset=raw_code_offset, derived_body_start=derived_body_start, derived_body_end=derived_body_end, derived_body_length=derived_body_length, ) ) return rows def build_class_event_rows( parsed_class_chunks: list[ExtractedChunk], ) -> tuple[list[ClassEventRow], dict[int, list[ClassEventRow]], dict[int, bytes]]: all_rows: list[ClassEventRow] = [] rows_by_entry: dict[int, list[ClassEventRow]] = {} raw_data_by_entry: dict[int, bytes] = {} for chunk in parsed_class_chunks: raw_data = pathlib.Path(chunk.raw_path).read_bytes() raw_data_by_entry[chunk.index] = raw_data rows = derive_class_event_rows(chunk, raw_data) rows_by_entry[chunk.index] = rows all_rows.extend(rows) return all_rows, rows_by_entry, raw_data_by_entry def build_repeated_template_status_map( parsed_class_chunks: list[ExtractedChunk], rows_by_entry: dict[int, list[ClassEventRow]], raw_data_by_entry: dict[int, bytes], ) -> dict[tuple[int, int], str]: status_by_row: dict[tuple[int, int], str] = {} chunk_by_label = { chunk.primary_label: chunk for chunk in parsed_class_chunks if chunk.primary_label } for family_name, labels in VERIFIED_REPEATED_TEMPLATE_FAMILIES: family_chunks = [chunk_by_label[label] for label in labels if label in chunk_by_label] if len(family_chunks) < 2: continue rows_by_slot: dict[int, list[tuple[ExtractedChunk, ClassEventRow, bytes]]] = {} for chunk in family_chunks: raw_data = raw_data_by_entry.get(chunk.index) if raw_data is None: continue for row in rows_by_entry.get(chunk.index, []): if row.raw_code_offset == 0: continue if row.derived_body_start is None or row.derived_body_end is None: continue body = raw_data[row.derived_body_start:row.derived_body_end] rows_by_slot.setdefault(row.slot, []).append((chunk, row, body)) for slot, slot_rows in rows_by_slot.items(): if len(slot_rows) < 2: continue lengths = {len(body) for _, _, body in slot_rows} bodies = {body for _, _, body in slot_rows} if len(bodies) == 1: status_suffix = "exact-body-clone" elif len(lengths) == 1: status_suffix = "same-length-template" else: status_suffix = "shared-slot-template" status = f"{family_name}/shared-slot-0x{slot:02X}/{status_suffix}" for chunk, row, _ in slot_rows: status_by_row[(chunk.index, row.slot)] = status return status_by_row def format_optional_hex(value: int | None, width: int = 0) -> str: if value is None: return "" if width > 0: return f"0x{value:0{width}X}" return f"0x{value:X}" def hex_edge(data: bytes, width: int = 8) -> str: if not data: return "" return data[:width].hex() def hex_tail(data: bytes, width: int = 8) -> str: if not data: return "" return data[-width:].hex() def find_all_offsets(haystack: bytes, needle: bytes) -> list[int]: offsets: list[int] = [] start = 0 while True: found = haystack.find(needle, start) if found < 0: return offsets offsets.append(found) start = found + 1 def common_prefix_len(left: bytes, right: bytes) -> int: limit = min(len(left), len(right)) offset = 0 while offset < limit and left[offset] == right[offset]: offset += 1 return offset def common_suffix_len(left: bytes, right: bytes) -> int: limit = min(len(left), len(right)) offset = 0 while offset < limit and left[-1 - offset] == right[-1 - offset]: offset += 1 return offset def write_family_decompile_artifact( out_dir: pathlib.Path, parsed_class_chunks: list[ExtractedChunk], rows_by_entry: dict[int, list[ClassEventRow]], raw_data_by_entry: dict[int, bytes], repeated_status_by_row: dict[tuple[int, int], str], spec: FamilyArtifactSpec, ) -> None: family_labels = set(spec.labels) family_chunks = [chunk for chunk in parsed_class_chunks if chunk.primary_label in family_labels] if not family_chunks: return family_chunks.sort(key=lambda chunk: chunk.primary_label or "") tsv_lines = [ "entry_index\tclass_id\tclass_name\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset\tderived_body_start\tderived_body_end\tderived_body_length\trepeated_template_status\tbody_sha1\tbody_prefix_hex\tbody_suffix_hex" ] md_lines = [ f"# {spec.title}", "", "This is a reversible per-class rendering derived directly from `class_event_index.tsv` plus the raw extracted chunk bytes.", "ScummVM event labels remain hints only; the authoritative data here is the slot id, raw row bytes, and derived body window.", "", ] for chunk in family_chunks: rows = [row for row in rows_by_entry.get(chunk.index, []) if row.raw_code_offset != 0] if not rows: continue raw_data = raw_data_by_entry[chunk.index] md_lines.extend([ f"## {chunk.primary_label}", "", "```yaml", "class:", f" entry_index: 0x{chunk.index:03X}", f" class_id: 0x{chunk.class_id:X}", f" class_name: {chunk.primary_label}", f" class_object_index: 0x{chunk.object_index:X}", f" raw_code_base_u32: 0x{chunk.raw_code_base_u32:X}", f" code_base_minus_one: 0x{chunk.code_base_minus_one:X}", f" conservative_event_count: {chunk.conservative_event_count}", " events:", ]) for row in rows: body = b"" if row.derived_body_start is not None and row.derived_body_end is not None: body = raw_data[row.derived_body_start:row.derived_body_end] repeated_status = repeated_status_by_row.get((row.entry_index, row.slot), "") body_sha1 = hashlib.sha1(body).hexdigest() if body else "" md_lines.extend([ f" - slot: 0x{row.slot:02x}", f" event_name_hint: {row.event_name_hint or ''}", f" raw_event_entry_word: 0x{row.raw_event_entry_word:04x}", f" raw_code_offset: 0x{row.raw_code_offset:08x}", f" derived_body_start: {format_optional_hex(row.derived_body_start, 4).lower() or 'null'}", f" derived_body_end: {format_optional_hex(row.derived_body_end, 4).lower() or 'null'}", f" derived_body_length: {row.derived_body_length if row.derived_body_length is not None else 'null'}", f" repeated_template_status: {repeated_status or 'unique-or-unclassified'}", f" body_sha1: {body_sha1 or 'null'}", f" body_prefix_hex: {hex_edge(body) or 'null'}", f" body_suffix_hex: {hex_tail(body) or 'null'}", ]) tsv_lines.append( "{entry_index}\t0x{class_id:X}\t{class_name}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_event_entry_word:04X}\t0x{raw_code_offset:08X}\t{derived_body_start}\t{derived_body_end}\t{derived_body_length}\t{repeated_template_status}\t{body_sha1}\t{body_prefix_hex}\t{body_suffix_hex}".format( entry_index=row.entry_index, class_id=row.class_id, class_name=chunk.primary_label or "", slot=row.slot, event_name_hint=row.event_name_hint or "", raw_event_entry_word=row.raw_event_entry_word, raw_code_offset=row.raw_code_offset, derived_body_start=format_optional_hex(row.derived_body_start, 4), derived_body_end=format_optional_hex(row.derived_body_end, 4), derived_body_length=(row.derived_body_length if row.derived_body_length is not None else ""), repeated_template_status=repeated_status, body_sha1=body_sha1, body_prefix_hex=hex_edge(body), body_suffix_hex=hex_tail(body), ) ) md_lines.extend([ "```", "", ]) (out_dir / f"{spec.output_stem}.md").write_text("\n".join(md_lines), encoding="utf-8") (out_dir / f"{spec.output_stem}.tsv").write_text("\n".join(tsv_lines) + "\n", encoding="utf-8") def validate_verified_repeated_family_regressions( parsed_class_chunks: list[ExtractedChunk], rows_by_entry: dict[int, list[ClassEventRow]], repeated_status_by_row: dict[tuple[int, int], str], validation_profile: str, ) -> list[str]: report_lines = [ "record_type\tclass_name\tslot\texpected\tactual\tstatus" ] expectations = VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS_BY_PROFILE.get(validation_profile) if not expectations: report_lines.append( f"meta\t*\t*\tprofile-baseline\t{validation_profile}\tskipped" ) return report_lines chunk_by_label: dict[str, ExtractedChunk] = {} for chunk in parsed_class_chunks: if chunk.primary_label: chunk_by_label.setdefault(chunk.primary_label, chunk) if chunk.class_name_hint: chunk_by_label.setdefault(chunk.class_name_hint, chunk) expected_slots_by_class: dict[str, set[int]] = {} for expectation in expectations: expected_slots_by_class.setdefault(expectation.class_name, set()).add(expectation.slot) errors: list[str] = [] for class_name, expected_slots in sorted(expected_slots_by_class.items()): chunk = chunk_by_label.get(class_name) actual_slots: set[int] = set() if chunk is not None: actual_slots = { row.slot for row in rows_by_entry.get(chunk.index, []) if row.raw_code_offset != 0 } status = "ok" if actual_slots == expected_slots else "mismatch" report_lines.append( "slot-set\t{class_name}\t*\t{expected}\t{actual}\t{status}".format( class_name=class_name, expected=",".join(f"0x{slot:02X}" for slot in sorted(expected_slots)), actual=",".join(f"0x{slot:02X}" for slot in sorted(actual_slots)), status=status, ) ) if status != "ok": errors.append( f"{class_name}: expected non-zero slots {sorted(expected_slots)}, found {sorted(actual_slots)}" ) for expectation in expectations: chunk = chunk_by_label.get(expectation.class_name) if chunk is None: errors.append(f"missing repeated-family class {expectation.class_name}") report_lines.append( f"row\t{expectation.class_name}\t0x{expectation.slot:02X}\tpresent\tmissing-class\tmismatch" ) continue row = next( (candidate for candidate in rows_by_entry.get(chunk.index, []) if candidate.slot == expectation.slot), None, ) if row is None: errors.append(f"missing row {expectation.class_name} slot 0x{expectation.slot:02X}") report_lines.append( f"row\t{expectation.class_name}\t0x{expectation.slot:02X}\tpresent\tmissing-row\tmismatch" ) continue actual_values = ( row.raw_event_entry_word, row.raw_code_offset, row.derived_body_start, row.derived_body_end, row.derived_body_length, repeated_status_by_row.get((row.entry_index, row.slot), ""), ) expected_values = ( expectation.raw_event_entry_word, expectation.raw_code_offset, expectation.derived_body_start, expectation.derived_body_end, expectation.derived_body_length, expectation.repeated_template_status, ) status = "ok" if actual_values == expected_values else "mismatch" report_lines.append( "row\t{class_name}\t0x{slot:02X}\t{expected}\t{actual}\t{status}".format( class_name=expectation.class_name, slot=expectation.slot, expected="|".join( [ f"0x{expectation.raw_event_entry_word:04X}", f"0x{expectation.raw_code_offset:08X}", f"0x{expectation.derived_body_start:04X}", f"0x{expectation.derived_body_end:04X}", str(expectation.derived_body_length), expectation.repeated_template_status, ] ), actual="|".join( [ f"0x{row.raw_event_entry_word:04X}", f"0x{row.raw_code_offset:08X}", format_optional_hex(row.derived_body_start, 4), format_optional_hex(row.derived_body_end, 4), str(row.derived_body_length if row.derived_body_length is not None else ""), repeated_status_by_row.get((row.entry_index, row.slot), ""), ] ), status=status, ) ) if status != "ok": errors.append( "{class_name} slot 0x{slot:02X}: expected {expected}, found {actual}".format( class_name=expectation.class_name, slot=expectation.slot, expected=expected_values, actual=actual_values, ) ) if errors: raise ValueError( "repeated-family regression mismatch:\n- " + "\n- ".join(errors) ) return report_lines def write_immortality_target_body_scan( out_dir: pathlib.Path, parsed_class_chunks: list[ExtractedChunk], rows_by_entry: dict[int, list[ClassEventRow]], raw_data_by_entry: dict[int, bytes], ) -> None: chunk_by_label = { chunk.primary_label: chunk for chunk in parsed_class_chunks if chunk.primary_label } scan_patterns = ( ("le16_0410", struct.pack(" tuple[str, int] | None: end = start limit = min(len(data), start + max_len) while end < limit and data[end] != 0: byte = data[end] if not (0x20 <= byte <= 0x7E): return None end += 1 if end >= len(data) or end == start or data[end] != 0: return None return data[start:end].decode("latin-1"), end + 1 def parse_body_open_header(body: bytes) -> dict[str, object] | None: if len(body) < 7: return None if body[0] == 0x5A and body[2] == 0x5C: open_arg = body[1] target_offset = 3 label_offset = 5 elif body[1] == 0x5C: open_arg = body[0] target_offset = 1 label_offset = 3 else: return None label_result = read_ascii_cstring(body, label_offset) if label_result is None: return None label, offset = label_result while offset < len(body) and body[offset] == 0: offset += 1 event_code = body[offset + 1] if offset + 1 < len(body) and body[offset] == 0x0B else None return { "open_arg": open_arg, "target": read_u16_le(body, target_offset), "label": label, "event_code": event_code, } def find_labeled_subheaders(body: bytes, label: str) -> list[tuple[int, int]]: offsets: list[tuple[int, int]] = [] label_bytes = label.encode("latin-1") marker = b"\x53\x5c" search_start = 0 while True: found = body.find(marker, search_start) if found < 0 or found + 4 >= len(body): return offsets if body[found + 4:found + 4 + len(label_bytes)] == label_bytes: offsets.append((found, read_u16_le(body, found + 2))) search_start = found + 1 def scan_body_field_tokens(body: bytes, tail_window: int | None = None) -> list[str]: tokens: list[str] = [] seen: set[str] = set() start = max(0, len(body) - tail_window) if tail_window is not None else 0 for offset in range(start, len(body) - 4): if body[offset] not in {0x24, 0x69}: continue field_result = read_ascii_cstring(body, offset + 3) if field_result is None: continue field_name, _ = field_result token = f"{body[offset]:02X}:{read_u16_le(body, offset + 1):04X}->{field_name}" if token not in seen: seen.add(token) tokens.append(token) return tokens def format_offset_list(offsets: list[int], limit: int = 10) -> str: if not offsets: return "" rendered = ",".join(f"0x{offset:04X}" for offset in offsets[:limit]) if len(offsets) > limit: rendered += ",..." return rendered def scan_body_field_offsets(body: bytes, tail_window: int | None = None) -> list[tuple[int, str]]: tokens: list[tuple[int, str]] = [] seen: set[tuple[int, str]] = set() start = max(0, len(body) - tail_window) if tail_window is not None else 0 for offset in range(start, len(body) - 4): if body[offset] not in {0x24, 0x69}: continue field_result = read_ascii_cstring(body, offset + 3) if field_result is None: continue field_name, _ = field_result token = f"{body[offset]:02X}:{read_u16_le(body, offset + 1):04X}->{field_name}" entry = (offset, token) if entry in seen: continue seen.add(entry) tokens.append(entry) return tokens def count_offsets_in_range(offsets: list[int], start: int, end: int) -> int: return sum(1 for offset in offsets if start <= offset < end) def relative_offsets_in_range(offsets: list[int], start: int, end: int) -> list[int]: return [offset - start for offset in offsets if start <= offset < end] def format_relative_offsets(offsets: list[int], limit: int = 8) -> str: if not offsets: return "-" rendered = ",".join(f"+0x{offset:02X}" for offset in offsets[:limit]) if len(offsets) > limit: rendered += ",..." return rendered def find_repeated_windows(body: bytes, size: int, min_count: int = 2, max_results: int = 6) -> list[tuple[bytes, list[int]]]: if size <= 0 or len(body) < size: return [] offsets_by_window: dict[bytes, list[int]] = {} for offset in range(0, len(body) - size + 1): window = body[offset:offset + size] if window.count(0) == len(window): continue offsets_by_window.setdefault(window, []).append(offset) repeated = [ (window, offsets) for window, offsets in offsets_by_window.items() if len(offsets) >= min_count ] repeated.sort(key=lambda item: (-len(item[1]), item[1][0], item[0])) return repeated[:max_results] def format_hex_window(window: bytes) -> str: return " ".join(f"{byte:02X}" for byte in window) def build_npctrig_clause_segments( body: bytes, subheaders: list[tuple[int, int]], ) -> tuple[list[tuple[str, int, int]], int]: first_subheader = subheaders[0][0] if subheaders else 0 tail_fields = scan_body_field_offsets(body, tail_window=min(len(body), 192)) tail_start = tail_fields[0][0] if tail_fields else len(body) if tail_start <= first_subheader: tail_start = len(body) segments: list[tuple[str, int, int]] = [] if first_subheader > 0: segments.append(("prefix", 0, first_subheader)) for index, (start, _) in enumerate(subheaders): next_start = subheaders[index + 1][0] if index + 1 < len(subheaders) else tail_start segments.append((f"clause_{index + 1}", start, next_start)) if tail_start < len(body): segments.append(("tail", tail_start, len(body))) return segments, tail_start def write_npctrig_clause_report( out_dir: pathlib.Path, parsed_class_chunks: list[ExtractedChunk], rows_by_entry: dict[int, list[ClassEventRow]], raw_data_by_entry: dict[int, bytes], ) -> None: chunk = next((candidate for candidate in parsed_class_chunks if candidate.primary_label == "NPCTRIG"), None) if chunk is None: return raw_data = raw_data_by_entry.get(chunk.index) if raw_data is None: return records: list[dict[str, object]] = [] clause_motif_names = ["subheader_53_5c", "branch_3f_0a", "writeback_57_02", "push_24_51", "field_4b_fe_0f"] for row in rows_by_entry.get(chunk.index, []): if row.raw_code_offset == 0 or row.derived_body_start is None or row.derived_body_end is None: continue body = raw_data[row.derived_body_start:row.derived_body_end] header = parse_body_open_header(body) subheaders = find_labeled_subheaders(body, "NPCTRIG") segments, tail_start = build_npctrig_clause_segments(body, subheaders) motif_hits = { motif_name: find_all_offsets(body, motif_bytes) for motif_name, motif_bytes in IMMORTALITY_BODY_MOTIFS } repeated_windows_8 = find_repeated_windows(body, 8) repeated_windows_6 = find_repeated_windows(body, 6) tail_fields = scan_body_field_offsets(body, tail_window=min(len(body), 192)) segment_rows: list[dict[str, object]] = [] for segment_name, start, end in segments: segment_body = body[start:end] labels = [offset for offset in find_all_offsets(segment_body, bytes.fromhex("5B"))] motif_offsets = { motif_name: relative_offsets_in_range(motif_hits[motif_name], start, end) for motif_name in clause_motif_names } segment_rows.append( { "segment": segment_name, "start": start, "end": end, "length": end - start, "prefix_hex": hex_edge(segment_body, width=16), "suffix_hex": hex_tail(segment_body, width=12), "local_labels": [start + offset for offset in labels[:8]], "motif_counts": { motif_name: count_offsets_in_range(motif_hits[motif_name], start, end) for motif_name in clause_motif_names }, "motif_offsets": motif_offsets, } ) subheader_offset_deltas = [subheaders[index + 1][0] - subheaders[index][0] for index in range(len(subheaders) - 1)] subheader_target_deltas = [subheaders[index + 1][1] - subheaders[index][1] for index in range(len(subheaders) - 1)] uniform_stride = subheader_offset_deltas[0] if subheader_offset_deltas and len(set(subheader_offset_deltas)) == 1 else None full_clause_segments = [ segment for segment in segment_rows if segment["segment"].startswith("clause_") and segment["motif_counts"]["push_24_51"] and segment["motif_counts"]["writeback_57_02"] ] records.append( { "slot": row.slot, "event_name_hint": row.event_name_hint or "", "body_length": len(body), "header": header, "subheaders": subheaders, "subheader_offset_deltas": subheader_offset_deltas, "subheader_target_deltas": subheader_target_deltas, "segments": segment_rows, "tail_start": tail_start, "tail_fields": tail_fields, "repeated_windows_8": repeated_windows_8, "repeated_windows_6": repeated_windows_6, "has_writeback": bool(motif_hits["writeback_57_02"]), "has_push_2451": bool(motif_hits["push_24_51"]), "field_4b_fe_0f_count": len(motif_hits["field_4b_fe_0f"]), "uniform_stride": uniform_stride, "full_clause_count": len(full_clause_segments), "selector_offsets": [offset for offset, _ in subheaders], "selector_targets": [target for _, target in subheaders], } ) if not records: return tsv_lines = [ "slot\tevent_name_hint\tbody_length\theader_target\theader_event_code\tsubheader_offsets\tsubheader_targets\tsubheader_offset_deltas\tsubheader_target_deltas\tuniform_stride\tfull_clause_count\ttail_start\thas_writeback\thas_push_2451\tfield_4b_fe_0f_count\trepeated_windows_8\trepeated_windows_6" ] for record in records: header = record["header"] or {} tsv_lines.append( "0x{slot:02X}\t{event_name_hint}\t{body_length}\t{header_target}\t{header_event_code}\t{subheader_offsets}\t{subheader_targets}\t{subheader_offset_deltas}\t{subheader_target_deltas}\t{uniform_stride}\t{full_clause_count}\t0x{tail_start:04X}\t{has_writeback}\t{has_push_2451}\t{field_4b_fe_0f_count}\t{repeated_windows_8}\t{repeated_windows_6}".format( slot=record["slot"], event_name_hint=record["event_name_hint"], body_length=record["body_length"], header_target=(f"0x{header['target']:04X}" if header else ""), header_event_code=(f"0x{header['event_code']:02X}" if header and header.get("event_code") is not None else ""), subheader_offsets=",".join(f"0x{offset:04X}" for offset, _ in record["subheaders"]), subheader_targets=",".join(f"0x{target:04X}" for _, target in record["subheaders"]), subheader_offset_deltas=",".join(f"0x{delta:02X}" for delta in record["subheader_offset_deltas"]), subheader_target_deltas=",".join(f"0x{delta & 0xFFFF:04X}" for delta in record["subheader_target_deltas"]), uniform_stride=(f"0x{record['uniform_stride']:02X}" if record["uniform_stride"] is not None else ""), full_clause_count=record["full_clause_count"], tail_start=record["tail_start"], has_writeback="yes" if record["has_writeback"] else "no", has_push_2451="yes" if record["has_push_2451"] else "no", field_4b_fe_0f_count=record["field_4b_fe_0f_count"], repeated_windows_8=";".join( f"{window.hex()}@{','.join(f'0x{offset:04X}' for offset in offsets)}" for window, offsets in record["repeated_windows_8"] ), repeated_windows_6=";".join( f"{window.hex()}@{','.join(f'0x{offset:04X}' for offset in offsets)}" for window, offsets in record["repeated_windows_6"] ), ) ) (out_dir / "immortality_npctrig_clauses.tsv").write_text("\n".join(tsv_lines) + "\n", encoding="utf-8") md_lines = [ "# Immortality NPCTRIG Clauses", "", "This report focuses on the surviving compact NPCTRIG frontier and splits the extracted slot bodies into prefix, clause, and tail regions.", "It is intended to make the slot `0x0A` versus slot `0x20` difference explicit enough to compare against the runtime-side slot-`0x0A` consumer path.", "", ] for record in records: header = record["header"] or {} md_lines.extend([ f"## NPCTRIG slot `0x{record['slot']:02X}`", "", f"- Event hint: `{record['event_name_hint'] or '-'}`.", f"- Open header: `0x5A 0x{header['open_arg']:02X} 0x5C 0x{header['target']:04X}` -> `NPCTRIG` with event-code byte `{f'0x{header['event_code']:02X}' if header.get('event_code') is not None else '-'}`." if header else "- Open header: not recognized.", f"- First tail-field offset: `0x{record['tail_start']:04X}`.", f"- Subheader offsets: {', '.join(f'`0x{offset:04X}`' for offset, _ in record['subheaders']) or '`-`'}.", f"- Subheader targets: {', '.join(f'`0x{target:04X}`' for _, target in record['subheaders']) or '`-`'}.", f"- Subheader offset deltas: {', '.join(f'`0x{delta:02X}`' for delta in record['subheader_offset_deltas']) or '`-`'}.", f"- Subheader target deltas: {', '.join(f'`0x{delta & 0xFFFF:04X}`' for delta in record['subheader_target_deltas']) or '`-`'}.", f"- Runtime-shape motifs: `writeback_57_02={'yes' if record['has_writeback'] else 'no'}`, `push_24_51={'yes' if record['has_push_2451'] else 'no'}`, `field_4b_fe_0f={record['field_4b_fe_0f_count']}`.", "", "| Segment | Range | Len | Local Labels | Subheaders | Branch 3F 0A | Writeback 57 02 | Push 24 51 | Field 4B FE 0F | Motif Offsets | Prefix | Suffix |", "|---|---|---:|---|---:|---:|---:|---:|---:|---|---|---|", ]) for segment in record["segments"]: motif_counts = segment["motif_counts"] motif_offsets = segment["motif_offsets"] motif_offset_render = "; ".join( f"{motif_name}={format_relative_offsets(offsets)}" for motif_name, offsets in motif_offsets.items() if offsets ) or "-" label_render = ",".join(f"0x{offset:04X}" for offset in segment["local_labels"]) or "-" md_lines.append( "| {segment} | `0x{start:04X}..0x{end:04X}` | {length} | `{labels}` | {subheaders} | {branch} | {writeback} | {push_2451} | {field_4b_fe_0f} | `{motif_offsets}` | `{prefix}` | `{suffix}` |".format( segment=segment["segment"], start=segment["start"], end=segment["end"], length=segment["length"], labels=label_render, subheaders=motif_counts["subheader_53_5c"], branch=motif_counts["branch_3f_0a"], writeback=motif_counts["writeback_57_02"], push_2451=motif_counts["push_24_51"], field_4b_fe_0f=motif_counts["field_4b_fe_0f"], motif_offsets=motif_offset_render, prefix=segment["prefix_hex"], suffix=segment["suffix_hex"], ) ) md_lines.extend([ "", "Repeated windows (8-byte):", "", ]) for window, offsets in record["repeated_windows_8"]: md_lines.append( f"- `{format_hex_window(window)}` at {', '.join(f'`0x{offset:04X}`' for offset in offsets)}" ) md_lines.extend([ "", "Repeated windows (6-byte):", "", ]) for window, offsets in record["repeated_windows_6"]: md_lines.append( f"- `{format_hex_window(window)}` at {', '.join(f'`0x{offset:04X}`' for offset in offsets)}" ) md_lines.extend([ "", "Runtime-fit candidates:", "", f"- Candidate clause selector starts: {', '.join(f'`0x{offset:04X}`' for offset in record['selector_offsets']) or '`-`'}.", f"- Candidate clause selector targets: {', '.join(f'`0x{target:04X}`' for target in record['selector_targets']) or '`-`'}.", f"- Uniform selector stride: `{f'0x{record['uniform_stride']:02X}' if record['uniform_stride'] is not None else '-'}`; full clauses carrying both `push_24_51` and `writeback_57_02`: `{record['full_clause_count']}`.", "- Runtime side anchor: `000d:5572` proves the wrapper extra word is additive (`entity_vm_slot_load_value(...) + offset`), while `000d:21ed -> 000d:2433` copies one inline blob, reads two signed metadata bytes, then consumes a word matrix where byte A controls the lead-word row count and byte B controls the shared target-list width.", "", "Tail field offsets:", "", ]) for offset, token in record["tail_fields"]: md_lines.append(f"- `0x{offset:04X}` -> `{token}`") md_lines.append("") slot_0a = next((record for record in records if record["slot"] == 0x0A), None) slot_20 = next((record for record in records if record["slot"] == 0x20), None) if slot_0a and slot_20: slot_0a_header = slot_0a["header"] or {} slot_20_header = slot_20["header"] or {} md_lines.extend([ "## Current Read", "", f"- Slot `0x0A` now reads as a repeated clause ladder, not a monolithic blob: `{len(slot_0a['subheaders'])}` subheaders sit on a uniform `{', '.join(f'0x{delta:02X}' for delta in slot_0a['subheader_offset_deltas']) or '-'}` byte stride, and their targets walk backward by `{', '.join(f'0x{delta & 0xFFFF:04X}' for delta in slot_0a['subheader_target_deltas']) or '-'}`. Each clause block carries one `branch_3f_0a`, one `push_24_51`, and one `writeback_57_02`, which fits an event-bearing clause stream better than a pure type filter.", f"- Slot `0x20` is structurally different even before the tail fields: its open event-code byte is `{f'0x{slot_20_header['event_code']:02X}' if slot_20_header.get('event_code') is not None else '-'}` instead of `{f'0x{slot_0a_header['event_code']:02X}' if slot_0a_header.get('event_code') is not None else '-'}`, it has only one class-labelled subheader, no `writeback_57_02`, no `push_24_51`, and `{slot_20['field_4b_fe_0f_count']}` `field_4b_fe_0f` hits concentrated around repeated `0x0A 00/05 4B FE 0F ...` windows. That is a materially better fit for a typed gate or setup/attachment body than for the live event-emission ladder.", "- This split matches the current runtime-side bridge better than the previous undifferentiated frontier. The verified slot-`0x0A` wrapper `0005:2c35` seeds mask `0x0400`, slot `0x0A`, and one additive word that `000d:5572` applies directly to the loaded slot value before `000d:21ed` consumes the result. The exact `000d:21ed -> 000d:22bc` contract is now narrower too: after copying the inline blob it reads two signed bytes, uses byte A as the lead-word row count, uses byte B as the shared target-list width, performs `A x B` `entity_link` calls, and pushes back only non-`0x0400` words. `NPCTRIG slot 0x0A` is the only surviving compact body here with a natural five-row selector family (`5` evenly spaced clause starts at stride `0x2F`), while slot `0x20` offers only one clause and no matching writeback/push motif.", ]) (out_dir / "immortality_npctrig_clauses.md").write_text("\n".join(md_lines) + "\n", encoding="utf-8") def write_immortality_body_structure_report( out_dir: pathlib.Path, parsed_class_chunks: list[ExtractedChunk], rows_by_entry: dict[int, list[ClassEventRow]], raw_data_by_entry: dict[int, bytes], ) -> None: chunk_by_label = { chunk.primary_label: chunk for chunk in parsed_class_chunks if chunk.primary_label in IMMORTALITY_STRUCTURAL_TARGET_LABELS } records: list[dict[str, object]] = [] for label in sorted(IMMORTALITY_STRUCTURAL_TARGET_LABELS): chunk = chunk_by_label.get(label) if chunk is None: continue raw_data = raw_data_by_entry.get(chunk.index) if raw_data is None: continue for row in rows_by_entry.get(chunk.index, []): if row.raw_code_offset == 0 or row.derived_body_start is None or row.derived_body_end is None: continue body = raw_data[row.derived_body_start:row.derived_body_end] header = parse_body_open_header(body) subheaders = find_labeled_subheaders(body, label) motif_hits = { motif_name: find_all_offsets(body, motif_bytes) for motif_name, motif_bytes in IMMORTALITY_BODY_MOTIFS } records.append( { "entry_index": row.entry_index, "class_name": label, "slot": row.slot, "event_name_hint": row.event_name_hint or "", "body_length": len(body), "header": header, "clause_terminators": body.count(0x7A), "local_labels": body.count(0x5B), "subheaders": subheaders, "tail_fields": scan_body_field_tokens(body, tail_window=256), "all_fields": scan_body_field_tokens(body), "motif_hits": motif_hits, } ) tsv_lines = [ "entry_index\tclass_name\tslot\tevent_name_hint\tbody_length\theader_open_arg\theader_target\theader_label\theader_event_code\tclause_terminator_count\tlocal_label_count\tsubheader_count\tsubheader_targets\ttail_fields\tall_fields\tmotif_counts\tmotif_offsets" ] for record in records: header = record["header"] or {} motif_hits = record["motif_hits"] tsv_lines.append( "{entry_index}\t{class_name}\t0x{slot:02X}\t{event_name_hint}\t{body_length}\t{header_open_arg}\t{header_target}\t{header_label}\t{header_event_code}\t{clause_terminators}\t{local_labels}\t{subheader_count}\t{subheader_targets}\t{tail_fields}\t{all_fields}\t{motif_counts}\t{motif_offsets}".format( entry_index=record["entry_index"], class_name=record["class_name"], slot=record["slot"], event_name_hint=record["event_name_hint"], body_length=record["body_length"], header_open_arg=(f"0x{header['open_arg']:02X}" if header else ""), header_target=(f"0x{header['target']:04X}" if header else ""), header_label=(header.get("label", "") if header else ""), header_event_code=(f"0x{header['event_code']:02X}" if header and header.get("event_code") is not None else ""), clause_terminators=record["clause_terminators"], local_labels=record["local_labels"], subheader_count=len(record["subheaders"]), subheader_targets=",".join( f"0x{offset:04X}->0x{target:04X}" for offset, target in record["subheaders"] ), tail_fields=",".join(record["tail_fields"]), all_fields=",".join(record["all_fields"]), motif_counts=",".join( f"{motif_name}:{len(motif_hits[motif_name])}" for motif_name, _ in IMMORTALITY_BODY_MOTIFS ), motif_offsets=",".join( f"{motif_name}={format_offset_list(motif_hits[motif_name])}" for motif_name, _ in IMMORTALITY_BODY_MOTIFS if motif_hits[motif_name] ), ) ) (out_dir / "immortality_body_structure.tsv").write_text("\n".join(tsv_lines) + "\n", encoding="utf-8") md_lines = [ "# Immortality Body Structure", "", "This report decodes one layer deeper than the literal scan for the surviving EVENT and NPCTRIG frontier.", "It is still heuristic: the output is limited to repeatable byte grammar, subheader boundaries, field-tag trailers, and motif offsets that can be cross-checked against the 000d slot-backed runtime lane.", "", ] for record in records: header = record["header"] or {} motif_hits = record["motif_hits"] md_lines.extend([ f"## {record['class_name']} slot `0x{record['slot']:02X}`", "", f"- Body length: `{record['body_length']}` bytes.", f"- Open header: `0x5A 0x{header['open_arg']:02X} 0x5C 0x{header['target']:04X}` -> `{header.get('label', '')}` with embedded event-code byte `{f'0x{header['event_code']:02X}' if header.get('event_code') is not None else '-'}`." if header else "- Open header: not recognized by the current heuristic.", f"- Clause terminators (`0x7A`): `{record['clause_terminators']}`; local labels (`0x5B`): `{record['local_labels']}`.", f"- Internal labeled subheaders (`0x53 0x5C {record['class_name']}`): `{len(record['subheaders'])}` -> {', '.join(f'`0x{offset:04X}->0x{target:04X}`' for offset, target in record['subheaders'][:12]) or '`-`'}." , f"- Tail field tags: {', '.join(f'`{value}`' for value in record['tail_fields']) or '`-`' }.", "", "| Motif | Count | First Offsets |", "|---|---:|---|", ]) for motif_name, _ in IMMORTALITY_BODY_MOTIFS: offsets = motif_hits[motif_name] md_lines.append( f"| `{motif_name}` | {len(offsets)} | `{format_offset_list(offsets) or '-'}` |" ) md_lines.append("") event_slot_0a = next((record for record in records if record["class_name"] == "EVENT" and record["slot"] == 0x0A), None) npctrig_slot_0a = next((record for record in records if record["class_name"] == "NPCTRIG" and record["slot"] == 0x0A), None) npctrig_slot_20 = next((record for record in records if record["class_name"] == "NPCTRIG" and record["slot"] == 0x20), None) if event_slot_0a and npctrig_slot_0a and npctrig_slot_20: npctrig_slot_0a_header = npctrig_slot_0a.get("header") or {} npctrig_slot_20_header = npctrig_slot_20.get("header") or {} md_lines.extend([ "## Current Read", "", f"- `EVENT 0x0A` is the generic hub-shaped body: it has `{len(event_slot_0a['subheaders'])}` internal labeled subheaders and the widest field trailer (`{', '.join(event_slot_0a['tail_fields'])}`).", f"- `NPCTRIG 0x0A` is the compact player-trigger candidate: it reuses the same class-labelled open header and subheader grammar, but it stays constrained to `{', '.join(npctrig_slot_0a['tail_fields'])}` instead of the wider EVENT field set.", f"- `NPCTRIG 0x20` keeps the same constrained field set as `NPCTRIG 0x0A` and changes only the embedded prolog event-code byte (`{f'0x{npctrig_slot_20_header['event_code']:02X}' if npctrig_slot_20_header.get('event_code') is not None else '-'}` vs `{f'0x{npctrig_slot_0a_header['event_code']:02X}' if npctrig_slot_0a_header.get('event_code') is not None else '-'}`), which fits a variant trigger/setup lane better than a separate generic hub.", "- The repeated `0x53 0x5C LABEL` subheaders and dense `0x5B ` local labels make these bodies look like inline clause streams rather than single flat payloads, which is consistent with the `000d:21ed -> 000d:22bc` runtime lane that copies variable-length inline bytes first and only then consumes compact metadata bytes plus streamed words.", "- The surviving slot focus is still `0x0A`: both EVENT and NPCTRIG expose non-zero slot-`0x0A` bodies, and the runtime side has an exact offset-specialized masked wrapper for slot `0x0A` at `0005:2c35` (`entity_vm_context_try_create_mask_0400_slot0a_with_offset`).", ]) (out_dir / "immortality_body_structure.md").write_text("\n".join(md_lines) + "\n", encoding="utf-8") def readable_neighbor_chunks( center: ExtractedChunk, chunk_by_index: dict[int, ExtractedChunk], total_chunks: int, window: int, ) -> list[ExtractedChunk]: neighbors: list[ExtractedChunk] = [] for neighbor_index in range(max(0, center.index - window), min(total_chunks, center.index + window + 1)): if neighbor_index == center.index: continue neighbor = chunk_by_index[neighbor_index] if not (event_tag_kind(neighbor) or has_referent_field(neighbor) or neighbor.primary_label == center.primary_label): continue neighbors.append(neighbor) return neighbors def unique_preserve_order(values: list[str]) -> list[str]: seen: set[str] = set() ordered: list[str] = [] for value in values: if not value or value in seen: continue seen.add(value) ordered.append(value) return ordered def section_runtime_ops(section_name: str) -> list[str]: if section_name == "Callback trigger lane": return [ "MATERIALIZE_OR_FORWARD_VALUE", "PUSH_FRAME_WORD_LITERAL", "COMPARE_STREAM_DWORD_AND_PUSH_BOOL", "FINALIZE_MIXED_VALUE_TO_OUTPTR", ] return [ "APPEND_UNIQUE_INLINE", "APPEND_UNIQUE_INDIRECT", "REMOVE_MATCHING_INDIRECT", "REMOVE_MATCHING_INLINE", "MATERIALIZE_OR_FORWARD_VALUE", "PREPEND_INLINE_PAYLOAD", "BUILD_ENTITY_LINK_MATRIX", "EMIT_OR_PUSHBACK_RESULT", "FINALIZE_MIXED_VALUE_TO_OUTPTR", ] def section_mask_pairs(section_name: str) -> list[str]: if section_name == "JELYHACK anchor lane": return ["indirect-only active-event-biased ladder; no direct anchor-specific mask proven"] if section_name == "Callback trigger lane": return ["no callback-specific mask pair proven; current ladder favors active event carriers"] return [ "0x0001:0000", "0x0002:0001", "0x0004:0002", "0x0010:0004", "0x0020:0005", "0x0200:0009", "0x0400:000a", "0x0800:000b", "0x1000:000c", "0x2000:0015", "0x4000:000e", "0x8000:0007", "0x8000:000f", ] def section_bridge_note(section_name: str) -> str: if section_name == "JELYHACK anchor lane": return "Referent-only anchors are now readable as payload owners, but the current mask ladder still correlates more strongly with active-event descriptors than with anchor-only rows." if section_name == "Callback trigger lane": return "Callback/eventTrigger descriptors are structurally distinct from the active event lane, so the runtime bridge is still generic slot-backed context flow rather than a callback-specific opcode family." if section_name == "EVENT hub lane": return "This is the strongest current descriptor-side bridge into the active event runtime lane: the neighborhood contains explicit event cores and matches the proven payload-chain plus link-matrix VM behavior." if section_name == "Environmental event lane": return "Environmental descriptors share the same active event field grammar, so they likely ride the same generic VM event lane even though no hazard-specific opcode split is proven yet." return "Descriptor-side and runtime-side evidence align only at the conservative family level." def write_runtime_bridge_reports( out_dir: pathlib.Path, descriptor_chunks: list[ExtractedChunk], chunk_by_index: dict[int, ExtractedChunk], total_chunks: int, ) -> None: vm_ir_lines = [ "stage_address\tir_name\topcode_or_lane\tpayload_shape\tevidence" ] for row in VERIFIED_VM_IR_ROWS: vm_ir_lines.append( "{stage_address}\t{ir_name}\t{opcode_or_lane}\t{payload_shape}\t{evidence}".format(**row) ) (out_dir / "runtime_vm_ir.tsv").write_text("\n".join(vm_ir_lines) + "\n", encoding="utf-8") mask_lines = [ "wrapper_address\tmask_pair\tcaller_anchor\tdescriptor_bias" ] for row in VERIFIED_MASK_LADDER_ROWS: mask_lines.append( "{wrapper_address}\t{mask_pair}\t{caller_anchor}\t{descriptor_bias}".format(**row) ) (out_dir / "vm_mask_ladder.tsv").write_text("\n".join(mask_lines) + "\n", encoding="utf-8") focus_sets = [ ("jelyhack_anchor_attachment", "JELYHACK anchor lane", {"JELYHACK", "JELYH2"}, 8), ("event_hub_cluster", "EVENT hub lane", {"EVENT", "COR_BOOT", "NPCTRIG"}, 5), ("environmental_event_cluster", "Environmental event lane", {"FLAMEBOX", "NOSTRIL", "STEAMBOX"}, 5), ("callback_trigger_cluster", "Callback trigger lane", {"SURCAMNS", "SURCAMEW"}, 5), ] script_tsv_lines = [ "template_id\tsection\tcenter_index\tcenter_label\tattach_labels\tcallback_labels\tneighbor_labels\tevent_families\truntime_ops\tmask_pairs\towner_source\tmirror_write\tselector_status\tbridge_note" ] script_md_lines = [ "# Readable Script IR", "", "This report joins descriptor neighborhoods to the verified 000d VM/runtime lane.", "It stays conservative: opcode-family and mask-family evidence is carried forward only where the binary proves it.", "", "## Verified Runtime Lane", "", "- Owner path: `entity_vm_runtime_init_from_path_if_configured -> entity_vm_runtime_create -> entity_vm_runtime_owner_resource_create`", "- Slot source: `(+0x10/+0x12) + 0x0d*slot + 4` inside the runtime owner/resource object", "- Context seed: `entity_vm_context_create_from_slot_index` copies that source into `+0xd6/+0xd8` and mirrors it to `0x39ca[slot]`", "- Selector status: `0x19/0x1a/0x1b` are proven inside `000d:0988`; `0x18` is still implied, and the upstream seed into `[BP-0x32]` remains unresolved", "", "## Verified VM IR Operators", "", "| Stage | IR | Opcode / Lane | Payload Shape |", "|---|---|---|---|", ] for row in VERIFIED_VM_IR_ROWS: script_md_lines.append( "| {stage_address} | {ir_name} | {opcode_or_lane} | {payload_shape} |".format(**row) ) script_md_lines.extend([ "", "## Verified Mask Ladder", "", "| Wrapper | Mask | Caller Anchor | Descriptor Bias |", "|---|---|---|---|", ]) for row in VERIFIED_MASK_LADDER_ROWS: script_md_lines.append( "| {wrapper_address} | {mask_pair} | {caller_anchor} | {descriptor_bias} |".format(**row) ) owner_source = ( "000d:44df -> 000d:4c99 -> 000d:7000 -> (+0x10/+0x12) + 0x0d*slot + 4" ) mirror_write = "entity_vm_context_create_from_slot_index writes the same source pair to 0x39ca[context_slot]" selector_status = "0x19/0x1a/0x1b proven in 000d:0988; 0x18 implied; upstream [BP-0x32] seed unresolved" for template_id, section_name, center_labels, window in focus_sets: centers = [chunk for chunk in descriptor_chunks if chunk.primary_label in center_labels] if not centers: continue script_md_lines.extend(["", f"## {section_name}", ""]) for center in centers: neighbors = readable_neighbor_chunks(center, chunk_by_index, total_chunks, window) attach_labels = unique_preserve_order([ clean_token(neighbor.primary_label or "") for neighbor in neighbors if event_tag_kind(neighbor) == "event" ]) callback_labels = unique_preserve_order([ clean_token(neighbor.primary_label or "") for neighbor in neighbors if event_tag_kind(neighbor) == "eventTrigger" ]) neighbor_labels = unique_preserve_order([ clean_token(neighbor.primary_label or "") for neighbor in neighbors if has_referent_field(neighbor) and event_tag_kind(neighbor) == "" ]) family_labels = unique_preserve_order([ classify_event_family(neighbor) for neighbor in neighbors ]) runtime_ops = section_runtime_ops(section_name) mask_pairs = section_mask_pairs(section_name) bridge_note = section_bridge_note(section_name) script_tsv_lines.append( "{template_id}\t{section}\t{center_index}\t{center_label}\t{attach_labels}\t{callback_labels}\t{neighbor_labels}\t{event_families}\t{runtime_ops}\t{mask_pairs}\t{owner_source}\t{mirror_write}\t{selector_status}\t{bridge_note}".format( template_id=template_id, section=section_name, center_index=center.index, center_label=clean_token(center.primary_label or ""), attach_labels=",".join(attach_labels), callback_labels=",".join(callback_labels), neighbor_labels=",".join(neighbor_labels), event_families=",".join(family_labels), runtime_ops=",".join(runtime_ops), mask_pairs=",".join(mask_pairs), owner_source=owner_source, mirror_write=mirror_write, selector_status=selector_status, bridge_note=bridge_note, ) ) script_md_lines.append(f"### {center.index}: {center.primary_label}") script_md_lines.append("") script_md_lines.append(f"Descriptor focus: `{readable_signature(center)}`") script_md_lines.append("") script_md_lines.append("Descriptor-side attachments:") script_md_lines.append(f"- Active event neighbors: {', '.join(attach_labels) or 'none proven in window'}") script_md_lines.append(f"- Callback neighbors: {', '.join(callback_labels) or 'none proven in window'}") script_md_lines.append(f"- Referent-side neighbors: {', '.join(neighbor_labels) or 'none proven in window'}") script_md_lines.append(f"- Event families present: {', '.join(family_labels) or 'none'}") script_md_lines.append("") script_md_lines.append("Runtime bridge:") script_md_lines.append(f"- Runtime ops: {', '.join(runtime_ops)}") script_md_lines.append(f"- Mask pairs: {', '.join(mask_pairs)}") script_md_lines.append(f"- Owner source: {owner_source}") script_md_lines.append(f"- Mirror write: {mirror_write}") script_md_lines.append(f"- Selector status: {selector_status}") script_md_lines.append(f"- Interpretation: {bridge_note}") script_md_lines.append("") script_md_lines.append("```text") script_md_lines.append(readable_signature(center)) for label in attach_labels: script_md_lines.append(f"attach {label}(...) # active event-bearing neighbor") for label in callback_labels: script_md_lines.append(f"callback {label}(...) # eventTrigger-bearing neighbor") for label in neighbor_labels: script_md_lines.append(f"near {label}(...) # referent-side local context") script_md_lines.append("") script_md_lines.append("vm_effect:") for runtime_op in runtime_ops: script_md_lines.append(f" {runtime_op}(...)" ) script_md_lines.append("```") script_md_lines.append("") (out_dir / "readable_script_ir.tsv").write_text("\n".join(script_tsv_lines) + "\n", encoding="utf-8") (out_dir / "readable_script_ir.md").write_text("\n".join(script_md_lines), encoding="utf-8") def chunk_bridge_family(chunk: ExtractedChunk) -> str: event_family = classify_event_family(chunk) if event_family: return event_family if chunk_role(chunk) == "referent-anchor": return "referent-anchor" return "" def family_runtime_ops(family: str) -> list[str]: if family == "callback-eventtrigger": return [ "MATERIALIZE_OR_FORWARD_VALUE", "PUSH_FRAME_WORD_LITERAL", "COMPARE_STREAM_DWORD_AND_PUSH_BOOL", "FINALIZE_MIXED_VALUE_TO_OUTPTR", ] if family == "referent-anchor": return [ "APPEND_UNIQUE_INLINE", "APPEND_UNIQUE_INDIRECT", "REMOVE_MATCHING_INDIRECT", "REMOVE_MATCHING_INLINE", "MATERIALIZE_OR_FORWARD_VALUE", ] return [ "APPEND_UNIQUE_INLINE", "APPEND_UNIQUE_INDIRECT", "REMOVE_MATCHING_INDIRECT", "REMOVE_MATCHING_INLINE", "MATERIALIZE_OR_FORWARD_VALUE", "PREPEND_INLINE_PAYLOAD", "BUILD_ENTITY_LINK_MATRIX", "EMIT_OR_PUSHBACK_RESULT", "FINALIZE_MIXED_VALUE_TO_OUTPTR", ] def family_mask_pairs(family: str) -> list[str]: if family == "referent-anchor": return ["anchor role uses referent registry and payload ownership; no anchor-specific slot mask proven"] if family == "callback-eventtrigger": return ["no callback-specific mask pair proven; verified ladder still favors active event carriers"] return [ "0x0001:0000", "0x0002:0001", "0x0004:0002", "0x0010:0004", "0x0020:0005", "0x0200:0009", "0x0400:000a", "0x0800:000b", "0x1000:000c", "0x2000:0015", "0x4000:000e", "0x8000:0007", "0x8000:000f", ] def family_bridge_metadata() -> tuple[dict[str, str], ...]: return ( { "lane_rank": "1", "primary_runtime_lane": "active-event payload lane", "descriptor_family": "event-hub", "fit_strength": "strongest", "confidence": "high", "why": "Explicit 69:0A00 event tag plus the richest source/dest/door/link/time/counter payload shape; best current match for the VM payload-chain plus link-matrix lane.", "exemplar": "EVENT", }, { "lane_rank": "2", "primary_runtime_lane": "active-event payload lane", "descriptor_family": "boot-event-core", "fit_strength": "strong", "confidence": "high", "why": "All five _BOOT descriptors share one compact referent,event,counter,item schema and sit beside referent-heavy object islands that fit the same active-event runtime lane.", "exemplar": "COR_BOOT", }, { "lane_rank": "3", "primary_runtime_lane": "active-event payload lane", "descriptor_family": "npc-trigger", "fit_strength": "strong", "confidence": "moderate-high", "why": "NPCTRIG carries an explicit event field and sits in the same compact event-bearing core as EVENT and COR_BOOT, but its narrower field set makes it look more satellite than hub.", "exemplar": "NPCTRIG", }, { "lane_rank": "4", "primary_runtime_lane": "active-event payload lane", "descriptor_family": "minimal-event-core", "fit_strength": "moderate", "confidence": "moderate", "why": "SFXTRIG keeps the active event tag while stripping most side fields, so it still fits the live event lane but as a smaller attachment form rather than a full hub or boot core.", "exemplar": "SFXTRIG", }, { "lane_rank": "5", "primary_runtime_lane": "active-event payload lane", "descriptor_family": "environmental-event", "fit_strength": "moderate", "confidence": "moderate", "why": "FLAMEBOX, NOSTRIL, and STEAMBOX share the same active event grammar, but no hazard-specific opcode or mask split is proven yet beyond the generic active-event-biased ladder.", "exemplar": "FLAMEBOX", }, { "lane_rank": "6", "primary_runtime_lane": "referent-anchor / payload-owner lane", "descriptor_family": "referent-anchor", "fit_strength": "strong", "confidence": "moderate-high", "why": "JELYHACK and JELYH2 are still referent-only, but the VM referent registry and payload-chain machinery now make that a live anchor role rather than inert metadata.", "exemplar": "JELYHACK", }, { "lane_rank": "7", "primary_runtime_lane": "callback / attachment lane", "descriptor_family": "callback-eventtrigger", "fit_strength": "weak-moderate", "confidence": "moderate", "why": "SURCAMNS and SURCAMEW are structurally coherent callback holders with eventTrigger tags, but the current mask ladder and opcode evidence still align more strongly with active event carriers than callback-specific dispatch.", "exemplar": "SURCAMNS", }, ) def family_chunk_map(descriptor_chunks: list[ExtractedChunk]) -> dict[str, list[ExtractedChunk]]: grouped: dict[str, list[ExtractedChunk]] = {} for chunk in descriptor_chunks: family = chunk_bridge_family(chunk) if not family: continue grouped.setdefault(family, []).append(chunk) return grouped def choose_family_exemplar(family: str, chunks: list[ExtractedChunk], preferred_label: str) -> ExtractedChunk | None: for chunk in chunks: if chunk.primary_label == preferred_label: return chunk return chunks[0] if chunks else None def family_script_block(exemplar: ExtractedChunk, family: str, labels: list[str]) -> list[str]: lines = [readable_signature(exemplar)] if family == "event-hub": lines.extend([ "owner_slot = runtime_owner_table[slot]", "chain = APPEND_UNIQUE_INLINE(...) or APPEND_UNIQUE_INDIRECT(...)", "chain = REMOVE_MATCHING_INDIRECT(...) or REMOVE_MATCHING_INLINE(...)", "payload = PREPEND_INLINE_PAYLOAD(...) when caller bytes are present", "links = BUILD_ENTITY_LINK_MATRIX(shape_a, shape_b, entity_ids)", "emit EVENT-style result through FINALIZE_MIXED_VALUE_TO_OUTPTR(...)", ]) elif family == "boot-event-core": lines.extend([ "anchor referent/event/counter/item into one compact event core", "materialize slot-backed value from runtime_owner_table[slot]", "mutate referent payload chain via opcode_0x18_to_0x1b family", "emit boot-style active event result", ]) elif family == "npc-trigger": lines.extend([ "materialize slot-backed trigger payload", "attach event plus item/item2/typeNpc side fields", "emit NPC-trigger result through shared opcode epilogue", ]) elif family == "minimal-event-core": lines.extend([ "bind referent to minimal event payload", "reuse generic active-event mutation path without hub-style side fields", ]) elif family == "environmental-event": lines.extend([ "bind referent plus event to hazard-specific side fields", "reuse generic active-event lane; no hazard-specific opcode split proven", ]) elif family == "referent-anchor": lines.extend([ "referent_id = registry anchor", "payload_chain = mutable owner-side chain attached to the referent", "neighboring event-bearing descriptor supplies live event semantics", "likely attachments: REE_BOOT, SURCAMEW, SFXTRIG", ]) else: lines.extend([ "callback-side attachment remains descriptor-visible", "runtime bridge is still generic slot-backed context flow rather than callback-specific opcode dispatch", ]) return lines def write_runtime_family_bridge_reports(out_dir: pathlib.Path, descriptor_chunks: list[ExtractedChunk]) -> None: grouped = family_chunk_map(descriptor_chunks) owner_source = "000d:44df -> 000d:4c99 -> 000d:7000 -> (+0x10/+0x12) + 0x0d*slot + 4" loader_evidence = ( "0009:67b6/6916 walk helper-owned +0x10/+0x18 tables, format per-entry paths, and then open/read/close files" ) selector_status = "0x19/0x1a/0x1b proven in 000d:0988; 0x18 implied; upstream [BP-0x32] seed unresolved" tsv_lines = [ "lane_rank\tprimary_runtime_lane\tdescriptor_family\trepresentative_labels\tfit_strength\tconfidence\truntime_ops\tmask_pairs\towner_source\tloader_evidence\tselector_status\twhy" ] md_lines = [ "# Runtime Descriptor Family Rankings", "", "This report ranks descriptor families against the currently verified 000d VM/runtime lanes.", "It is intentionally conservative: it scores ecosystem-level fit, not a direct descriptor-id-to-opcode decode.", "", "## Owner Source", "", f"- Owner path: `{owner_source}`", f"- Loader evidence: `{loader_evidence}`", f"- Selector status: `{selector_status}`", "", "## Ranked Families", "", "| Rank | Runtime Lane | Descriptor Family | Labels | Fit | Confidence |", "|---:|---|---|---|---|---|", ] for metadata in family_bridge_metadata(): family = metadata["descriptor_family"] family_chunks = grouped.get(family, []) if not family_chunks: continue labels = unique_preserve_order([ clean_token(chunk.primary_label or "") for chunk in family_chunks if chunk.primary_label ]) exemplar = choose_family_exemplar(family, family_chunks, metadata["exemplar"]) if exemplar is None: continue runtime_ops = family_runtime_ops(family) mask_pairs = family_mask_pairs(family) tsv_lines.append( "{lane_rank}\t{primary_runtime_lane}\t{descriptor_family}\t{representative_labels}\t{fit_strength}\t{confidence}\t{runtime_ops}\t{mask_pairs}\t{owner_source}\t{loader_evidence}\t{selector_status}\t{why}".format( lane_rank=metadata["lane_rank"], primary_runtime_lane=metadata["primary_runtime_lane"], descriptor_family=family, representative_labels=",".join(labels), fit_strength=metadata["fit_strength"], confidence=metadata["confidence"], runtime_ops=",".join(runtime_ops), mask_pairs=",".join(mask_pairs), owner_source=owner_source, loader_evidence=loader_evidence, selector_status=selector_status, why=metadata["why"], ) ) md_lines.append( "| {lane_rank} | {primary_runtime_lane} | {descriptor_family} | {labels} | {fit_strength} | {confidence} |".format( lane_rank=metadata["lane_rank"], primary_runtime_lane=metadata["primary_runtime_lane"], descriptor_family=family, labels=", ".join(labels), fit_strength=metadata["fit_strength"], confidence=metadata["confidence"], ) ) md_lines.extend([ "", f"## {metadata['lane_rank']}. {family}", "", f"- Runtime lane: {metadata['primary_runtime_lane']}", f"- Labels: {', '.join(labels)}", f"- Fit: {metadata['fit_strength']}", f"- Confidence: {metadata['confidence']}", f"- Why: {metadata['why']}", f"- Runtime ops: {', '.join(runtime_ops)}", f"- Mask pairs: {', '.join(mask_pairs)}", "", "```text", ]) md_lines.extend(family_script_block(exemplar, family, labels)) md_lines.extend([ "```", "", ]) (out_dir / "runtime_descriptor_family_rankings.tsv").write_text("\n".join(tsv_lines) + "\n", encoding="utf-8") (out_dir / "runtime_descriptor_family_rankings.md").write_text("\n".join(md_lines), encoding="utf-8") def write_readable_template_reports( out_dir: pathlib.Path, descriptor_chunks: list[ExtractedChunk], chunk_by_index: dict[int, ExtractedChunk], total_chunks: int, ) -> None: focus_sets = [ ("JELYHACK anchor lane", {"JELYHACK", "JELYH2"}, 8), ("EVENT hub lane", {"EVENT", "COR_BOOT", "NPCTRIG"}, 5), ("Environmental event lane", {"FLAMEBOX", "NOSTRIL", "STEAMBOX"}, 5), ("Callback trigger lane", {"SURCAMNS", "SURCAMEW"}, 5), ] tsv_lines = [ "section\tcenter_index\tcenter_label\trelation\tneighbor_index\tdistance\tneighbor_label\trole\tfamily\ttag_kind\tfield_names" ] md_lines = [ "# EUSECODE Readable Descriptor Templates", "", "These are conservative descriptor-side pseudo-script sketches.", "They reflect verified field grammar and local table neighborhoods, not a direct opcode dump.", "", ] for section_name, center_labels, window in focus_sets: centers = [chunk for chunk in descriptor_chunks if chunk.primary_label in center_labels] if not centers: continue md_lines.append(f"## {section_name}") md_lines.append("") for center in centers: md_lines.append(f"### {center.index}: {center.primary_label}") md_lines.append("") md_lines.append("```text") md_lines.append(readable_signature(center)) for neighbor_index in range(max(0, center.index - window), min(total_chunks, center.index + window + 1)): if neighbor_index == center.index: continue neighbor = chunk_by_index[neighbor_index] if not (event_tag_kind(neighbor) or has_referent_field(neighbor) or neighbor.primary_label in center_labels): continue distance = neighbor.index - center.index relation = "near" if event_tag_kind(neighbor) == "event": relation = "attach" elif event_tag_kind(neighbor) == "eventTrigger": relation = "callback" md_lines.append( "{relation} {subject} # offset {distance:+d}{family_suffix}".format( relation=relation, subject=readable_subject(neighbor), distance=distance, family_suffix=( f", family={classify_event_family(neighbor)}" if classify_event_family(neighbor) else "" ), ) ) tsv_lines.append( "{section}\t{center_index}\t{center_label}\t{relation}\t{neighbor_index}\t{distance:+d}\t{neighbor_label}\t{role}\t{family}\t{tag_kind}\t{field_names}".format( section=section_name, center_index=center.index, center_label=clean_token(center.primary_label or ""), relation=relation, neighbor_index=neighbor.index, distance=distance, neighbor_label=clean_token(neighbor.primary_label or ""), role=chunk_role(neighbor), family=classify_event_family(neighbor), tag_kind=event_tag_kind(neighbor), field_names=",".join(clean_token(field_name) for field_name in neighbor.field_names), ) ) md_lines.append("```") md_lines.append("") family_lines = [ "## Family Signatures", "", "| Family | Label | Signature |", "|---|---|---|", ] for chunk in sorted(descriptor_chunks, key=lambda value: (classify_event_family(value), value.index)): family = classify_event_family(chunk) if not family: continue family_lines.append( "| {family} | {label} | {signature} |".format( family=family, label=clean_token(chunk.primary_label or ""), signature=readable_signature(chunk).replace("|", "/"), ) ) md_lines.extend(family_lines) md_lines.append("") (out_dir / "readable_descriptor_templates.md").write_text("\n".join(md_lines), encoding="utf-8") (out_dir / "readable_descriptor_templates.tsv").write_text("\n".join(tsv_lines) + "\n", encoding="utf-8") def header_u16_words(data: bytes, count: int = 16) -> list[str]: limit = min(len(data) // 2, count) return [f"0x{read_u16_le(data, index * 2):04X}" for index in range(limit)] def header_u32_words(data: bytes, count: int = 8) -> list[str]: limit = min(len(data) // 4, count) return [f"0x{read_u32_le(data, index * 4):08X}" for index in range(limit)] def interesting_printable_markers(data: bytes) -> list[str]: markers: list[str] = [] seen: set[str] = set() for run in iter_printable_runs(data, min_len=3): if not any(token in run for token in ("wx[", "wt$[", "t$t=t@", "$Q", "?\n", "?\r")): continue if run not in seen: seen.add(run) markers.append(run) return markers[:8] def write_island_graph( out_dir: pathlib.Path, output_name: str, title: str, center_labels: set[str], descriptor_chunks: list[ExtractedChunk], chunk_by_index: dict[int, ExtractedChunk], total_chunks: int, window: int = 5, ) -> None: centers = [chunk for chunk in descriptor_chunks if chunk.primary_label in center_labels] if not centers: return island_indices = sorted( { neighbor_index for center in centers for neighbor_index in range(max(0, center.index - window), min(total_chunks, center.index + window + 1)) } ) island_lines = [f"# {title}", "", "## Nodes", "", "| Index | Label | Role | Fields | Event Evidence |", "|---:|---|---|---|---|"] for index in island_indices: chunk = chunk_by_index[index] island_lines.append( "| {index} | {label} | {role} | {fields} | {evidence} |".format( index=index, label=chunk.primary_label or "", role=chunk_role(chunk), fields=",".join(chunk.field_names) or "-", evidence=",".join(get_event_evidence(chunk)) or "-", ) ) island_lines.extend(["", "## Edges", "", "| Source | Relation | Target | Evidence |", "|---|---|---|---|"]) for center in centers: for neighbor_index in range(max(0, center.index - window), min(total_chunks, center.index + window + 1)): if neighbor_index == center.index: continue neighbor = chunk_by_index[neighbor_index] relation = f"table-neighbor({neighbor.index - center.index:+d})" event_evidence = get_event_evidence(neighbor) if event_evidence: relation = f"possible-event-attachment({neighbor.index - center.index:+d})" island_lines.append( "| {source} ({source_index}) | {relation} | {target} ({target_index}) | {evidence} |".format( source=center.primary_label, source_index=center.index, relation=relation, target=neighbor.primary_label or "", target_index=neighbor.index, evidence=",".join(event_evidence) or "same local extraction neighborhood", ) ) (out_dir / output_name).write_text("\n".join(island_lines) + "\n", encoding="utf-8") def write_descriptor_compare( out_dir: pathlib.Path, output_name: str, labels: set[str], descriptor_chunks: list[ExtractedChunk], ) -> None: compare_lines = [ "entry_index\tlabel\trole\tdata_offset\tdeclared_size\theader_u16\theader_u32\tprintable_markers\tfield_tags" ] for chunk in descriptor_chunks: if chunk.primary_label not in labels: continue raw_data = pathlib.Path(chunk.raw_path).read_bytes() compare_lines.append( "{index}\t{label}\t{role}\t0x{data_offset:X}\t0x{declared_size:X}\t{header_u16}\t{header_u32}\t{markers}\t{field_tags}".format( index=chunk.index, label=chunk.primary_label, role=chunk_role(chunk), data_offset=chunk.data_offset, declared_size=chunk.declared_size, header_u16=",".join(header_u16_words(raw_data)), header_u32=",".join(header_u32_words(raw_data)), markers="|".join(interesting_printable_markers(raw_data)), field_tags=",".join(chunk.field_tags), ) ) (out_dir / output_name).write_text("\n".join(compare_lines) + "\n", encoding="utf-8") def write_event_family_reports( out_dir: pathlib.Path, descriptor_chunks: list[ExtractedChunk], chunk_by_index: dict[int, ExtractedChunk], total_chunks: int, ) -> None: family_lines = [ "entry_index\tlabel\tfamily\ttag_kind\trole\tfield_count\tfield_names\tfield_tags\tdata_offset\tdeclared_size\tlocal_event_neighbors" ] families: dict[str, list[ExtractedChunk]] = {} for chunk in descriptor_chunks: family = classify_event_family(chunk) if not family: continue families.setdefault(family, []).append(chunk) local_event_neighbors = 0 for neighbor_index in range(max(0, chunk.index - 5), min(total_chunks, chunk.index + 6)): if neighbor_index == chunk.index: continue neighbor = chunk_by_index[neighbor_index] if event_tag_kind(neighbor): local_event_neighbors += 1 family_lines.append( "{index}\t{label}\t{family}\t{tag_kind}\t{role}\t{field_count}\t{field_names}\t{field_tags}\t0x{data_offset:X}\t0x{declared_size:X}\t{local_event_neighbors}".format( index=chunk.index, label=chunk.primary_label or "", family=family, tag_kind=event_tag_kind(chunk), role=chunk_role(chunk), field_count=len(chunk.field_names), field_names=",".join(chunk.field_names), field_tags=",".join(chunk.field_tags), data_offset=chunk.data_offset, declared_size=chunk.declared_size, local_event_neighbors=local_event_neighbors, ) ) (out_dir / "event_family_index.tsv").write_text("\n".join(family_lines) + "\n", encoding="utf-8") summary_lines = ["# Event Family Summary", ""] family_order = [ "event-hub", "boot-event-core", "npc-trigger", "minimal-event-core", "environmental-event", "specialized-event", "callback-eventtrigger", ] for family in family_order: family_chunks = families.get(family, []) if not family_chunks: continue summary_lines.append(f"## {family}") summary_lines.append("") summary_lines.append("| Index | Label | Tag Kind | Fields | Size | Local Event Neighbors |") summary_lines.append("|---:|---|---|---|---:|---:|") for chunk in sorted(family_chunks, key=lambda value: value.index): local_event_neighbors = 0 for neighbor_index in range(max(0, chunk.index - 5), min(total_chunks, chunk.index + 6)): if neighbor_index == chunk.index: continue neighbor = chunk_by_index[neighbor_index] if event_tag_kind(neighbor): local_event_neighbors += 1 summary_lines.append( "| {index} | {label} | {tag_kind} | {fields} | 0x{declared_size:X} | {local_event_neighbors} |".format( index=chunk.index, label=chunk.primary_label or "", tag_kind=event_tag_kind(chunk), fields=",".join(chunk.field_names) or "-", declared_size=chunk.declared_size, local_event_neighbors=local_event_neighbors, ) ) summary_lines.append("") (out_dir / "event_family_summary.md").write_text("\n".join(summary_lines), encoding="utf-8") def looks_text_like(data: bytes) -> bool: if not data: return False ratio = printable_ratio(data) if ratio < 0.80: return False if b"\r\n" in data or b"\n" in data: return True return zero_ratio(data) < 0.05 def parse_flx_table(data: bytes, table_offset: int = 0x80, count_offset: int = 0x54) -> FlxTable: file_size = len(data) entry_count = read_u32_le(data, count_offset) table_end = table_offset + entry_count * 8 if table_end > file_size: raise ValueError( f"FLX table extends past EOF: entry_count={entry_count} table_end=0x{table_end:X} file_size=0x{file_size:X}" ) entries: list[CandidateEntry] = [] for index in range(entry_count): offset = table_offset + index * 8 data_offset = read_u32_le(data, offset) declared_size = read_u32_le(data, offset + 4) if data_offset == 0 and declared_size == 0: continue if data_offset <= 0 or data_offset > file_size: continue if declared_size <= 0: continue entries.append(CandidateEntry(offset, data_offset, declared_size)) return FlxTable( entry_count=entry_count, table_offset=table_offset, table_end=table_end, entries=entries, ) def dump_chunk( base_dir: pathlib.Path, chunk_name: str, data: bytes ) -> tuple[str, str, str | None, bool, float, float, str, str | None, list[str], list[str]]: raw_path = base_dir / f"{chunk_name}.bin" strings_path = base_dir / f"{chunk_name}.strings.txt" text_path = base_dir / f"{chunk_name}.txt" raw_path.write_bytes(data) runs = iter_printable_runs(data) strings_path.write_text("\n".join(runs) + ("\n" if runs else ""), encoding="utf-8") primary_label, field_names = summarize_descriptor(runs) field_tags = extract_field_tag_records(data, field_names) text_like = looks_text_like(data) actual_text_path: str | None = None if text_like: text_path.write_text(data.decode("latin-1", errors="replace"), encoding="utf-8") actual_text_path = str(text_path) return ( str(raw_path), str(strings_path), actual_text_path, text_like, printable_ratio(data), zero_ratio(data), ascii_preview(data), primary_label, field_names, field_tags, ) def extract_candidates(data: bytes, out_dir: pathlib.Path, entries: list[CandidateEntry]) -> list[ExtractedChunk]: chunks_dir = out_dir / "chunks" chunks_dir.mkdir(parents=True, exist_ok=True) extracted: list[ExtractedChunk] = [] file_size = len(data) sorted_entries = sorted(enumerate(entries), key=lambda item: (item[1].data_offset, item[0])) next_by_original_index: dict[int, int | None] = {} for position, (original_index, entry) in enumerate(sorted_entries): next_offset = sorted_entries[position + 1][1].data_offset if position + 1 < len(sorted_entries) else None next_by_original_index[original_index] = next_offset for index, entry in enumerate(entries): next_offset = next_by_original_index.get(index) chunk_end = min(file_size, entry.data_offset + entry.declared_size) chunk_data = data[entry.data_offset:chunk_end] overlap = next_offset is not None and (entry.data_offset + entry.declared_size) > next_offset chunk_name = ( f"chunk_{index:03d}_table_{entry.table_offset:04X}_off_{entry.data_offset:06X}_len_{entry.declared_size:06X}" ) raw_path, strings_path, text_path, text_like, print_ratio, z_ratio, preview, primary_label, field_names, field_tags = dump_chunk( chunks_dir, chunk_name, chunk_data ) extracted.append( ExtractedChunk( index=index, table_offset=entry.table_offset, object_index=object_index_from_table_offset(entry.table_offset), data_offset=entry.data_offset, declared_size=entry.declared_size, next_offset=next_offset, extracted_size=len(chunk_data), overlap_with_next=overlap, text_like=text_like, printable_ratio=round(print_ratio, 4), zero_ratio=round(z_ratio, 4), preview=preview, raw_path=raw_path, strings_path=strings_path, text_path=text_path, primary_label=primary_label, field_names=field_names, field_tags=field_tags, ) ) annotate_class_layout(extracted) return extracted def write_summary( out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes, entries: list[CandidateEntry], chunks: list[ExtractedChunk], validation_profile: str, ) -> None: summary = { "input_path": str(input_path), "validation_profile": validation_profile, "file_size": len(data), "header_preview_hex": data[:128].hex(), "header_preview_ascii": ascii_preview(data[:128], 128), "candidate_entries": [asdict(entry) for entry in entries], "chunks": [asdict(chunk) for chunk in chunks], } (out_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8") index_lines = [ "entry_index\ttable_offset\tobject_index\tclass_id\tclass_name_hint\traw_code_base_u32\tcode_base_minus_one\tconservative_event_count\tevent_table_end\tclass_parse_status\tdata_offset\tdeclared_size\textracted_size\ttext_like\tprintable_ratio\tzero_ratio\toverlap_with_next\tprimary_label\tfield_names\tfield_tags\tpreview" ] for chunk in chunks: index_lines.append( "{index}\t{table_offset}\t{object_index}\t{class_id}\t{class_name_hint}\t{raw_code_base_u32}\t{code_base_minus_one}\t{conservative_event_count}\t{event_table_end}\t{class_parse_status}\t{data_offset}\t{declared_size}\t{extracted_size}\t{text_like}\t{printable_ratio:.4f}\t{zero_ratio:.4f}\t{overlap}\t{primary_label}\t{field_names}\t{field_tags}\t{preview}".format( index=chunk.index, table_offset=(f"0x{chunk.table_offset:X}"), object_index=(f"0x{chunk.object_index:X}" if chunk.object_index is not None else ""), class_id=(f"0x{chunk.class_id:X}" if chunk.class_id is not None else ""), class_name_hint=chunk.class_name_hint or "", raw_code_base_u32=(f"0x{chunk.raw_code_base_u32:X}" if chunk.raw_code_base_u32 is not None else ""), code_base_minus_one=(f"0x{chunk.code_base_minus_one:X}" if chunk.code_base_minus_one is not None else ""), conservative_event_count=(chunk.conservative_event_count if chunk.conservative_event_count is not None else ""), event_table_end=(f"0x{chunk.event_table_end:X}" if chunk.event_table_end is not None else ""), class_parse_status=chunk.class_parse_status or "", data_offset=f"0x{chunk.data_offset:X}", declared_size=f"0x{chunk.declared_size:X}", extracted_size=f"0x{chunk.extracted_size:X}", text_like=int(chunk.text_like), printable_ratio=chunk.printable_ratio, zero_ratio=chunk.zero_ratio, overlap=int(chunk.overlap_with_next), primary_label=chunk.primary_label or "", field_names=",".join(chunk.field_names), field_tags=",".join(chunk.field_tags), preview=chunk.preview.replace("\t", " "), ) ) (out_dir / "entry_index.tsv").write_text("\n".join(index_lines) + "\n", encoding="utf-8") descriptor_lines = [ "entry_index\tobject_index\tclass_id\tclass_name_hint\traw_code_base_u32\tcode_base_minus_one\tconservative_event_count\tevent_table_end\tclass_parse_status\tprimary_label\tfield_names\tfield_tags\tdata_offset\tdeclared_size" ] descriptor_chunks = [chunk for chunk in chunks if chunk.primary_label or chunk.field_names] for chunk in descriptor_chunks: descriptor_lines.append( "{index}\t{object_index}\t{class_id}\t{class_name_hint}\t{raw_code_base_u32}\t{code_base_minus_one}\t{conservative_event_count}\t{event_table_end}\t{class_parse_status}\t{primary_label}\t{field_names}\t{field_tags}\t0x{data_offset:X}\t0x{declared_size:X}".format( index=chunk.index, object_index=(f"0x{chunk.object_index:X}" if chunk.object_index is not None else ""), class_id=(f"0x{chunk.class_id:X}" if chunk.class_id is not None else ""), class_name_hint=chunk.class_name_hint or "", raw_code_base_u32=(f"0x{chunk.raw_code_base_u32:X}" if chunk.raw_code_base_u32 is not None else ""), code_base_minus_one=(f"0x{chunk.code_base_minus_one:X}" if chunk.code_base_minus_one is not None else ""), conservative_event_count=(chunk.conservative_event_count if chunk.conservative_event_count is not None else ""), event_table_end=(f"0x{chunk.event_table_end:X}" if chunk.event_table_end is not None else ""), class_parse_status=chunk.class_parse_status or "", primary_label=chunk.primary_label or "", field_names=",".join(chunk.field_names), field_tags=",".join(chunk.field_tags), data_offset=chunk.data_offset, declared_size=chunk.declared_size, ) ) (out_dir / "descriptor_index.tsv").write_text("\n".join(descriptor_lines) + "\n", encoding="utf-8") class_layout_lines = [ "entry_index\tobject_index\tclass_id\tclass_name_hint\traw_code_base_u32\tcode_base_minus_one\tconservative_event_count\tevent_table_end\tclass_parse_status\tdata_offset\tdeclared_size\tprimary_label" ] parsed_class_chunks = [chunk for chunk in chunks if chunk.class_parse_status == "parsed-class-layout"] class_event_rows, rows_by_entry, raw_data_by_entry = build_class_event_rows(parsed_class_chunks) repeated_status_by_row = build_repeated_template_status_map( parsed_class_chunks, rows_by_entry, raw_data_by_entry, ) repeated_family_regression_lines = validate_verified_repeated_family_regressions( parsed_class_chunks, rows_by_entry, repeated_status_by_row, validation_profile, ) for chunk in parsed_class_chunks: class_layout_lines.append( "{index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{raw_code_base_u32:X}\t0x{code_base_minus_one:X}\t{conservative_event_count}\t0x{event_table_end:X}\t{class_parse_status}\t0x{data_offset:X}\t0x{declared_size:X}\t{primary_label}".format( index=chunk.index, object_index=chunk.object_index, class_id=chunk.class_id, class_name_hint=chunk.class_name_hint or "", raw_code_base_u32=chunk.raw_code_base_u32, code_base_minus_one=chunk.code_base_minus_one, conservative_event_count=chunk.conservative_event_count, event_table_end=chunk.event_table_end, class_parse_status=chunk.class_parse_status, data_offset=chunk.data_offset, declared_size=chunk.declared_size, primary_label=chunk.primary_label or "", ) ) (out_dir / "class_layout_index.tsv").write_text("\n".join(class_layout_lines) + "\n", encoding="utf-8") class_event_lines = [ "entry_index\tobject_index\tclass_id\tclass_name_hint\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset\tderived_body_start\tderived_body_end\tderived_body_length\trepeated_template_status" ] for row in class_event_rows: class_event_lines.append( "{entry_index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_event_entry_word:04X}\t0x{raw_code_offset:08X}\t{derived_body_start}\t{derived_body_end}\t{derived_body_length}\t{repeated_template_status}".format( entry_index=row.entry_index, object_index=row.object_index, class_id=row.class_id, class_name_hint=row.class_name_hint, slot=row.slot, event_name_hint=row.event_name_hint or "", raw_event_entry_word=row.raw_event_entry_word, raw_code_offset=row.raw_code_offset, derived_body_start=format_optional_hex(row.derived_body_start, 4), derived_body_end=format_optional_hex(row.derived_body_end, 4), derived_body_length=(row.derived_body_length if row.derived_body_length is not None else ""), repeated_template_status=repeated_status_by_row.get((row.entry_index, row.slot), ""), ) ) (out_dir / "class_event_index.tsv").write_text("\n".join(class_event_lines) + "\n", encoding="utf-8") for family_artifact_spec in FAMILY_ARTIFACT_SPECS: write_family_decompile_artifact( out_dir, parsed_class_chunks, rows_by_entry, raw_data_by_entry, repeated_status_by_row, family_artifact_spec, ) (out_dir / "repeated_family_regressions.tsv").write_text( "\n".join(repeated_family_regression_lines) + "\n", encoding="utf-8", ) neighborhood_lines = [ "center_index\tneighbor_index\tprimary_label\tfield_names\tfield_tags" ] interesting = {"JELYHACK", "JELYH2", "NPCTRIG", "CRUZTRIG", "TRIGPAD", "SPECIAL", "EVENT", "SFXTRIG"} interesting_indices = [chunk.index for chunk in chunks if chunk.primary_label in interesting] seen_pairs: set[tuple[int, int]] = set() chunk_by_index = {chunk.index: chunk for chunk in chunks} for center_index in interesting_indices: for neighbor_index in range(max(0, center_index - 4), min(len(chunks), center_index + 5)): pair = (center_index, neighbor_index) if pair in seen_pairs: continue seen_pairs.add(pair) chunk = chunk_by_index[neighbor_index] neighborhood_lines.append( "{center_index}\t{neighbor_index}\t{primary_label}\t{field_names}\t{field_tags}".format( center_index=center_index, neighbor_index=neighbor_index, primary_label=chunk.primary_label or "", field_names=",".join(chunk.field_names), field_tags=",".join(chunk.field_tags), ) ) (out_dir / "descriptor_neighborhoods.tsv").write_text("\n".join(neighborhood_lines) + "\n", encoding="utf-8") anchor_graph_lines = [ "anchor_index\tanchor_label\tanchor_fields\tneighbor_index\tdistance\tneighbor_label\tneighbor_fields\tneighbor_role\tevent_evidence" ] for anchor in descriptor_chunks: if not anchor.primary_label or not has_referent_field(anchor): continue for neighbor_index in range(max(0, anchor.index - 8), min(len(chunks), anchor.index + 9)): if neighbor_index == anchor.index: continue neighbor = chunk_by_index[neighbor_index] event_evidence = get_event_evidence(neighbor) if not event_evidence: continue anchor_graph_lines.append( "{anchor_index}\t{anchor_label}\t{anchor_fields}\t{neighbor_index}\t{distance:+d}\t{neighbor_label}\t{neighbor_fields}\t{neighbor_role}\t{event_evidence}".format( anchor_index=anchor.index, anchor_label=anchor.primary_label, anchor_fields=",".join(anchor.field_names), neighbor_index=neighbor.index, distance=neighbor.index - anchor.index, neighbor_label=neighbor.primary_label or "", neighbor_fields=",".join(neighbor.field_names), neighbor_role=chunk_role(neighbor), event_evidence=",".join(event_evidence), ) ) (out_dir / "referent_anchor_event_graph.tsv").write_text("\n".join(anchor_graph_lines) + "\n", encoding="utf-8") write_island_graph( out_dir, "jelyhack_island_graph.md", "JELYHACK Island Graph", {"JELYHACK", "JELYH2"}, descriptor_chunks, chunk_by_index, len(chunks), window=8, ) write_descriptor_compare( out_dir, "jelyhack_descriptor_compare.tsv", {"JELYHACK", "JELYH2", "REE_BOOT", "SURCAMEW", "SFXTRIG"}, descriptor_chunks, ) write_island_graph( out_dir, "event_island_graph.md", "EVENT Cluster Graph", {"EVENT", "COR_BOOT", "NPCTRIG", "ROLL_NS", "CRUZTRIG"}, descriptor_chunks, chunk_by_index, len(chunks), ) write_descriptor_compare( out_dir, "event_descriptor_compare.tsv", {"ROLL_NS", "COR_BOOT", "EVENT", "NPCTRIG", "CRUZTRIG", "NPC_ONLY", "VMAIL"}, descriptor_chunks, ) write_island_graph( out_dir, "boot_frontier_graph.md", "AND/BRO Boot Frontier Graph", {"AND_BOOT", "BRO_BOOT"}, descriptor_chunks, chunk_by_index, len(chunks), window=6, ) write_descriptor_compare( out_dir, "boot_family_compare.tsv", {"AND_BOOT", "BRO_BOOT", "COR_BOOT", "VAR_BOOT", "REE_BOOT"}, descriptor_chunks, ) write_island_graph( out_dir, "environmental_event_graph.md", "Environmental Event Graph", {"FLAMEBOX", "NOSTRIL", "STEAMBOX"}, descriptor_chunks, chunk_by_index, len(chunks), window=5, ) write_descriptor_compare( out_dir, "environmental_family_compare.tsv", {"FLAMEBOX", "NOSTRIL", "STEAMBOX"}, descriptor_chunks, ) write_descriptor_compare( out_dir, "callback_trigger_compare.tsv", {"SURCAMNS", "SURCAMEW"}, descriptor_chunks, ) write_event_family_reports(out_dir, descriptor_chunks, chunk_by_index, len(chunks)) write_readable_template_reports(out_dir, descriptor_chunks, chunk_by_index, len(chunks)) write_runtime_bridge_reports(out_dir, descriptor_chunks, chunk_by_index, len(chunks)) write_runtime_family_bridge_reports(out_dir, descriptor_chunks) write_immortality_target_body_scan(out_dir, parsed_class_chunks, rows_by_entry, raw_data_by_entry) write_immortality_body_structure_report(out_dir, parsed_class_chunks, rows_by_entry, raw_data_by_entry) write_npctrig_clause_report(out_dir, parsed_class_chunks, rows_by_entry, raw_data_by_entry) lines = [] lines.append("# EUSECODE.FLX First-Pass Extraction") lines.append("") lines.append(f"Input: {input_path}") lines.append(f"File size: 0x{len(data):X} ({len(data)} bytes)") lines.append(f"Candidate entries: {len(entries)}") lines.append("") lines.append("## Header Preview") lines.append("") lines.append(f"ASCII: `{ascii_preview(data[:128], 128)}`") lines.append("") lines.append("## Chunks") lines.append("") lines.append("| # | Table Off | Data Off | Declared Size | Next Off | Text | Overlap | Preview |") lines.append("|---:|---:|---:|---:|---:|:---:|:---:|---|") for chunk in chunks: next_off = f"0x{chunk.next_offset:X}" if chunk.next_offset is not None else "-" lines.append( "| {index} | 0x{table_offset:X} | 0x{data_offset:X} | 0x{declared_size:X} | {next_off} | {text_like} | {overlap} | {preview} |".format( index=chunk.index, table_offset=chunk.table_offset, data_offset=chunk.data_offset, declared_size=chunk.declared_size, next_off=next_off, text_like="yes" if chunk.text_like else "no", overlap="yes" if chunk.overlap_with_next else "no", preview=chunk.preview.replace("|", "/"), ) ) lines.append("") lines.append("## Notes") lines.append("") lines.append("- The extractor now parses the validated FLX table directly: entry count at `0x54`, table at `0x80`, 8 bytes per entry.") lines.append("- Overlapping declared sizes likely mean some entries are counts or record spans rather than exact chunk lengths.") lines.append("- `.strings.txt` files are the main human-readable output for now; `.txt` files are emitted only for chunks that look text-like.") lines.append("- `descriptor_index.tsv` summarizes guessed class labels, field names, and compact tag patterns for descriptor-like chunks.") lines.append("- `class_layout_index.tsv` records the conservative owner-loaded class parsing state: object index, class id, class-name hint, raw bytes-8..11 field, derived code-base-minus-one, and event-count/table-end values when the local divisibility and bounds checks succeed.") lines.append("- `class_event_index.tsv` now also emits derived body-window columns (`derived_body_start`, `derived_body_end`, `derived_body_length`) plus conservative `repeated_template_status` tags for verified repeated families.") lines.append("- `boot_family_decompile.md` / `.tsv`, `callback_family_decompile.md` / `.tsv`, and `environmental_family_decompile.md` / `.tsv` now provide reversible per-class decompile artifacts for the `_BOOT`, `SURCAM*`, and environmental repeated-family lanes.") lines.append("- `repeated_family_regressions.tsv` enforces the current repeated-family slot sets plus the verified raw-row and derived body-window fields for `JELYHACK/JELYH2`, `_BOOT`, `SURCAM*`, and `FLAMEBOX/NOSTRIL/STEAMBOX`.") lines.append("- `descriptor_neighborhoods.tsv` captures local table neighborhoods around trigger/event-related classes such as `JELYHACK`, `NPCTRIG`, `CRUZTRIG`, `TRIGPAD`, and `SPECIAL`.") lines.append("- `referent_anchor_event_graph.tsv` groups referent-bearing descriptors with nearby event-bearing neighbors so the attachment model can be inspected without ad hoc grepping.") lines.append("- `jelyhack_island_graph.md` now uses a wider local window so the `JELYHACK` / `JELYH2` anchors can be inspected alongside the nearby event-bearing `REE_BOOT`, `SURCAMEW`, and `SFXTRIG` descriptors rather than stopping at the referent-only neighbors.") lines.append("- `jelyhack_descriptor_compare.tsv` captures the first 16 header words, first 8 dwords, and a few odd printable markers for the core JELYHACK-island descriptors so structural similarity can be compared without raw hex dumps.") lines.append("- `event_island_graph.md` renders the denser `EVENT` / `COR_BOOT` / `NPCTRIG` / `ROLL_NS` / `CRUZTRIG` island, which currently looks like the strongest event-explicit neighborhood outside the JELYHACK anchor case.") lines.append("- `event_descriptor_compare.tsv` captures the same header-word and printable-marker comparison for the `EVENT` island so large event-bearing descriptors can be contrasted with neighboring trigger and referent records.") lines.append("- `boot_frontier_graph.md` renders the upstream referent neighborhood feeding `AND_BOOT` / `BRO_BOOT`, which is currently the clearest unexplored boot-event frontier.") lines.append("- `boot_family_compare.tsv` compares the five `_BOOT` event cores (`AND_BOOT`, `BRO_BOOT`, `COR_BOOT`, `VAR_BOOT`, `REE_BOOT`) by header words, markers, and field tags.") lines.append("- `environmental_event_graph.md` renders the three hazard/event islands centered on `FLAMEBOX`, `NOSTRIL`, and `STEAMBOX`, each surrounded by its own referent-heavy local neighborhood.") lines.append("- `environmental_family_compare.tsv` compares the environmental event trio so the shared hazard pattern (`referent,event,,,direction,count`) can be contrasted directly.") lines.append("- `callback_trigger_compare.tsv` compares `SURCAMNS` and `SURCAMEW` directly so the callback-only `eventTrigger` lane can be checked against the active `event` families without raw hex dumps.") lines.append("- `event_family_index.tsv` and `event_family_summary.md` classify all current `event` and `eventTrigger` descriptors into reusable families such as boot-event cores, minimal event cores, environmental events, and callback-only surveillance triggers.") lines.append("- `readable_descriptor_templates.md` and `readable_descriptor_templates.tsv` emit conservative pseudo-script sketches for the strongest current anchor, event-hub, environmental, and callback lanes so USECODE neighborhoods can be read as structured attachments instead of only raw descriptor rows.") lines.append("- `runtime_vm_ir.tsv` captures the currently verified 000d VM operator vocabulary as machine-readable rows with stage addresses, opcode/lane status, payload shape, and evidence anchors.") lines.append("- `vm_mask_ladder.tsv` records the current `entity_vm_context_try_create_masked_for_entity` wrapper ladder in machine-readable form so gameplay mask lanes can be compared against descriptor-side families without reopening the notes.") lines.append("- `readable_script_ir.md` and `readable_script_ir.tsv` join descriptor neighborhoods, the verified VM IR, the runtime owner/source path, and the current mask-family hints into one conservative script-facing bridge artifact.") lines.append("- `runtime_descriptor_family_rankings.md` and `runtime_descriptor_family_rankings.tsv` rank descriptor families against the verified runtime lanes so the current human-readable script bridge is searchable by family fit rather than only by neighborhood dumps.") lines.append("- `immortality_target_body_scan.md` and `immortality_target_body_scan.tsv` now scan the strongest current immortality candidates (`EVENT`, `NPCTRIG`, `_BOOT`, `SFXTRIG`, `SPECIAL`, `TRIGPAD`) for inline `0x410` literals and record the tightest remaining active-event template frontier.") lines.append("- `immortality_npctrig_clauses.md` and `immortality_npctrig_clauses.tsv` now split the compact `NPCTRIG` slot `0x0A` / `0x20` bodies into prefix, clause, and tail regions so the event-bearing ladder can be compared against the typed/setup companion body without reopening raw hex.") (out_dir / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8") all_strings = iter_printable_runs(data) (out_dir / "all_strings.txt").write_text("\n".join(all_strings) + ("\n" if all_strings else ""), encoding="utf-8") def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("input", nargs="?", type=pathlib.Path, default=DEFAULT_INPUT) parser.add_argument("output", nargs="?", type=pathlib.Path, default=DEFAULT_OUTPUT) parser.add_argument( "--validation-profile", choices=["auto", "remorse", "regret", "none"], default=DEFAULT_VALIDATION_PROFILE, help="Repeated-family regression baseline to enforce (default: auto)", ) return parser.parse_args() def main() -> int: args = parse_args() validation_profile = resolve_validation_profile(args.input, args.output, args.validation_profile) data = args.input.read_bytes() args.output.mkdir(parents=True, exist_ok=True) flx_table = parse_flx_table(data) entries = flx_table.entries chunks = extract_candidates(data, args.output, entries) write_summary(args.output, args.input, data, entries, chunks, validation_profile) print( "Parsed " f"{flx_table.entry_count} table slots with {len(chunks)} non-zero entries; " f"validation profile {validation_profile}; extracted to {args.output}" ) return 0 if __name__ == "__main__": raise SystemExit(main())