Add detailed class event processing and family comparison tools

- Enhance `extract_eusecode_flx.py` to derive class event rows with additional metadata, including derived body windows and repeated-template statuses.
- Introduce `usecode_family_compare.py` for comparing event families, analyzing commonalities in event bodies, and generating reports on identical groups and differences.
- Implement new data structures for managing class event rows and family artifact specifications.
- Update output formats to include derived body information and repeated-family regression checks.
- Ensure robust validation of repeated-family expectations against actual extracted data.
2026-03-22 23:24:46 +01:00 · 2026-03-22 23:24:46 +01:00 · 4d3c8cd81b
commit 4d3c8cd81b
parent de42fd1ea1
23 changed files with 15033 additions and 14221 deletions
--- a/tools/extract_eusecode_flx.py
+++ b/tools/extract_eusecode_flx.py
@ -15,6 +15,7 @@ to support the next decoding pass.
 from __future__ import annotations

 import argparse
+import hashlib
 import json
 import pathlib
 import struct
@ -61,6 +62,21 @@ class ExtractedChunk:
    class_parse_status: str | None = None


+@dataclass(frozen=True)
+class ClassEventRow:
+    """One 6-byte event-table row of a parsed class chunk plus its derived body window.
+
+    Instances are built by ``derive_class_event_rows``. The three
+    ``derived_body_*`` fields are ``None`` when the row's code offset is zero,
+    when the chunk has no ``code_base_minus_one``, or when the computed window
+    does not fit inside the chunk's raw bytes.
+    """
+    # Index of the owning chunk (copied from ``ExtractedChunk.index``).
+    entry_index: int
+    # Object index and class id copied from the owning chunk.
+    object_index: int
+    class_id: int
+    # Class-name hint of the owning chunk; empty string when the chunk has none.
+    class_name_hint: str
+    # Zero-based position of the row inside the chunk's event table.
+    slot: int
+    # Result of ``scummvm_event_name_hint(slot)``; a hint only, may be ``None``.
+    event_name_hint: str | None
+    # Little-endian u16 read at byte ``20 + 6 * slot`` of the chunk.
+    raw_event_entry_word: int
+    # Little-endian u32 read two bytes after ``raw_event_entry_word``.
+    raw_code_offset: int
+    # ``code_base_minus_one + raw_code_offset``.
+    derived_body_start: int | None
+    # ``code_base_minus_one`` plus the next larger non-zero code offset in the
+    # same table, or the chunk length when this row has the largest offset.
+    derived_body_end: int | None
+    # ``derived_body_end - derived_body_start``.
+    derived_body_length: int | None
+
+
@dataclass(frozen=True)
 class FlxTable:
    entry_count: int
@ -69,6 +85,25 @@ class FlxTable:
    entries: list[CandidateEntry]


+@dataclass(frozen=True)
+class FamilyArtifactSpec:
+    """Describes one per-family decompile artifact pair (``.md`` + ``.tsv``)."""
+    # File-name stem; the writer emits ``<output_stem>.md`` and ``<output_stem>.tsv``.
+    output_stem: str
+    # Text of the top-level Markdown heading of the ``.md`` artifact.
+    title: str
+    # ``primary_label`` values of the chunks that belong to this family.
+    labels: tuple[str, ...]
+
+
+@dataclass(frozen=True)
+class RepeatedFamilyRowExpectation:
+    """Pinned values for one event row of a verified repeated family.
+
+    ``validate_verified_repeated_family_regressions`` compares every field
+    except ``class_name`` and ``slot`` (which select the row) against the
+    values extracted from the current input.
+    """
+    # Matched against ``ExtractedChunk.primary_label``.
+    class_name: str
+    # Event-table slot of the row being pinned.
+    slot: int
+    # Expected raw table fields of that row.
+    raw_event_entry_word: int
+    raw_code_offset: int
+    # Expected derived body window; unlike ``ClassEventRow`` these are never
+    # ``None``, so a row whose window could not be derived always mismatches.
+    derived_body_start: int
+    derived_body_end: int
+    derived_body_length: int
+    # Expected ``<family>/shared-slot-0xNN/<suffix>`` status string.
+    repeated_template_status: str
+
+
 def read_u32_le(data: bytes, offset: int) -> int:
    return struct.unpack_from("<I", data, offset)[0]

@ -454,6 +489,73 @@ SCUMMVM_EVENT_NAME_HINTS: tuple[str, ...] = (
 )


+# (family name, member class labels) pairs consumed by
+# ``build_repeated_template_status_map``. The family name becomes the first
+# path segment of every ``repeated_template_status`` string, and the labels
+# are matched against ``ExtractedChunk.primary_label``.
+VERIFIED_REPEATED_TEMPLATE_FAMILIES: tuple[tuple[str, tuple[str, ...]], ...] = (
+    ("referent-anchor-twin", ("JELYHACK", "JELYH2")),
+    ("boot-event-core", ("AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT")),
+    ("callback-eventtrigger", ("SURCAMNS", "SURCAMEW")),
+    ("environmental-event", ("FLAMEBOX", "NOSTRIL", "STEAMBOX")),
+)
+
+
+# One spec per family that gets a ``<stem>.md`` / ``<stem>.tsv`` decompile
+# artifact. The label tuples repeat the boot, callback and environmental
+# entries of ``VERIFIED_REPEATED_TEMPLATE_FAMILIES``; the JELYHACK/JELYH2
+# family has no artifact spec here.
+FAMILY_ARTIFACT_SPECS: tuple[FamilyArtifactSpec, ...] = (
+    FamilyArtifactSpec(
+        output_stem="boot_family_decompile",
+        title="_BOOT Family Decompiled Event Sketches",
+        labels=("AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT"),
+    ),
+    FamilyArtifactSpec(
+        output_stem="callback_family_decompile",
+        title="SURCAM Callback Family Decompiled Event Sketches",
+        labels=("SURCAMNS", "SURCAMEW"),
+    ),
+    FamilyArtifactSpec(
+        output_stem="environmental_family_decompile",
+        title="Environmental Family Decompiled Event Sketches",
+        labels=("FLAMEBOX", "NOSTRIL", "STEAMBOX"),
+    ),
+)
+
+
+# Regression table checked by ``validate_verified_repeated_family_regressions``.
+# Positional columns, in order:
+#   class_name, slot, raw_event_entry_word, raw_code_offset,
+#   derived_body_start, derived_body_end, derived_body_length,
+#   repeated_template_status
+# The slots listed for a class are also treated as the complete set of
+# non-zero-offset slots that class is expected to have.
+# In every row below, derived_body_length equals derived_body_end minus
+# derived_body_start and also equals raw_event_entry_word.
+VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS: tuple[RepeatedFamilyRowExpectation, ...] = (
+    RepeatedFamilyRowExpectation("JELYHACK", 0x01, 0x002A, 0x00000001, 0x00D4, 0x00FE, 42, "referent-anchor-twin/shared-slot-0x01/same-length-template"),
+    RepeatedFamilyRowExpectation("JELYH2", 0x01, 0x002A, 0x00000001, 0x00D4, 0x00FE, 42, "referent-anchor-twin/shared-slot-0x01/same-length-template"),
+    RepeatedFamilyRowExpectation("AND_BOOT", 0x0A, 0x0253, 0x00000001, 0x00D4, 0x0327, 595, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("AND_BOOT", 0x0F, 0x0237, 0x00000254, 0x0327, 0x055E, 567, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
+    RepeatedFamilyRowExpectation("AND_BOOT", 0x10, 0x003B, 0x0000048B, 0x055E, 0x0599, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
+    RepeatedFamilyRowExpectation("BRO_BOOT", 0x0A, 0x02D5, 0x00000001, 0x00D4, 0x03A9, 725, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("BRO_BOOT", 0x0F, 0x024C, 0x000002D6, 0x03A9, 0x05F5, 588, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
+    RepeatedFamilyRowExpectation("BRO_BOOT", 0x10, 0x003B, 0x00000522, 0x05F5, 0x0630, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
+    RepeatedFamilyRowExpectation("COR_BOOT", 0x0A, 0x0227, 0x00000001, 0x00D4, 0x02FB, 551, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("COR_BOOT", 0x0F, 0x0234, 0x00000228, 0x02FB, 0x052F, 564, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
+    RepeatedFamilyRowExpectation("COR_BOOT", 0x10, 0x003B, 0x0000045C, 0x052F, 0x056A, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
+    RepeatedFamilyRowExpectation("REE_BOOT", 0x0A, 0x034B, 0x00000001, 0x00D4, 0x041F, 843, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("REE_BOOT", 0x0F, 0x025C, 0x0000034C, 0x041F, 0x067B, 604, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
+    RepeatedFamilyRowExpectation("REE_BOOT", 0x10, 0x003B, 0x000005A8, 0x067B, 0x06B6, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
+    RepeatedFamilyRowExpectation("VAR_BOOT", 0x0A, 0x029A, 0x00000001, 0x00D4, 0x036E, 666, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("VAR_BOOT", 0x0F, 0x0244, 0x0000029B, 0x036E, 0x05B2, 580, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
+    RepeatedFamilyRowExpectation("VAR_BOOT", 0x10, 0x003B, 0x000004DF, 0x05B2, 0x05ED, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
+    RepeatedFamilyRowExpectation("SURCAMNS", 0x01, 0x0051, 0x000000D2, 0x01B7, 0x0208, 81, "callback-eventtrigger/shared-slot-0x01/shared-slot-template"),
+    RepeatedFamilyRowExpectation("SURCAMNS", 0x0A, 0x00D1, 0x00000001, 0x00E6, 0x01B7, 209, "callback-eventtrigger/shared-slot-0x0A/same-length-template"),
+    RepeatedFamilyRowExpectation("SURCAMNS", 0x20, 0x02BA, 0x00000123, 0x0208, 0x04C2, 698, "callback-eventtrigger/shared-slot-0x20/same-length-template"),
+    RepeatedFamilyRowExpectation("SURCAMNS", 0x21, 0x0709, 0x000003DD, 0x04C2, 0x0BCB, 1801, "callback-eventtrigger/shared-slot-0x21/shared-slot-template"),
+    RepeatedFamilyRowExpectation("SURCAMNS", 0x22, 0x01A3, 0x00000AE6, 0x0BCB, 0x0D6E, 419, "callback-eventtrigger/shared-slot-0x22/same-length-template"),
+    RepeatedFamilyRowExpectation("SURCAMEW", 0x01, 0x00F7, 0x000000D2, 0x01B7, 0x02AE, 247, "callback-eventtrigger/shared-slot-0x01/shared-slot-template"),
+    RepeatedFamilyRowExpectation("SURCAMEW", 0x0A, 0x00D1, 0x00000001, 0x00E6, 0x01B7, 209, "callback-eventtrigger/shared-slot-0x0A/same-length-template"),
+    RepeatedFamilyRowExpectation("SURCAMEW", 0x20, 0x02BA, 0x000001C9, 0x02AE, 0x0568, 698, "callback-eventtrigger/shared-slot-0x20/same-length-template"),
+    RepeatedFamilyRowExpectation("SURCAMEW", 0x21, 0x0655, 0x00000483, 0x0568, 0x0BBD, 1621, "callback-eventtrigger/shared-slot-0x21/shared-slot-template"),
+    RepeatedFamilyRowExpectation("SURCAMEW", 0x22, 0x01A3, 0x00000AD8, 0x0BBD, 0x0D60, 419, "callback-eventtrigger/shared-slot-0x22/same-length-template"),
+    RepeatedFamilyRowExpectation("FLAMEBOX", 0x0A, 0x026A, 0x00000001, 0x00E0, 0x034A, 618, "environmental-event/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("FLAMEBOX", 0x20, 0x01AC, 0x0000026B, 0x034A, 0x04F6, 428, "environmental-event/shared-slot-0x20/shared-slot-template"),
+    RepeatedFamilyRowExpectation("FLAMEBOX", 0x21, 0x029A, 0x00000417, 0x04F6, 0x0790, 666, "environmental-event/shared-slot-0x21/shared-slot-template"),
+    RepeatedFamilyRowExpectation("NOSTRIL", 0x0A, 0x00C0, 0x00000001, 0x00E0, 0x01A0, 192, "environmental-event/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("NOSTRIL", 0x20, 0x0129, 0x000000C1, 0x01A0, 0x02C9, 297, "environmental-event/shared-slot-0x20/shared-slot-template"),
+    RepeatedFamilyRowExpectation("NOSTRIL", 0x21, 0x01BE, 0x000001EA, 0x02C9, 0x0487, 446, "environmental-event/shared-slot-0x21/shared-slot-template"),
+    RepeatedFamilyRowExpectation("STEAMBOX", 0x0A, 0x0266, 0x00000001, 0x00E0, 0x0346, 614, "environmental-event/shared-slot-0x0A/shared-slot-template"),
+    RepeatedFamilyRowExpectation("STEAMBOX", 0x20, 0x01F6, 0x00000267, 0x0346, 0x053C, 502, "environmental-event/shared-slot-0x20/shared-slot-template"),
+    RepeatedFamilyRowExpectation("STEAMBOX", 0x21, 0x02A7, 0x0000045D, 0x053C, 0x07E3, 679, "environmental-event/shared-slot-0x21/shared-slot-template"),
+)
+
+
 def scummvm_event_name_hint(slot: int) -> str | None:
    if 0 <= slot < len(SCUMMVM_EVENT_NAME_HINTS):
        return SCUMMVM_EVENT_NAME_HINTS[slot]
@ -532,6 +634,368 @@ def annotate_class_layout(chunks: list[ExtractedChunk]) -> None:
        chunk.class_parse_status = "parsed-class-layout"


+def derive_class_event_rows(chunk: ExtractedChunk, raw_data: bytes) -> list[ClassEventRow]:
+    """Expand a parsed class chunk's event table into ``ClassEventRow`` records.
+
+    The table starts at byte 20 of ``raw_data`` and holds
+    ``chunk.conservative_event_count`` rows of 6 bytes each: a little-endian
+    u16 word followed by a little-endian u32 code offset.
+
+    For a row with a non-zero code offset the body window runs from
+    ``code_base_minus_one + offset`` up to ``code_base_minus_one`` plus the
+    next larger non-zero offset found in the same table, or to the end of
+    ``raw_data`` when there is no larger offset. Windows that fall outside
+    ``raw_data`` are reported as ``None``.
+
+    Returns an empty list when the chunk is not in the
+    ``"parsed-class-layout"`` state or lacks an object index, class id or
+    event count.
+    """
+    if chunk.class_parse_status != "parsed-class-layout":
+        return []
+    event_count = chunk.conservative_event_count
+    if chunk.object_index is None or chunk.class_id is None or event_count is None:
+        return []
+
+    # (slot, leading u16 word, u32 code offset) for every table row.
+    table_entries = [
+        (
+            slot,
+            read_u16_le(raw_data, 20 + 6 * slot),
+            read_u32_le(raw_data, 20 + 6 * slot + 2),
+        )
+        for slot in range(event_count)
+    ]
+
+    # Map each distinct non-zero offset to the next larger one; the largest
+    # offset has no entry and therefore runs to the end of the chunk.
+    ordered_offsets = sorted({code_offset for _, _, code_offset in table_entries if code_offset != 0})
+    successor_of = dict(zip(ordered_offsets, ordered_offsets[1:]))
+
+    code_base = chunk.code_base_minus_one
+    chunk_size = len(raw_data)
+
+    def body_window(code_offset: int) -> tuple[int | None, int | None, int | None]:
+        """Return (start, end, length) for one row, or three ``None`` values."""
+        if code_offset == 0 or code_base is None:
+            return None, None, None
+        start = code_base + code_offset
+        following = successor_of.get(code_offset)
+        end = chunk_size if following is None else code_base + following
+        if not (0 <= start <= end <= chunk_size):
+            return None, None, None
+        return start, end, end - start
+
+    rows: list[ClassEventRow] = []
+    for slot, entry_word, code_offset in table_entries:
+        window_start, window_end, window_length = body_window(code_offset)
+        rows.append(
+            ClassEventRow(
+                entry_index=chunk.index,
+                object_index=chunk.object_index,
+                class_id=chunk.class_id,
+                class_name_hint=chunk.class_name_hint or "",
+                slot=slot,
+                event_name_hint=scummvm_event_name_hint(slot),
+                raw_event_entry_word=entry_word,
+                raw_code_offset=code_offset,
+                derived_body_start=window_start,
+                derived_body_end=window_end,
+                derived_body_length=window_length,
+            )
+        )
+    return rows
+
+
+def build_class_event_rows(
+    parsed_class_chunks: list[ExtractedChunk],
+) -> tuple[list[ClassEventRow], dict[int, list[ClassEventRow]], dict[int, bytes]]:
+    """Load each chunk's raw bytes from ``raw_path`` and derive its event rows.
+
+    Returns a 3-tuple:
+      * every derived row, in chunk order then slot order,
+      * the same rows grouped by ``chunk.index``,
+      * the raw bytes read for each chunk, keyed by ``chunk.index``.
+    """
+    flat_rows: list[ClassEventRow] = []
+    grouped_rows: dict[int, list[ClassEventRow]] = {}
+    payloads: dict[int, bytes] = {}
+
+    for class_chunk in parsed_class_chunks:
+        entry = class_chunk.index
+        payload = pathlib.Path(class_chunk.raw_path).read_bytes()
+        payloads[entry] = payload
+        derived = derive_class_event_rows(class_chunk, payload)
+        grouped_rows[entry] = derived
+        flat_rows += derived
+
+    return flat_rows, grouped_rows, payloads
+
+
+def build_repeated_template_status_map(
+    parsed_class_chunks: list[ExtractedChunk],
+    rows_by_entry: dict[int, list[ClassEventRow]],
+    raw_data_by_entry: dict[int, bytes],
+) -> dict[tuple[int, int], str]:
+    """Tag event rows that recur across the members of a verified family.
+
+    For every family in ``VERIFIED_REPEATED_TEMPLATE_FAMILIES`` with at least
+    two members present, rows are grouped by slot. A slot that at least two
+    members implement (non-zero offset and a derived body window) is tagged
+    ``<family>/shared-slot-0xNN/<suffix>``, where the suffix is:
+
+      * ``exact-body-clone``     - all body bytes are identical,
+      * ``same-length-template`` - bodies differ but share one length,
+      * ``shared-slot-template`` - bodies differ in length.
+
+    Returns a mapping keyed by ``(chunk.index, slot)``.
+    """
+    label_to_chunk: dict[str, ExtractedChunk] = {}
+    for candidate in parsed_class_chunks:
+        if candidate.primary_label:
+            label_to_chunk[candidate.primary_label] = candidate
+
+    tagged: dict[tuple[int, int], str] = {}
+
+    for family_name, labels in VERIFIED_REPEATED_TEMPLATE_FAMILIES:
+        members = [label_to_chunk[label] for label in labels if label in label_to_chunk]
+        if len(members) < 2:
+            continue
+
+        # slot -> [(chunk index, body bytes)] for every member implementing it.
+        occurrences_by_slot: dict[int, list[tuple[int, bytes]]] = {}
+        for member in members:
+            payload = raw_data_by_entry.get(member.index)
+            if payload is None:
+                continue
+            for row in rows_by_entry.get(member.index, []):
+                start, end = row.derived_body_start, row.derived_body_end
+                if row.raw_code_offset == 0 or start is None or end is None:
+                    continue
+                occurrences_by_slot.setdefault(row.slot, []).append((member.index, payload[start:end]))
+
+        for slot, occurrences in occurrences_by_slot.items():
+            if len(occurrences) < 2:
+                continue
+
+            distinct_bodies = {body for _, body in occurrences}
+            if len(distinct_bodies) == 1:
+                suffix = "exact-body-clone"
+            elif len({len(body) for body in distinct_bodies}) == 1:
+                suffix = "same-length-template"
+            else:
+                suffix = "shared-slot-template"
+
+            label = f"{family_name}/shared-slot-0x{slot:02X}/{suffix}"
+            for entry_index, _ in occurrences:
+                tagged[(entry_index, slot)] = label
+
+    return tagged
+
+
+def format_optional_hex(value: int | None, width: int = 0) -> str:
+    """Render ``value`` as ``0x``-prefixed upper-case hex, or ``""`` for ``None``.
+
+    A positive ``width`` zero-pads the digits to that many characters; a
+    zero or negative ``width`` applies no padding.
+    """
+    if value is None:
+        return ""
+    digits_spec = f"0{width}X" if width > 0 else "X"
+    return "0x" + format(value, digits_spec)
+
+
+def hex_edge(data: bytes, width: int = 8) -> str:
+    """Return the first ``width`` bytes of ``data`` as lower-case hex (``""`` if empty)."""
+    return data[:width].hex() if data else ""
+
+
+def hex_tail(data: bytes, width: int = 8) -> str:
+    """Return the last ``width`` bytes of ``data`` as lower-case hex.
+
+    Returns ``""`` for empty ``data`` and for a non-positive ``width``.
+    Without the explicit width guard, ``data[-0:]`` would return the whole
+    buffer and a negative width would slice from the front instead of the
+    back.
+    """
+    if not data or width <= 0:
+        return ""
+    return data[-width:].hex()
+
+
+def write_family_decompile_artifact(
+    out_dir: pathlib.Path,
+    parsed_class_chunks: list[ExtractedChunk],
+    rows_by_entry: dict[int, list[ClassEventRow]],
+    raw_data_by_entry: dict[int, bytes],
+    repeated_status_by_row: dict[tuple[int, int], str],
+    spec: FamilyArtifactSpec,
+) -> None:
+    """Write ``<spec.output_stem>.md`` and ``.tsv`` for one class family.
+
+    Only chunks whose ``primary_label`` is listed in ``spec.labels`` are
+    rendered, sorted by label, and only rows with a non-zero
+    ``raw_code_offset`` are emitted. Nothing is written when no chunk
+    matches the spec. When chunks match but none has a non-zero row, both
+    files are still written and contain only their header lines.
+
+    Raises ``KeyError`` if a rendered chunk has no entry in
+    ``raw_data_by_entry``.
+    """
+    family_labels = set(spec.labels)
+    family_chunks = [chunk for chunk in parsed_class_chunks if chunk.primary_label in family_labels]
+    if not family_chunks:
+        return
+
+    family_chunks.sort(key=lambda chunk: chunk.primary_label or "")
+
+    # The TSV starts with its column header; data rows are appended per event.
+    tsv_lines = [
+        "entry_index\tclass_id\tclass_name\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset\tderived_body_start\tderived_body_end\tderived_body_length\trepeated_template_status\tbody_sha1\tbody_prefix_hex\tbody_suffix_hex"
+    ]
+    md_lines = [
+        f"# {spec.title}",
+        "",
+        "This is a reversible per-class rendering derived directly from `class_event_index.tsv` plus the raw extracted chunk bytes.",
+        "ScummVM event labels remain hints only; the authoritative data here is the slot id, raw row bytes, and derived body window.",
+        "",
+    ]
+
+    for chunk in family_chunks:
+        # Rows with a zero code offset are not rendered.
+        rows = [row for row in rows_by_entry.get(chunk.index, []) if row.raw_code_offset != 0]
+        if not rows:
+            continue
+        raw_data = raw_data_by_entry[chunk.index]
+
+        # Each class becomes one fenced YAML block under its own heading.
+        md_lines.extend([
+            f"## {chunk.primary_label}",
+            "",
+            "```yaml",
+            "class:",
+            f"  entry_index: 0x{chunk.index:03X}",
+            f"  class_id: 0x{chunk.class_id:X}",
+            f"  class_name: {chunk.primary_label}",
+            f"  class_object_index: 0x{chunk.object_index:X}",
+            f"  raw_code_base_u32: 0x{chunk.raw_code_base_u32:X}",
+            f"  code_base_minus_one: 0x{chunk.code_base_minus_one:X}",
+            f"  conservative_event_count: {chunk.conservative_event_count}",
+            "  events:",
+        ])
+
+        for row in rows:
+            # The body stays empty when no window could be derived for the row.
+            body = b""
+            if row.derived_body_start is not None and row.derived_body_end is not None:
+                body = raw_data[row.derived_body_start:row.derived_body_end]
+            repeated_status = repeated_status_by_row.get((row.entry_index, row.slot), "")
+            # SHA-1 hex digest of the body bytes; left blank for an empty body.
+            body_sha1 = hashlib.sha1(body).hexdigest() if body else ""
+
+            # The Markdown rendering uses lower-case hex and spells missing values as "null".
+            md_lines.extend([
+                f"    - slot: 0x{row.slot:02x}",
+                f"      event_name_hint: {row.event_name_hint or ''}",
+                f"      raw_event_entry_word: 0x{row.raw_event_entry_word:04x}",
+                f"      raw_code_offset: 0x{row.raw_code_offset:08x}",
+                f"      derived_body_start: {format_optional_hex(row.derived_body_start, 4).lower() or 'null'}",
+                f"      derived_body_end: {format_optional_hex(row.derived_body_end, 4).lower() or 'null'}",
+                f"      derived_body_length: {row.derived_body_length if row.derived_body_length is not None else 'null'}",
+                f"      repeated_template_status: {repeated_status or 'unique-or-unclassified'}",
+                f"      body_sha1: {body_sha1 or 'null'}",
+                f"      body_prefix_hex: {hex_edge(body) or 'null'}",
+                f"      body_suffix_hex: {hex_tail(body) or 'null'}",
+            ])
+
+            # The TSV rendering uses upper-case hex and leaves missing values empty.
+            tsv_lines.append(
+                "{entry_index}\t0x{class_id:X}\t{class_name}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_event_entry_word:04X}\t0x{raw_code_offset:08X}\t{derived_body_start}\t{derived_body_end}\t{derived_body_length}\t{repeated_template_status}\t{body_sha1}\t{body_prefix_hex}\t{body_suffix_hex}".format(
+                    entry_index=row.entry_index,
+                    class_id=row.class_id,
+                    class_name=chunk.primary_label or "",
+                    slot=row.slot,
+                    event_name_hint=row.event_name_hint or "",
+                    raw_event_entry_word=row.raw_event_entry_word,
+                    raw_code_offset=row.raw_code_offset,
+                    derived_body_start=format_optional_hex(row.derived_body_start, 4),
+                    derived_body_end=format_optional_hex(row.derived_body_end, 4),
+                    derived_body_length=(row.derived_body_length if row.derived_body_length is not None else ""),
+                    repeated_template_status=repeated_status,
+                    body_sha1=body_sha1,
+                    body_prefix_hex=hex_edge(body),
+                    body_suffix_hex=hex_tail(body),
+                )
+            )
+
+        md_lines.extend([
+            "```",
+            "",
+        ])
+
+    # The TSV gets a trailing newline appended; the Markdown text is joined as-is.
+    (out_dir / f"{spec.output_stem}.md").write_text("\n".join(md_lines), encoding="utf-8")
+    (out_dir / f"{spec.output_stem}.tsv").write_text("\n".join(tsv_lines) + "\n", encoding="utf-8")
+
+
+def validate_verified_repeated_family_regressions(
+    parsed_class_chunks: list[ExtractedChunk],
+    rows_by_entry: dict[int, list[ClassEventRow]],
+    repeated_status_by_row: dict[tuple[int, int], str],
+) -> list[str]:
+    """Check extracted rows against ``VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS``.
+
+    Two passes are made:
+
+    1. ``slot-set`` - for each expected class, the set of slots with a
+       non-zero code offset must equal the set of slots listed for it.
+    2. ``row`` - for each expectation, the raw entry word, raw code offset,
+       derived body window and repeated-template status must all match.
+
+    Returns the tab-separated report lines (header first) when everything
+    matches.
+
+    Raises ``ValueError`` listing every mismatch otherwise; in that case the
+    report lines are not returned to the caller.
+    """
+    chunk_by_label = {
+        chunk.primary_label: chunk
+        for chunk in parsed_class_chunks
+        if chunk.primary_label
+    }
+    # The slots listed for a class double as its complete expected slot set.
+    expected_slots_by_class: dict[str, set[int]] = {}
+    for expectation in VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS:
+        expected_slots_by_class.setdefault(expectation.class_name, set()).add(expectation.slot)
+
+    report_lines = [
+        "record_type\tclass_name\tslot\texpected\tactual\tstatus"
+    ]
+    errors: list[str] = []
+
+    # Pass 1: compare the set of implemented (non-zero offset) slots per class.
+    for class_name, expected_slots in sorted(expected_slots_by_class.items()):
+        chunk = chunk_by_label.get(class_name)
+        # A missing class leaves the actual set empty and is reported as a mismatch.
+        actual_slots: set[int] = set()
+        if chunk is not None:
+            actual_slots = {
+                row.slot
+                for row in rows_by_entry.get(chunk.index, [])
+                if row.raw_code_offset != 0
+            }
+        status = "ok" if actual_slots == expected_slots else "mismatch"
+        report_lines.append(
+            "slot-set\t{class_name}\t*\t{expected}\t{actual}\t{status}".format(
+                class_name=class_name,
+                expected=",".join(f"0x{slot:02X}" for slot in sorted(expected_slots)),
+                actual=",".join(f"0x{slot:02X}" for slot in sorted(actual_slots)),
+                status=status,
+            )
+        )
+        if status != "ok":
+            errors.append(
+                f"{class_name}: expected non-zero slots {sorted(expected_slots)}, found {sorted(actual_slots)}"
+            )
+
+    # Pass 2: compare each pinned row field by field.
+    for expectation in VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS:
+        chunk = chunk_by_label.get(expectation.class_name)
+        if chunk is None:
+            # Recorded once per expectation, in addition to the slot-set error above.
+            errors.append(f"missing repeated-family class {expectation.class_name}")
+            report_lines.append(
+                f"row\t{expectation.class_name}\t0x{expectation.slot:02X}\tpresent\tmissing-class\tmismatch"
+            )
+            continue
+
+        row = next(
+            (candidate for candidate in rows_by_entry.get(chunk.index, []) if candidate.slot == expectation.slot),
+            None,
+        )
+        if row is None:
+            errors.append(f"missing row {expectation.class_name} slot 0x{expectation.slot:02X}")
+            report_lines.append(
+                f"row\t{expectation.class_name}\t0x{expectation.slot:02X}\tpresent\tmissing-row\tmismatch"
+            )
+            continue
+
+        # Both tuples list the compared fields in the same order.
+        actual_values = (
+            row.raw_event_entry_word,
+            row.raw_code_offset,
+            row.derived_body_start,
+            row.derived_body_end,
+            row.derived_body_length,
+            repeated_status_by_row.get((row.entry_index, row.slot), ""),
+        )
+        expected_values = (
+            expectation.raw_event_entry_word,
+            expectation.raw_code_offset,
+            expectation.derived_body_start,
+            expectation.derived_body_end,
+            expectation.derived_body_length,
+            expectation.repeated_template_status,
+        )
+        status = "ok" if actual_values == expected_values else "mismatch"
+        # Expected and actual are rendered as "|"-joined fields in the report.
+        report_lines.append(
+            "row\t{class_name}\t0x{slot:02X}\t{expected}\t{actual}\t{status}".format(
+                class_name=expectation.class_name,
+                slot=expectation.slot,
+                expected="|".join(
+                    [
+                        f"0x{expectation.raw_event_entry_word:04X}",
+                        f"0x{expectation.raw_code_offset:08X}",
+                        f"0x{expectation.derived_body_start:04X}",
+                        f"0x{expectation.derived_body_end:04X}",
+                        str(expectation.derived_body_length),
+                        expectation.repeated_template_status,
+                    ]
+                ),
+                actual="|".join(
+                    [
+                        f"0x{row.raw_event_entry_word:04X}",
+                        f"0x{row.raw_code_offset:08X}",
+                        format_optional_hex(row.derived_body_start, 4),
+                        format_optional_hex(row.derived_body_end, 4),
+                        str(row.derived_body_length if row.derived_body_length is not None else ""),
+                        repeated_status_by_row.get((row.entry_index, row.slot), ""),
+                    ]
+                ),
+                status=status,
+            )
+        )
+        if status != "ok":
+            errors.append(
+                "{class_name} slot 0x{slot:02X}: expected {expected}, found {actual}".format(
+                    class_name=expectation.class_name,
+                    slot=expectation.slot,
+                    expected=expected_values,
+                    actual=actual_values,
+                )
+            )
+
+    # Any mismatch aborts with the full list of problems.
+    if errors:
+        raise ValueError(
+            "repeated-family regression mismatch:\n- " + "\n- ".join(errors)
+        )
+
+    return report_lines
+
+
 def readable_neighbor_chunks(
    center: ExtractedChunk,
    chunk_by_index: dict[int, ExtractedChunk],
@ -1556,6 +2020,17 @@ def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes,
        "entry_index\tobject_index\tclass_id\tclass_name_hint\traw_code_base_u32\tcode_base_minus_one\tconservative_event_count\tevent_table_end\tclass_parse_status\tdata_offset\tdeclared_size\tprimary_label"
    ]
    parsed_class_chunks = [chunk for chunk in chunks if chunk.class_parse_status == "parsed-class-layout"]
+    class_event_rows, rows_by_entry, raw_data_by_entry = build_class_event_rows(parsed_class_chunks)
+    repeated_status_by_row = build_repeated_template_status_map(
+        parsed_class_chunks,
+        rows_by_entry,
+        raw_data_by_entry,
+    )
+    repeated_family_regression_lines = validate_verified_repeated_family_regressions(
+        parsed_class_chunks,
+        rows_by_entry,
+        repeated_status_by_row,
+    )
    for chunk in parsed_class_chunks:
        class_layout_lines.append(
            "{index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{raw_code_base_u32:X}\t0x{code_base_minus_one:X}\t{conservative_event_count}\t0x{event_table_end:X}\t{class_parse_status}\t0x{data_offset:X}\t0x{declared_size:X}\t{primary_label}".format(
@ -1576,28 +2051,39 @@ def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes,
    (out_dir / "class_layout_index.tsv").write_text("\n".join(class_layout_lines) + "\n", encoding="utf-8")

    class_event_lines = [
-        "entry_index\tobject_index\tclass_id\tclass_name_hint\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset"
+        "entry_index\tobject_index\tclass_id\tclass_name_hint\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset\tderived_body_start\tderived_body_end\tderived_body_length\trepeated_template_status"
    ]
-    for chunk in parsed_class_chunks:
-        raw_data = pathlib.Path(chunk.raw_path).read_bytes()
-        assert chunk.conservative_event_count is not None
-        for slot in range(chunk.conservative_event_count):
-            entry_offset = 20 + 6 * slot
-            raw_word = read_u16_le(raw_data, entry_offset)
-            raw_code_offset = read_u32_le(raw_data, entry_offset + 2)
-            class_event_lines.append(
-                "{entry_index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_word:04X}\t0x{raw_code_offset:08X}".format(
-                    entry_index=chunk.index,
-                    object_index=chunk.object_index,
-                    class_id=chunk.class_id,
-                    class_name_hint=chunk.class_name_hint or "",
-                    slot=slot,
-                    event_name_hint=scummvm_event_name_hint(slot) or "",
-                    raw_word=raw_word,
-                    raw_code_offset=raw_code_offset,
-                )
+    for row in class_event_rows:
+        class_event_lines.append(
+            "{entry_index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_event_entry_word:04X}\t0x{raw_code_offset:08X}\t{derived_body_start}\t{derived_body_end}\t{derived_body_length}\t{repeated_template_status}".format(
+                entry_index=row.entry_index,
+                object_index=row.object_index,
+                class_id=row.class_id,
+                class_name_hint=row.class_name_hint,
+                slot=row.slot,
+                event_name_hint=row.event_name_hint or "",
+                raw_event_entry_word=row.raw_event_entry_word,
+                raw_code_offset=row.raw_code_offset,
+                derived_body_start=format_optional_hex(row.derived_body_start, 4),
+                derived_body_end=format_optional_hex(row.derived_body_end, 4),
+                derived_body_length=(row.derived_body_length if row.derived_body_length is not None else ""),
+                repeated_template_status=repeated_status_by_row.get((row.entry_index, row.slot), ""),
            )
+        )
    (out_dir / "class_event_index.tsv").write_text("\n".join(class_event_lines) + "\n", encoding="utf-8")
+    for family_artifact_spec in FAMILY_ARTIFACT_SPECS:
+        write_family_decompile_artifact(
+            out_dir,
+            parsed_class_chunks,
+            rows_by_entry,
+            raw_data_by_entry,
+            repeated_status_by_row,
+            family_artifact_spec,
+        )
+    (out_dir / "repeated_family_regressions.tsv").write_text(
+        "\n".join(repeated_family_regression_lines) + "\n",
+        encoding="utf-8",
+    )

    neighborhood_lines = [
        "center_index\tneighbor_index\tprimary_label\tfield_names\tfield_tags"
@ -1763,7 +2249,9 @@ def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes,
    lines.append("- `.strings.txt` files are the main human-readable output for now; `.txt` files are emitted only for chunks that look text-like.")
    lines.append("- `descriptor_index.tsv` summarizes guessed class labels, field names, and compact tag patterns for descriptor-like chunks.")
    lines.append("- `class_layout_index.tsv` records the conservative owner-loaded class parsing state: object index, class id, class-name hint, raw bytes-8..11 field, derived code-base-minus-one, and event-count/table-end values when the local divisibility and bounds checks succeed.")
-    lines.append("- `class_event_index.tsv` expands parsed owner-loaded classes into raw 6-byte event rows with slot numbers, ScummVM event-name hints for `0x00..0x1f`, unresolved leading words, and raw code-offset dwords for round-trip tooling work.")
+    lines.append("- `class_event_index.tsv` now also emits derived body-window columns (`derived_body_start`, `derived_body_end`, `derived_body_length`) plus conservative `repeated_template_status` tags for verified repeated families.")
+    lines.append("- `boot_family_decompile.md` / `.tsv`, `callback_family_decompile.md` / `.tsv`, and `environmental_family_decompile.md` / `.tsv` now provide reversible per-class decompile artifacts for the `_BOOT`, `SURCAM*`, and environmental repeated-family lanes.")
+    lines.append("- `repeated_family_regressions.tsv` enforces the current repeated-family slot sets plus the verified raw-row and derived body-window fields for `JELYHACK/JELYH2`, `_BOOT`, `SURCAM*`, and `FLAMEBOX/NOSTRIL/STEAMBOX`.")
    lines.append("- `descriptor_neighborhoods.tsv` captures local table neighborhoods around trigger/event-related classes such as `JELYHACK`, `NPCTRIG`, `CRUZTRIG`, `TRIGPAD`, and `SPECIAL`.")
    lines.append("- `referent_anchor_event_graph.tsv` groups referent-bearing descriptors with nearby event-bearing neighbors so the attachment model can be inspected without ad hoc grepping.")
    lines.append("- `jelyhack_island_graph.md` now uses a wider local window so the `JELYHACK` / `JELYH2` anchors can be inspected alongside the nearby event-bearing `REE_BOOT`, `SURCAMEW`, and `SFXTRIG` descriptors rather than stopping at the referent-only neighbors.")
--- a/tools/usecode_family_compare.py
+++ b/tools/usecode_family_compare.py
@ -0,0 +1,165 @@
+import csv
+import glob
+import hashlib
+import json
+from pathlib import Path
+
+
+# Two levels up from this file: parents[0] is the directory containing the
+# script, parents[1] is that directory's parent.
+ROOT = Path(__file__).resolve().parents[1]
+# TSV inputs read by load_layouts() and load_events() respectively.
+LAYOUT_PATH = ROOT / "USECODE" / "EUSECODE_extracted" / "class_layout_index.tsv"
+EVENT_PATH = ROOT / "USECODE" / "EUSECODE_extracted" / "class_event_index.tsv"
+# Directory searched by resolve_chunk() for per-class chunk binaries.
+CHUNKS_DIR = ROOT / "USECODE" / "EUSECODE_extracted" / "chunks"
+
+# Families compared by main(). "classes" holds the `class_name_hint` values to
+# load; "slots" holds the event slot numbers that main() reports for every
+# listed class. Slots are plain integers and are matched against the TSV
+# `slot` column after it has been parsed as hexadecimal.
+FAMILIES = {
+    "BOOT": {
+        "classes": ["AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT"],
+        "slots": [0x0A, 0x0F, 0x10],
+    },
+    "SURCAM": {
+        "classes": ["SURCAMNS", "SURCAMEW"],
+        "slots": [0x20, 0x21, 0x22],
+    },
+    "JELY": {
+        "classes": ["JELYHACK", "JELYH2"],
+        "slots": [0x01],
+    },
+}
+
+
+def parse_hex(value: str) -> int:
+    return int(value, 16)
+
+
+def common_prefix_length(blobs: list[bytes]) -> int:
+    if not blobs:
+        return 0
+    limit = min(len(blob) for blob in blobs)
+    for index in range(limit):
+        current = blobs[0][index]
+        if any(blob[index] != current for blob in blobs[1:]):
+            return index
+    return limit
+
+
+def common_suffix_length(blobs: list[bytes]) -> int:
+    return common_prefix_length([blob[::-1] for blob in blobs])
+
+
+def first_diff_positions(blobs: list[bytes], limit: int = 8) -> list[int]:
+    positions: list[int] = []
+    max_len = max(len(blob) for blob in blobs)
+    for index in range(max_len):
+        values = {blob[index] if index < len(blob) else None for blob in blobs}
+        if len(values) > 1:
+            positions.append(index)
+        if len(positions) >= limit:
+            break
+    return positions
+
+
+def load_layouts(targets: set[str]) -> dict[str, dict[str, str]]:
+    layouts: dict[str, dict[str, str]] = {}
+    with LAYOUT_PATH.open("r", encoding="utf-8", newline="") as handle:
+        reader = csv.DictReader(handle, delimiter="\t")
+        for row in reader:
+            if row["class_name_hint"] in targets:
+                layouts[row["class_name_hint"]] = row
+    return layouts
+
+
+def load_events(targets: set[str]) -> dict[str, list[dict[str, str]]]:
+    events: dict[str, list[dict[str, str]]] = {}
+    with EVENT_PATH.open("r", encoding="utf-8", newline="") as handle:
+        reader = csv.DictReader(handle, delimiter="\t")
+        for row in reader:
+            if row["class_name_hint"] in targets:
+                events.setdefault(row["class_name_hint"], []).append(row)
+    for rows in events.values():
+        rows.sort(key=lambda row: parse_hex(row["slot"]))
+    return events
+
+
+def resolve_chunk(data_offset: int) -> Path:
+    matches = glob.glob(str(CHUNKS_DIR / f"chunk_*_off_{data_offset:06X}_len_*.bin"))
+    if len(matches) != 1:
+        raise RuntimeError(f"chunk lookup failed for 0x{data_offset:06X}: {matches}")
+    return Path(matches[0])
+
+
+def build_rows() -> dict[tuple[str, int], dict[str, object]]:
+    targets = {name for family in FAMILIES.values() for name in family["classes"]}
+    layouts = load_layouts(targets)
+    events = load_events(targets)
+    rows_by_key: dict[tuple[str, int], dict[str, object]] = {}
+
+    for class_name, layout in layouts.items():
+        chunk_path = resolve_chunk(parse_hex(layout["data_offset"]))
+        blob = chunk_path.read_bytes()
+        code_base_minus_one = parse_hex(layout["code_base_minus_one"])
+        nonzero_rows = [row for row in events[class_name] if parse_hex(row["raw_code_offset"]) != 0]
+        offsets = sorted({parse_hex(row["raw_code_offset"]) for row in nonzero_rows})
+
+        for row in nonzero_rows:
+            slot = parse_hex(row["slot"])
+            code_offset = parse_hex(row["raw_code_offset"])
+            start = code_base_minus_one + code_offset
+            next_offsets = [offset for offset in offsets if offset > code_offset]
+            end = code_base_minus_one + next_offsets[0] if next_offsets else len(blob)
+            body = blob[start:end]
+            rows_by_key[(class_name, slot)] = {
+                "class_name": class_name,
+                "slot": slot,
+                "event_name_hint": row["event_name_hint"],
+                "raw_event_entry_word": row["raw_event_entry_word"],
+                "raw_code_offset": row["raw_code_offset"],
+                "start": start,
+                "end": end,
+                "length": len(body),
+                "sha1": hashlib.sha1(body).hexdigest(),
+                "preview": body[:16].hex(" "),
+                "chunk_path": str(chunk_path).replace("\\", "/"),
+                "body": body,
+            }
+
+    return rows_by_key
+
+
+def main() -> None:
+    rows_by_key = build_rows()
+    for family_name, family in FAMILIES.items():
+        print(f"## {family_name}")
+        for slot in family["slots"]:
+            print(f"SLOT 0x{slot:02X}")
+            subset = [rows_by_key[(class_name, slot)] for class_name in family["classes"]]
+            for row in subset:
+                print(
+                    "\t".join(
+                        [
+                            str(row["class_name"]),
+                            str(row["event_name_hint"]),
+                            str(row["raw_event_entry_word"]),
+                            str(row["raw_code_offset"]),
+                            f"{row['start']:04X}-{row['end']:04X}",
+                            str(row["length"]),
+                            str(row["sha1"])[:12],
+                            str(row["preview"]),
+                            str(row["chunk_path"]),
+                        ]
+                    )
+                )
+
+            groups: dict[str, list[str]] = {}
+            for row in subset:
+                groups.setdefault(str(row["sha1"]), []).append(str(row["class_name"]))
+            blobs = [row["body"] for row in subset]
+            print("identical_groups=" + json.dumps(list(groups.values())))
+            print(
+                f"common_prefix_len={common_prefix_length(blobs)} "
+                f"common_suffix_len={common_suffix_length(blobs)}"
+            )
+            print("first_diff_positions=" + json.dumps(first_diff_positions(blobs)))
+            print()
+
+
+# Run the report only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()