Add detailed class event processing and family comparison tools

- Enhance `extract_eusecode_flx.py` to derive class event rows with additional metadata including derived body windows and repeated template statuses.
- Introduce `usecode_family_compare.py` for comparing event families, analyzing commonalities in event bodies, and generating reports on identical groups and differences.
- Implement new data structures for managing class event rows and family artifact specifications.
- Update output formats to include derived body information and repeated family regression checks.
- Ensure robust validation of repeated family expectations against actual extracted data.
This commit is contained in:
MaddoScientisto 2026-03-22 23:24:46 +01:00
commit 4d3c8cd81b
23 changed files with 15033 additions and 14221 deletions

View file

@ -15,6 +15,7 @@ to support the next decoding pass.
from __future__ import annotations
import argparse
import hashlib
import json
import pathlib
import struct
@ -61,6 +62,21 @@ class ExtractedChunk:
class_parse_status: str | None = None
@dataclass(frozen=True)
class ClassEventRow:
entry_index: int
object_index: int
class_id: int
class_name_hint: str
slot: int
event_name_hint: str | None
raw_event_entry_word: int
raw_code_offset: int
derived_body_start: int | None
derived_body_end: int | None
derived_body_length: int | None
@dataclass(frozen=True)
class FlxTable:
entry_count: int
@ -69,6 +85,25 @@ class FlxTable:
entries: list[CandidateEntry]
@dataclass(frozen=True)
class FamilyArtifactSpec:
    """Describes one per-family decompile artifact (a ``.md`` / ``.tsv`` pair)."""

    # File-name stem; the writer emits ``<output_stem>.md`` and ``<output_stem>.tsv``.
    output_stem: str
    # Text of the top-level Markdown heading.
    title: str
    # Chunk ``primary_label`` values that belong to the family.
    labels: tuple[str, ...]
@dataclass(frozen=True)
class RepeatedFamilyRowExpectation:
    """Pinned values for one (class, slot) event row of a verified repeated family.

    ``validate_verified_repeated_family_regressions`` compares every field
    below (other than the two lookup keys) against the freshly extracted row.
    """

    # --- lookup keys ---------------------------------------------------------
    class_name: str  # matched against a chunk's ``primary_label``
    slot: int  # event-table slot of the expected row

    # --- expected raw row values --------------------------------------------
    raw_event_entry_word: int
    raw_code_offset: int

    # --- expected derived body window ---------------------------------------
    derived_body_start: int
    derived_body_end: int
    derived_body_length: int

    # --- expected "<family>/shared-slot-0xNN/<suffix>" tag -------------------
    repeated_template_status: str
def read_u32_le(data: bytes, offset: int) -> int:
    """Decode the unsigned little-endian 32-bit integer stored at ``data[offset:offset + 4]``.

    Raises:
        struct.error: if fewer than four bytes are available at ``offset``.
    """
    (value,) = struct.unpack_from("<I", data, offset)
    return value
@ -454,6 +489,73 @@ SCUMMVM_EVENT_NAME_HINTS: tuple[str, ...] = (
)
# (family name, member class labels) pairs. Members of one family have their
# event bodies compared slot-by-slot when repeated-template tags are built;
# the family name becomes the first path component of each tag.
VERIFIED_REPEATED_TEMPLATE_FAMILIES: tuple[tuple[str, tuple[str, ...]], ...] = (
    (
        "referent-anchor-twin",
        ("JELYHACK", "JELYH2"),
    ),
    (
        "boot-event-core",
        ("AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT"),
    ),
    (
        "callback-eventtrigger",
        ("SURCAMNS", "SURCAMEW"),
    ),
    (
        "environmental-event",
        ("FLAMEBOX", "NOSTRIL", "STEAMBOX"),
    ),
)
# One spec per family decompile artifact. For each spec a `<output_stem>.md`
# and `<output_stem>.tsv` pair is written, covering the parsed class chunks
# whose primary label appears in `labels`.
FAMILY_ARTIFACT_SPECS: tuple[FamilyArtifactSpec, ...] = (
    # `_BOOT` classes.
    FamilyArtifactSpec(
        output_stem="boot_family_decompile",
        title="_BOOT Family Decompiled Event Sketches",
        labels=("AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT"),
    ),
    # `SURCAM*` callback classes.
    FamilyArtifactSpec(
        output_stem="callback_family_decompile",
        title="SURCAM Callback Family Decompiled Event Sketches",
        labels=("SURCAMNS", "SURCAMEW"),
    ),
    # Environmental classes.
    FamilyArtifactSpec(
        output_stem="environmental_family_decompile",
        title="Environmental Family Decompiled Event Sketches",
        labels=("FLAMEBOX", "NOSTRIL", "STEAMBOX"),
    ),
)
# Regression oracle for the verified repeated families. Each entry pins one
# (class, slot) event row; a ValueError is raised during validation when the
# extracted data disagrees with any of these values.
#
# Positional columns, in order:
#   class_name, slot, raw_event_entry_word, raw_code_offset,
#   derived_body_start, derived_body_end, derived_body_length,
#   repeated_template_status
#
# The set of slots listed for a class is also checked as a whole: it must
# equal the set of that class's slots whose raw_code_offset is non-zero.
VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS: tuple[RepeatedFamilyRowExpectation, ...] = (
    # referent-anchor-twin
    RepeatedFamilyRowExpectation("JELYHACK", 0x01, 0x002A, 0x00000001, 0x00D4, 0x00FE, 42, "referent-anchor-twin/shared-slot-0x01/same-length-template"),
    RepeatedFamilyRowExpectation("JELYH2", 0x01, 0x002A, 0x00000001, 0x00D4, 0x00FE, 42, "referent-anchor-twin/shared-slot-0x01/same-length-template"),
    # boot-event-core
    RepeatedFamilyRowExpectation("AND_BOOT", 0x0A, 0x0253, 0x00000001, 0x00D4, 0x0327, 595, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("AND_BOOT", 0x0F, 0x0237, 0x00000254, 0x0327, 0x055E, 567, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
    RepeatedFamilyRowExpectation("AND_BOOT", 0x10, 0x003B, 0x0000048B, 0x055E, 0x0599, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
    RepeatedFamilyRowExpectation("BRO_BOOT", 0x0A, 0x02D5, 0x00000001, 0x00D4, 0x03A9, 725, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("BRO_BOOT", 0x0F, 0x024C, 0x000002D6, 0x03A9, 0x05F5, 588, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
    RepeatedFamilyRowExpectation("BRO_BOOT", 0x10, 0x003B, 0x00000522, 0x05F5, 0x0630, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
    RepeatedFamilyRowExpectation("COR_BOOT", 0x0A, 0x0227, 0x00000001, 0x00D4, 0x02FB, 551, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("COR_BOOT", 0x0F, 0x0234, 0x00000228, 0x02FB, 0x052F, 564, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
    RepeatedFamilyRowExpectation("COR_BOOT", 0x10, 0x003B, 0x0000045C, 0x052F, 0x056A, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
    RepeatedFamilyRowExpectation("REE_BOOT", 0x0A, 0x034B, 0x00000001, 0x00D4, 0x041F, 843, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("REE_BOOT", 0x0F, 0x025C, 0x0000034C, 0x041F, 0x067B, 604, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
    RepeatedFamilyRowExpectation("REE_BOOT", 0x10, 0x003B, 0x000005A8, 0x067B, 0x06B6, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
    RepeatedFamilyRowExpectation("VAR_BOOT", 0x0A, 0x029A, 0x00000001, 0x00D4, 0x036E, 666, "boot-event-core/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("VAR_BOOT", 0x0F, 0x0244, 0x0000029B, 0x036E, 0x05B2, 580, "boot-event-core/shared-slot-0x0F/shared-slot-template"),
    RepeatedFamilyRowExpectation("VAR_BOOT", 0x10, 0x003B, 0x000004DF, 0x05B2, 0x05ED, 59, "boot-event-core/shared-slot-0x10/same-length-template"),
    # callback-eventtrigger
    RepeatedFamilyRowExpectation("SURCAMNS", 0x01, 0x0051, 0x000000D2, 0x01B7, 0x0208, 81, "callback-eventtrigger/shared-slot-0x01/shared-slot-template"),
    RepeatedFamilyRowExpectation("SURCAMNS", 0x0A, 0x00D1, 0x00000001, 0x00E6, 0x01B7, 209, "callback-eventtrigger/shared-slot-0x0A/same-length-template"),
    RepeatedFamilyRowExpectation("SURCAMNS", 0x20, 0x02BA, 0x00000123, 0x0208, 0x04C2, 698, "callback-eventtrigger/shared-slot-0x20/same-length-template"),
    RepeatedFamilyRowExpectation("SURCAMNS", 0x21, 0x0709, 0x000003DD, 0x04C2, 0x0BCB, 1801, "callback-eventtrigger/shared-slot-0x21/shared-slot-template"),
    RepeatedFamilyRowExpectation("SURCAMNS", 0x22, 0x01A3, 0x00000AE6, 0x0BCB, 0x0D6E, 419, "callback-eventtrigger/shared-slot-0x22/same-length-template"),
    RepeatedFamilyRowExpectation("SURCAMEW", 0x01, 0x00F7, 0x000000D2, 0x01B7, 0x02AE, 247, "callback-eventtrigger/shared-slot-0x01/shared-slot-template"),
    RepeatedFamilyRowExpectation("SURCAMEW", 0x0A, 0x00D1, 0x00000001, 0x00E6, 0x01B7, 209, "callback-eventtrigger/shared-slot-0x0A/same-length-template"),
    RepeatedFamilyRowExpectation("SURCAMEW", 0x20, 0x02BA, 0x000001C9, 0x02AE, 0x0568, 698, "callback-eventtrigger/shared-slot-0x20/same-length-template"),
    RepeatedFamilyRowExpectation("SURCAMEW", 0x21, 0x0655, 0x00000483, 0x0568, 0x0BBD, 1621, "callback-eventtrigger/shared-slot-0x21/shared-slot-template"),
    RepeatedFamilyRowExpectation("SURCAMEW", 0x22, 0x01A3, 0x00000AD8, 0x0BBD, 0x0D60, 419, "callback-eventtrigger/shared-slot-0x22/same-length-template"),
    # environmental-event
    RepeatedFamilyRowExpectation("FLAMEBOX", 0x0A, 0x026A, 0x00000001, 0x00E0, 0x034A, 618, "environmental-event/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("FLAMEBOX", 0x20, 0x01AC, 0x0000026B, 0x034A, 0x04F6, 428, "environmental-event/shared-slot-0x20/shared-slot-template"),
    RepeatedFamilyRowExpectation("FLAMEBOX", 0x21, 0x029A, 0x00000417, 0x04F6, 0x0790, 666, "environmental-event/shared-slot-0x21/shared-slot-template"),
    RepeatedFamilyRowExpectation("NOSTRIL", 0x0A, 0x00C0, 0x00000001, 0x00E0, 0x01A0, 192, "environmental-event/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("NOSTRIL", 0x20, 0x0129, 0x000000C1, 0x01A0, 0x02C9, 297, "environmental-event/shared-slot-0x20/shared-slot-template"),
    RepeatedFamilyRowExpectation("NOSTRIL", 0x21, 0x01BE, 0x000001EA, 0x02C9, 0x0487, 446, "environmental-event/shared-slot-0x21/shared-slot-template"),
    RepeatedFamilyRowExpectation("STEAMBOX", 0x0A, 0x0266, 0x00000001, 0x00E0, 0x0346, 614, "environmental-event/shared-slot-0x0A/shared-slot-template"),
    RepeatedFamilyRowExpectation("STEAMBOX", 0x20, 0x01F6, 0x00000267, 0x0346, 0x053C, 502, "environmental-event/shared-slot-0x20/shared-slot-template"),
    RepeatedFamilyRowExpectation("STEAMBOX", 0x21, 0x02A7, 0x0000045D, 0x053C, 0x07E3, 679, "environmental-event/shared-slot-0x21/shared-slot-template"),
)
def scummvm_event_name_hint(slot: int) -> str | None:
if 0 <= slot < len(SCUMMVM_EVENT_NAME_HINTS):
return SCUMMVM_EVENT_NAME_HINTS[slot]
@ -532,6 +634,368 @@ def annotate_class_layout(chunks: list[ExtractedChunk]) -> None:
chunk.class_parse_status = "parsed-class-layout"
def derive_class_event_rows(chunk: ExtractedChunk, raw_data: bytes) -> list[ClassEventRow]:
    """Decode the event table of ``chunk`` and derive a body window for every row.

    The table is read from ``raw_data`` starting at byte 20, six bytes per
    slot: a 16-bit word followed by a 32-bit code offset.  A row's body is
    taken to start at ``code_base_minus_one + raw_code_offset`` and to run up
    to the next larger non-zero code offset of the same chunk (or to the end
    of ``raw_data`` when there is none).  Windows that do not fit inside
    ``raw_data`` are left underived.

    Args:
        chunk: Class chunk whose layout has been annotated.
        raw_data: Raw bytes of that chunk.

    Returns:
        One ``ClassEventRow`` per slot in table order, or an empty list when
        the chunk is not in ``"parsed-class-layout"`` state or lacks an
        object index, class id, or event count.
    """
    layout_parsed = chunk.class_parse_status == "parsed-class-layout"
    identity_known = not (
        chunk.object_index is None
        or chunk.class_id is None
        or chunk.conservative_event_count is None
    )
    if not (layout_parsed and identity_known):
        return []

    # Pass 1: pull the raw (word, offset) pair out of every 6-byte table row.
    table_entries: list[tuple[int, int, int]] = []
    for slot in range(chunk.conservative_event_count):
        row_offset = 20 + 6 * slot
        table_entries.append(
            (
                slot,
                read_u16_le(raw_data, row_offset),
                read_u32_le(raw_data, row_offset + 2),
            )
        )

    # Map each distinct non-zero offset to the next larger one; the largest
    # offset has no successor and is therefore absent from the map.
    distinct_offsets = sorted(
        {code_offset for _, _, code_offset in table_entries if code_offset != 0}
    )
    successor_of = dict(zip(distinct_offsets, distinct_offsets[1:]))

    base = chunk.code_base_minus_one
    chunk_size = len(raw_data)

    # Pass 2: attach a body window to every row that has one.
    rows: list[ClassEventRow] = []
    for slot, entry_word, code_offset in table_entries:
        window_start = window_end = window_length = None
        if code_offset != 0 and base is not None:
            candidate_start = base + code_offset
            following = successor_of.get(code_offset)
            candidate_end = chunk_size if following is None else base + following
            # Only accept windows that lie entirely inside the chunk bytes.
            if 0 <= candidate_start <= candidate_end <= chunk_size:
                window_start = candidate_start
                window_end = candidate_end
                window_length = candidate_end - candidate_start
        rows.append(
            ClassEventRow(
                entry_index=chunk.index,
                object_index=chunk.object_index,
                class_id=chunk.class_id,
                class_name_hint=chunk.class_name_hint or "",
                slot=slot,
                event_name_hint=scummvm_event_name_hint(slot),
                raw_event_entry_word=entry_word,
                raw_code_offset=code_offset,
                derived_body_start=window_start,
                derived_body_end=window_end,
                derived_body_length=window_length,
            )
        )
    return rows
def build_class_event_rows(
    parsed_class_chunks: list[ExtractedChunk],
) -> tuple[list[ClassEventRow], dict[int, list[ClassEventRow]], dict[int, bytes]]:
    """Load every chunk's raw bytes from disk and derive its event rows.

    Args:
        parsed_class_chunks: Chunks to process; each chunk's ``raw_path`` is
            read in full.

    Returns:
        A 3-tuple ``(all_rows, rows_by_entry, raw_data_by_entry)``:
        the rows of all chunks concatenated in input order, the rows keyed by
        chunk index, and the raw bytes keyed by chunk index.
    """
    flattened: list[ClassEventRow] = []
    rows_lookup: dict[int, list[ClassEventRow]] = {}
    bytes_lookup: dict[int, bytes] = {}

    for chunk in parsed_class_chunks:
        blob = pathlib.Path(chunk.raw_path).read_bytes()
        chunk_rows = derive_class_event_rows(chunk, blob)
        bytes_lookup[chunk.index] = blob
        rows_lookup[chunk.index] = chunk_rows
        flattened.extend(chunk_rows)

    return flattened, rows_lookup, bytes_lookup
def build_repeated_template_status_map(
    parsed_class_chunks: list[ExtractedChunk],
    rows_by_entry: dict[int, list[ClassEventRow]],
    raw_data_by_entry: dict[int, bytes],
) -> dict[tuple[int, int], str]:
    """Tag event rows that share a slot with another member of a verified family.

    For every family in ``VERIFIED_REPEATED_TEMPLATE_FAMILIES`` with at least
    two members present, the derived bodies are grouped by slot.  A slot
    populated by two or more members is tagged
    ``"<family>/shared-slot-0x<SLOT>/<suffix>"`` where the suffix is

    * ``exact-body-clone``      - all bodies are byte-identical,
    * ``same-length-template``  - bodies differ but have one common length,
    * ``shared-slot-template``  - otherwise.

    Rows with a zero code offset or without a derived body window are ignored.

    Returns:
        Mapping of ``(chunk index, slot)`` to the tag string.
    """
    label_to_chunk: dict[str, ExtractedChunk] = {}
    for chunk in parsed_class_chunks:
        if chunk.primary_label:
            label_to_chunk[chunk.primary_label] = chunk

    tagged: dict[tuple[int, int], str] = {}
    for family_name, labels in VERIFIED_REPEATED_TEMPLATE_FAMILIES:
        members = [label_to_chunk[label] for label in labels if label in label_to_chunk]
        if len(members) < 2:
            continue

        # slot -> [(row key, body bytes), ...] across all family members
        bodies_per_slot: dict[int, list[tuple[tuple[int, int], bytes]]] = {}
        for member in members:
            blob = raw_data_by_entry.get(member.index)
            if blob is None:
                continue
            for row in rows_by_entry.get(member.index, []):
                start, end = row.derived_body_start, row.derived_body_end
                if row.raw_code_offset == 0 or start is None or end is None:
                    continue
                bodies_per_slot.setdefault(row.slot, []).append(
                    ((member.index, row.slot), blob[start:end])
                )

        for slot, keyed_bodies in bodies_per_slot.items():
            if len(keyed_bodies) < 2:
                continue
            distinct_bodies = {body for _, body in keyed_bodies}
            distinct_lengths = {len(body) for body in distinct_bodies}
            if len(distinct_bodies) == 1:
                suffix = "exact-body-clone"
            elif len(distinct_lengths) == 1:
                suffix = "same-length-template"
            else:
                suffix = "shared-slot-template"
            tag = f"{family_name}/shared-slot-0x{slot:02X}/{suffix}"
            for row_key, _ in keyed_bodies:
                tagged[row_key] = tag
    return tagged
def format_optional_hex(value: int | None, width: int = 0) -> str:
if value is None:
return ""
if width > 0:
return f"0x{value:0{width}X}"
return f"0x{value:X}"
def hex_edge(data: bytes, width: int = 8) -> str:
    """Return the leading ``width`` bytes of ``data`` as lowercase hex (``""`` when empty)."""
    leading = data[:width] if data else b""
    return leading.hex()
def hex_tail(data: bytes, width: int = 8) -> str:
    """Return the trailing ``width`` bytes of ``data`` as lowercase hex.

    Args:
        data: Bytes to take the suffix from.
        width: Maximum number of trailing bytes to render.

    Returns:
        The hex string, or ``""`` when ``data`` is empty or ``width`` is not
        positive.
    """
    # ``data[-0:]`` is ``data[0:]`` (the whole buffer), so a zero width must be
    # handled explicitly to yield an empty suffix.
    if not data or width <= 0:
        return ""
    return data[-width:].hex()
def write_family_decompile_artifact(
    out_dir: pathlib.Path,
    parsed_class_chunks: list[ExtractedChunk],
    rows_by_entry: dict[int, list[ClassEventRow]],
    raw_data_by_entry: dict[int, bytes],
    repeated_status_by_row: dict[tuple[int, int], str],
    spec: FamilyArtifactSpec,
) -> None:
    """Write the ``<stem>.md`` / ``<stem>.tsv`` decompile artifact for one family.

    Only chunks whose ``primary_label`` is listed in ``spec.labels`` are
    considered, ordered by label.  For each of them, every event row with a
    non-zero code offset is rendered once as a YAML-style entry in the
    Markdown file and once as a TSV line, including a SHA-1 and the first and
    last bytes of the derived body.

    Nothing is written when no chunk matches the spec.  Chunks without any
    non-zero-offset row are omitted from both outputs.
    """
    wanted_labels = set(spec.labels)
    members = sorted(
        (chunk for chunk in parsed_class_chunks if chunk.primary_label in wanted_labels),
        key=lambda chunk: chunk.primary_label or "",
    )
    if not members:
        return

    tsv_row_template = "{entry_index}\t0x{class_id:X}\t{class_name}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_event_entry_word:04X}\t0x{raw_code_offset:08X}\t{derived_body_start}\t{derived_body_end}\t{derived_body_length}\t{repeated_template_status}\t{body_sha1}\t{body_prefix_hex}\t{body_suffix_hex}"
    tsv_lines = [
        "entry_index\tclass_id\tclass_name\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset\tderived_body_start\tderived_body_end\tderived_body_length\trepeated_template_status\tbody_sha1\tbody_prefix_hex\tbody_suffix_hex"
    ]
    md_lines = [
        f"# {spec.title}",
        "",
        "This is a reversible per-class rendering derived directly from `class_event_index.tsv` plus the raw extracted chunk bytes.",
        "ScummVM event labels remain hints only; the authoritative data here is the slot id, raw row bytes, and derived body window.",
        "",
    ]

    for chunk in members:
        live_rows = [
            row for row in rows_by_entry.get(chunk.index, []) if row.raw_code_offset != 0
        ]
        if not live_rows:
            continue
        chunk_bytes = raw_data_by_entry[chunk.index]

        # Per-class header of the YAML-style block.
        md_lines += [
            f"## {chunk.primary_label}",
            "",
            "```yaml",
            "class:",
            f" entry_index: 0x{chunk.index:03X}",
            f" class_id: 0x{chunk.class_id:X}",
            f" class_name: {chunk.primary_label}",
            f" class_object_index: 0x{chunk.object_index:X}",
            f" raw_code_base_u32: 0x{chunk.raw_code_base_u32:X}",
            f" code_base_minus_one: 0x{chunk.code_base_minus_one:X}",
            f" conservative_event_count: {chunk.conservative_event_count}",
            " events:",
        ]

        for row in live_rows:
            start, end, length = (
                row.derived_body_start,
                row.derived_body_end,
                row.derived_body_length,
            )
            # Rows without a derived window are rendered with an empty body.
            body = chunk_bytes[start:end] if start is not None and end is not None else b""
            status = repeated_status_by_row.get((row.entry_index, row.slot), "")
            digest = hashlib.sha1(body).hexdigest() if body else ""
            prefix_hex = hex_edge(body)
            suffix_hex = hex_tail(body)
            start_hex = format_optional_hex(start, 4)
            end_hex = format_optional_hex(end, 4)

            # Markdown rendering: lowercase hex, 'null' for missing values.
            md_lines += [
                f" - slot: 0x{row.slot:02x}",
                f" event_name_hint: {row.event_name_hint or ''}",
                f" raw_event_entry_word: 0x{row.raw_event_entry_word:04x}",
                f" raw_code_offset: 0x{row.raw_code_offset:08x}",
                f" derived_body_start: {start_hex.lower() or 'null'}",
                f" derived_body_end: {end_hex.lower() or 'null'}",
                f" derived_body_length: {length if length is not None else 'null'}",
                f" repeated_template_status: {status or 'unique-or-unclassified'}",
                f" body_sha1: {digest or 'null'}",
                f" body_prefix_hex: {prefix_hex or 'null'}",
                f" body_suffix_hex: {suffix_hex or 'null'}",
            ]

            # TSV rendering: uppercase hex, empty cells for missing values.
            tsv_lines.append(
                tsv_row_template.format(
                    entry_index=row.entry_index,
                    class_id=row.class_id,
                    class_name=chunk.primary_label or "",
                    slot=row.slot,
                    event_name_hint=row.event_name_hint or "",
                    raw_event_entry_word=row.raw_event_entry_word,
                    raw_code_offset=row.raw_code_offset,
                    derived_body_start=start_hex,
                    derived_body_end=end_hex,
                    derived_body_length=(length if length is not None else ""),
                    repeated_template_status=status,
                    body_sha1=digest,
                    body_prefix_hex=prefix_hex,
                    body_suffix_hex=suffix_hex,
                )
            )

        # Close the fenced block of this class.
        md_lines += ["```", ""]

    md_path = out_dir / f"{spec.output_stem}.md"
    tsv_path = out_dir / f"{spec.output_stem}.tsv"
    md_path.write_text("\n".join(md_lines), encoding="utf-8")
    tsv_path.write_text("\n".join(tsv_lines) + "\n", encoding="utf-8")
def validate_verified_repeated_family_regressions(
    parsed_class_chunks: list[ExtractedChunk],
    rows_by_entry: dict[int, list[ClassEventRow]],
    repeated_status_by_row: dict[tuple[int, int], str],
) -> list[str]:
    """Check extracted rows against ``VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS``.

    Two kinds of checks are made and reported as TSV lines:

    * ``slot-set`` - per class, the set of slots with a non-zero code offset
      must equal the set of slots listed in the expectations;
    * ``row`` - per expectation, the raw word, code offset, derived body
      window and repeated-template tag must all match.

    Returns:
        The report lines (header first) when every check passes.

    Raises:
        ValueError: if any check fails; the message lists every mismatch.
            The report lines are not returned in that case.
    """
    label_to_chunk = {
        chunk.primary_label: chunk
        for chunk in parsed_class_chunks
        if chunk.primary_label
    }

    slots_expected: dict[str, set[int]] = {}
    for expectation in VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS:
        slots_expected.setdefault(expectation.class_name, set()).add(expectation.slot)

    report_lines = [
        "record_type\tclass_name\tslot\texpected\tactual\tstatus"
    ]
    errors: list[str] = []

    def render_slots(slots: set[int]) -> str:
        return ",".join(f"0x{slot:02X}" for slot in sorted(slots))

    # --- per-class slot-set checks ------------------------------------------
    for class_name in sorted(slots_expected):
        expected_slots = slots_expected[class_name]
        chunk = label_to_chunk.get(class_name)
        if chunk is None:
            actual_slots: set[int] = set()
        else:
            actual_slots = {
                row.slot
                for row in rows_by_entry.get(chunk.index, [])
                if row.raw_code_offset != 0
            }
        status = "ok" if actual_slots == expected_slots else "mismatch"
        report_lines.append(
            "\t".join(
                (
                    "slot-set",
                    class_name,
                    "*",
                    render_slots(expected_slots),
                    render_slots(actual_slots),
                    status,
                )
            )
        )
        if status != "ok":
            errors.append(
                f"{class_name}: expected non-zero slots {sorted(expected_slots)}, found {sorted(actual_slots)}"
            )

    # --- per-row value checks -----------------------------------------------
    for expectation in VERIFIED_REPEATED_FAMILY_ROW_EXPECTATIONS:
        slot_text = f"0x{expectation.slot:02X}"

        chunk = label_to_chunk.get(expectation.class_name)
        if chunk is None:
            errors.append(f"missing repeated-family class {expectation.class_name}")
            report_lines.append(
                "\t".join(
                    ("row", expectation.class_name, slot_text, "present", "missing-class", "mismatch")
                )
            )
            continue

        # First row of the chunk that sits in the expected slot, if any.
        row = None
        for candidate in rows_by_entry.get(chunk.index, []):
            if candidate.slot == expectation.slot:
                row = candidate
                break
        if row is None:
            errors.append(f"missing row {expectation.class_name} slot 0x{expectation.slot:02X}")
            report_lines.append(
                "\t".join(
                    ("row", expectation.class_name, slot_text, "present", "missing-row", "mismatch")
                )
            )
            continue

        actual_tag = repeated_status_by_row.get((row.entry_index, row.slot), "")
        actual_values = (
            row.raw_event_entry_word,
            row.raw_code_offset,
            row.derived_body_start,
            row.derived_body_end,
            row.derived_body_length,
            actual_tag,
        )
        expected_values = (
            expectation.raw_event_entry_word,
            expectation.raw_code_offset,
            expectation.derived_body_start,
            expectation.derived_body_end,
            expectation.derived_body_length,
            expectation.repeated_template_status,
        )
        status = "ok" if actual_values == expected_values else "mismatch"

        expected_text = "|".join(
            [
                f"0x{expectation.raw_event_entry_word:04X}",
                f"0x{expectation.raw_code_offset:08X}",
                f"0x{expectation.derived_body_start:04X}",
                f"0x{expectation.derived_body_end:04X}",
                str(expectation.derived_body_length),
                expectation.repeated_template_status,
            ]
        )
        actual_text = "|".join(
            [
                f"0x{row.raw_event_entry_word:04X}",
                f"0x{row.raw_code_offset:08X}",
                format_optional_hex(row.derived_body_start, 4),
                format_optional_hex(row.derived_body_end, 4),
                str(row.derived_body_length if row.derived_body_length is not None else ""),
                actual_tag,
            ]
        )
        report_lines.append(
            "\t".join(
                ("row", expectation.class_name, slot_text, expected_text, actual_text, status)
            )
        )
        if status != "ok":
            errors.append(
                f"{expectation.class_name} slot 0x{expectation.slot:02X}: expected {expected_values}, found {actual_values}"
            )

    if errors:
        raise ValueError(
            "repeated-family regression mismatch:\n- " + "\n- ".join(errors)
        )
    return report_lines
def readable_neighbor_chunks(
center: ExtractedChunk,
chunk_by_index: dict[int, ExtractedChunk],
@ -1556,6 +2020,17 @@ def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes,
"entry_index\tobject_index\tclass_id\tclass_name_hint\traw_code_base_u32\tcode_base_minus_one\tconservative_event_count\tevent_table_end\tclass_parse_status\tdata_offset\tdeclared_size\tprimary_label"
]
parsed_class_chunks = [chunk for chunk in chunks if chunk.class_parse_status == "parsed-class-layout"]
class_event_rows, rows_by_entry, raw_data_by_entry = build_class_event_rows(parsed_class_chunks)
repeated_status_by_row = build_repeated_template_status_map(
parsed_class_chunks,
rows_by_entry,
raw_data_by_entry,
)
repeated_family_regression_lines = validate_verified_repeated_family_regressions(
parsed_class_chunks,
rows_by_entry,
repeated_status_by_row,
)
for chunk in parsed_class_chunks:
class_layout_lines.append(
"{index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{raw_code_base_u32:X}\t0x{code_base_minus_one:X}\t{conservative_event_count}\t0x{event_table_end:X}\t{class_parse_status}\t0x{data_offset:X}\t0x{declared_size:X}\t{primary_label}".format(
@ -1576,28 +2051,39 @@ def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes,
(out_dir / "class_layout_index.tsv").write_text("\n".join(class_layout_lines) + "\n", encoding="utf-8")
class_event_lines = [
"entry_index\tobject_index\tclass_id\tclass_name_hint\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset"
"entry_index\tobject_index\tclass_id\tclass_name_hint\tslot\tevent_name_hint\traw_event_entry_word\traw_code_offset\tderived_body_start\tderived_body_end\tderived_body_length\trepeated_template_status"
]
for chunk in parsed_class_chunks:
raw_data = pathlib.Path(chunk.raw_path).read_bytes()
assert chunk.conservative_event_count is not None
for slot in range(chunk.conservative_event_count):
entry_offset = 20 + 6 * slot
raw_word = read_u16_le(raw_data, entry_offset)
raw_code_offset = read_u32_le(raw_data, entry_offset + 2)
class_event_lines.append(
"{entry_index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_word:04X}\t0x{raw_code_offset:08X}".format(
entry_index=chunk.index,
object_index=chunk.object_index,
class_id=chunk.class_id,
class_name_hint=chunk.class_name_hint or "",
slot=slot,
event_name_hint=scummvm_event_name_hint(slot) or "",
raw_word=raw_word,
raw_code_offset=raw_code_offset,
)
for row in class_event_rows:
class_event_lines.append(
"{entry_index}\t0x{object_index:X}\t0x{class_id:X}\t{class_name_hint}\t0x{slot:02X}\t{event_name_hint}\t0x{raw_event_entry_word:04X}\t0x{raw_code_offset:08X}\t{derived_body_start}\t{derived_body_end}\t{derived_body_length}\t{repeated_template_status}".format(
entry_index=row.entry_index,
object_index=row.object_index,
class_id=row.class_id,
class_name_hint=row.class_name_hint,
slot=row.slot,
event_name_hint=row.event_name_hint or "",
raw_event_entry_word=row.raw_event_entry_word,
raw_code_offset=row.raw_code_offset,
derived_body_start=format_optional_hex(row.derived_body_start, 4),
derived_body_end=format_optional_hex(row.derived_body_end, 4),
derived_body_length=(row.derived_body_length if row.derived_body_length is not None else ""),
repeated_template_status=repeated_status_by_row.get((row.entry_index, row.slot), ""),
)
)
(out_dir / "class_event_index.tsv").write_text("\n".join(class_event_lines) + "\n", encoding="utf-8")
for family_artifact_spec in FAMILY_ARTIFACT_SPECS:
write_family_decompile_artifact(
out_dir,
parsed_class_chunks,
rows_by_entry,
raw_data_by_entry,
repeated_status_by_row,
family_artifact_spec,
)
(out_dir / "repeated_family_regressions.tsv").write_text(
"\n".join(repeated_family_regression_lines) + "\n",
encoding="utf-8",
)
neighborhood_lines = [
"center_index\tneighbor_index\tprimary_label\tfield_names\tfield_tags"
@ -1763,7 +2249,9 @@ def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes,
lines.append("- `.strings.txt` files are the main human-readable output for now; `.txt` files are emitted only for chunks that look text-like.")
lines.append("- `descriptor_index.tsv` summarizes guessed class labels, field names, and compact tag patterns for descriptor-like chunks.")
lines.append("- `class_layout_index.tsv` records the conservative owner-loaded class parsing state: object index, class id, class-name hint, raw bytes-8..11 field, derived code-base-minus-one, and event-count/table-end values when the local divisibility and bounds checks succeed.")
lines.append("- `class_event_index.tsv` expands parsed owner-loaded classes into raw 6-byte event rows with slot numbers, ScummVM event-name hints for `0x00..0x1f`, unresolved leading words, and raw code-offset dwords for round-trip tooling work.")
lines.append("- `class_event_index.tsv` now also emits derived body-window columns (`derived_body_start`, `derived_body_end`, `derived_body_length`) plus conservative `repeated_template_status` tags for verified repeated families.")
lines.append("- `boot_family_decompile.md` / `.tsv`, `callback_family_decompile.md` / `.tsv`, and `environmental_family_decompile.md` / `.tsv` now provide reversible per-class decompile artifacts for the `_BOOT`, `SURCAM*`, and environmental repeated-family lanes.")
lines.append("- `repeated_family_regressions.tsv` enforces the current repeated-family slot sets plus the verified raw-row and derived body-window fields for `JELYHACK/JELYH2`, `_BOOT`, `SURCAM*`, and `FLAMEBOX/NOSTRIL/STEAMBOX`.")
lines.append("- `descriptor_neighborhoods.tsv` captures local table neighborhoods around trigger/event-related classes such as `JELYHACK`, `NPCTRIG`, `CRUZTRIG`, `TRIGPAD`, and `SPECIAL`.")
lines.append("- `referent_anchor_event_graph.tsv` groups referent-bearing descriptors with nearby event-bearing neighbors so the attachment model can be inspected without ad hoc grepping.")
lines.append("- `jelyhack_island_graph.md` now uses a wider local window so the `JELYHACK` / `JELYH2` anchors can be inspected alongside the nearby event-bearing `REE_BOOT`, `SURCAMEW`, and `SFXTRIG` descriptors rather than stopping at the referent-only neighbors.")