Add extractor for Crusader's EUSECODE.FLX container

- Implemented a Python script to extract data from the EUSECODE.FLX file format.
- Defined data structures for candidate entries and extracted chunks using dataclasses.
- Added functions to read and parse the FLX table, extract candidate data, and generate human-readable output files.
- Included functionality for analyzing extracted data, including generating summaries, descriptors, and event family reports.
- Implemented utilities for calculating printable ratios, zero ratios, and identifying text-like data.
- Added support for writing various output formats, including JSON, TSV, and Markdown.
This commit is contained in:
MaddoScientisto 2026-03-22 14:27:38 +01:00
commit 3daffbf113
58 changed files with 30295 additions and 2504 deletions

View file

@ -0,0 +1,788 @@
#!/usr/bin/env python3
"""Extractor for Crusader's EUSECODE.FLX container.
Current validated layout:
- 0x80-byte header area
- little-endian entry count at file offset 0x54
- entry table begins at 0x80
- each entry is 8 bytes: <u32 data_offset, u32 declared_size>
The exact semantics of the payload records are still under RE, so the extractor dumps
all non-zero entries and emits human-readable sidecars (.strings.txt and index files)
to support the next decoding pass.
"""
from __future__ import annotations
import argparse
import json
import pathlib
import struct
from dataclasses import asdict, dataclass
# Fallback locations used by parse_args() when no positional arguments are supplied.
# NOTE(review): both are absolute paths on a specific Windows drive -- presumably the
# author's workstation layout; pass explicit paths on any other machine.
DEFAULT_INPUT = pathlib.Path(r"k:\ghidra\Crusader_Decomp\USECODE\EUSECODE.FLX")
DEFAULT_OUTPUT = pathlib.Path(r"k:\ghidra\Crusader_Decomp\USECODE\EUSECODE_extracted")
@dataclass(frozen=True)
class CandidateEntry:
    """Immutable record for one accepted row of the FLX entry table."""

    # File offset of the 8-byte table row this entry was read from.
    table_offset: int
    # First u32 of the row: file offset where the payload starts.
    data_offset: int
    # Second u32 of the row: size as declared by the table.
    declared_size: int
@dataclass
class ExtractedChunk:
    """Everything recorded about one payload dumped from the container."""

    # --- position in the table / file ---
    index: int  # position of the entry in the accepted-entry list
    table_offset: int  # file offset of the table row
    data_offset: int  # file offset of the payload
    declared_size: int  # size taken from the table row
    next_offset: int | None  # data offset of the next entry in offset order, if any
    extracted_size: int  # number of bytes actually written out
    overlap_with_next: bool  # True when the declared span runs past next_offset

    # --- content heuristics ---
    text_like: bool
    printable_ratio: float
    zero_ratio: float
    preview: str  # short ASCII rendering of the leading bytes

    # --- output files ---
    raw_path: str
    strings_path: str
    text_path: str | None  # only set when a .txt rendering was written

    # --- descriptor guesses ---
    primary_label: str | None
    field_names: list[str]
    field_tags: list[str]
@dataclass(frozen=True)
class FlxTable:
    """Parsed view of the container's entry table."""

    # Number of table slots announced by the header (includes skipped slots).
    entry_count: int
    # File offset where the table starts.
    table_offset: int
    # File offset one past the last table row.
    table_end: int
    # Rows that passed validation, in table order.
    entries: list[CandidateEntry]
def read_u32_le(data: bytes, offset: int) -> int:
    """Decode the unsigned little-endian 32-bit integer stored at ``offset`` in ``data``."""
    (value,) = struct.unpack_from("<I", data, offset)
    return value
def read_u16_le(data: bytes, offset: int) -> int:
    """Decode the unsigned little-endian 16-bit integer stored at ``offset`` in ``data``."""
    (value,) = struct.unpack_from("<H", data, offset)
    return value
def ascii_preview(data: bytes, limit: int = 64) -> str:
    """Render the first ``limit`` bytes as ASCII, substituting ``.`` for anything outside 0x20-0x7E."""
    return "".join(chr(value) if 0x20 <= value <= 0x7E else "." for value in data[:limit])
def printable_ratio(data: bytes) -> float:
    """Return the fraction of bytes that are TAB, LF, CR or in 0x20-0x7E (0.0 for empty input)."""
    total = len(data)
    if total == 0:
        return 0.0
    hits = 0
    for value in data:
        if 0x20 <= value <= 0x7E or value in (0x09, 0x0A, 0x0D):
            hits += 1
    return hits / total
def zero_ratio(data: bytes) -> float:
    """Return the fraction of bytes equal to zero (0.0 for empty input)."""
    total = len(data)
    return data.count(b"\x00") / total if total else 0.0
def iter_printable_runs(data: bytes, min_len: int = 4) -> list[str]:
    """Collect maximal runs of printable bytes that are at least ``min_len`` long.

    A byte counts as printable when it is TAB, LF, CR or in 0x20-0x7E. Runs are
    returned in file order, decoded as latin-1.
    """
    found: list[str] = []
    pending = bytearray()

    def flush() -> None:
        # Keep the pending run only if it is long enough, then start a new one.
        if len(pending) >= min_len:
            found.append(pending.decode("latin-1"))
        pending.clear()

    for value in data:
        if 0x20 <= value <= 0x7E or value in (0x09, 0x0A, 0x0D):
            pending.append(value)
        else:
            flush()
    flush()  # the input may end in the middle of a run
    return found
def summarize_descriptor(strings: list[str]) -> tuple[str | None, list[str]]:
    """Guess a class label and the field names from a chunk's printable strings.

    Returns ``(primary_label, field_names)``. The label is the most frequent
    all-uppercase string (ties resolved alphabetically) or ``None``. Field names
    are the distinct strings that start with a lowercase character and are
    alphanumeric apart from underscores, in first-seen order.
    """
    tally: dict[str, int] = {}
    ordered_fields: dict[str, None] = {}  # dict used as an insertion-ordered set
    for text in strings:
        if text.isupper() and any(ch.isalpha() for ch in text):
            tally[text] = tally.get(text, 0) + 1
        elif text and text[0].islower() and text.replace("_", "").isalnum():
            ordered_fields.setdefault(text, None)

    best = min(tally, key=lambda name: (-tally[name], name)) if tally else None
    return best, list(ordered_fields)
def extract_field_tag_records(data: bytes, field_names: list[str]) -> list[str]:
    """Build sorted, de-duplicated ``XX:YYZZ->name`` tags from the 3 bytes before each field name.

    For every occurrence of a field name in ``data`` that has at least three
    preceding bytes, the tag records those bytes as two-digit uppercase hex.

    Args:
        data: Raw chunk bytes to search.
        field_names: Names to look for; each is encoded as latin-1.

    Returns:
        The distinct tags, sorted lexicographically.
    """
    tags: set[str] = set()
    for field_name in field_names:
        if not field_name:
            # An empty needle matches at every position and carries no information.
            continue
        needle = field_name.encode("latin-1")
        pos = data.find(needle)
        while pos != -1:
            # A match in the first three bytes has no complete prefix to record.
            # Skip it, but keep scanning so later occurrences are still found
            # (previously such a match ended the search for this name).
            if pos >= 3:
                tags.add(f"{data[pos - 3]:02X}:{data[pos - 2]:02X}{data[pos - 1]:02X}->{field_name}")
            pos = data.find(needle, pos + 1)
    return sorted(tags)
def has_referent_field(chunk: ExtractedChunk) -> bool:
    """Tell whether the chunk has a ``referent`` field name or a tag ending in ``->referent``."""
    return "referent" in chunk.field_names or any(
        tag.endswith("->referent") for tag in chunk.field_tags
    )
def get_event_evidence(chunk: ExtractedChunk) -> list[str]:
    """List the distinct markers showing that the chunk mentions an event.

    Field names containing ``event`` (case-insensitive) yield ``field:<name>``;
    tags containing ``->event`` (case-insensitive) yield ``tag:<tag>``. Field
    markers come first; within each group first-seen order is kept.
    """
    from_fields = [f"field:{name}" for name in chunk.field_names if "event" in name.lower()]
    from_tags = [f"tag:{tag}" for tag in chunk.field_tags if "->event" in tag.lower()]
    # dict.fromkeys drops repeats while preserving first-seen order.
    return list(dict.fromkeys(from_fields + from_tags))
def chunk_role(chunk: ExtractedChunk) -> str:
    """Classify a chunk for the graph reports.

    Checked in priority order: ``referent-anchor`` (label is JELYHACK/JELYH2),
    ``event-bearing`` (any event evidence), ``referent-neighbor`` (has a referent
    field), otherwise ``neighbor``.
    """
    if chunk.primary_label in {"JELYHACK", "JELYH2"}:
        role = "referent-anchor"
    elif get_event_evidence(chunk):
        role = "event-bearing"
    elif has_referent_field(chunk):
        role = "referent-neighbor"
    else:
        role = "neighbor"
    return role
def has_event_trigger_field(chunk: ExtractedChunk) -> bool:
    """Tell whether the chunk has an ``eventtrigger`` field name or ``->eventtrigger`` tag (case-insensitive)."""
    for name in chunk.field_names:
        if name.lower() == "eventtrigger":
            return True
    for tag in chunk.field_tags:
        if "->eventtrigger" in tag.lower():
            return True
    return False
def event_tag_kind(chunk: ExtractedChunk) -> str:
    """Name the kind of event tag the chunk carries.

    Returns ``"eventTrigger"`` if any tag contains ``->eventtrigger``, else
    ``"event"`` if any tag ends with ``->event``, else ``""``. Matching is
    case-insensitive and the trigger form takes precedence.
    """
    lowered = [tag.lower() for tag in chunk.field_tags]
    if any("->eventtrigger" in tag for tag in lowered):
        return "eventTrigger"
    if any(tag.endswith("->event") for tag in lowered):
        return "event"
    return ""
def classify_event_family(chunk: ExtractedChunk) -> str:
    """Assign an event-tagged chunk to a report family; return ``""`` for chunks without an event tag.

    ``eventTrigger`` chunks form their own family. ``event`` chunks are checked
    against the label and field-name rules below in order, and the first match wins.
    """
    kind = event_tag_kind(chunk)
    if kind == "eventTrigger":
        return "callback-eventtrigger"
    if kind != "event":
        return ""

    label = chunk.primary_label
    names = chunk.field_names
    if label == "EVENT":
        return "event-hub"
    if label and label.endswith("_BOOT"):
        return "boot-event-core"
    if names == ["referent", "event"]:
        return "minimal-event-core"
    hazard_fields = {"flame", "flame2", "fire", "fire2", "steam", "steam2"}
    if not hazard_fields.isdisjoint(names):
        return "environmental-event"
    if "typeNpc" in names:
        return "npc-trigger"
    return "specialized-event"
def header_u16_words(data: bytes, count: int = 16) -> list[str]:
    """Format up to ``count`` leading little-endian u16 words of ``data`` as ``0xNNNN`` strings.

    Only complete words are returned; a trailing odd byte is ignored.
    """
    available = len(data) // 2
    wanted = available if available < count else count
    words: list[str] = []
    for position in range(0, wanted * 2, 2):
        (value,) = struct.unpack_from("<H", data, position)
        words.append(f"0x{value:04X}")
    return words
def header_u32_words(data: bytes, count: int = 8) -> list[str]:
    """Format up to ``count`` leading little-endian u32 words of ``data`` as ``0xNNNNNNNN`` strings.

    Only complete words are returned; trailing bytes that do not fill a word are ignored.
    """
    available = len(data) // 4
    wanted = available if available < count else count
    words: list[str] = []
    for position in range(0, wanted * 4, 4):
        (value,) = struct.unpack_from("<I", data, position)
        words.append(f"0x{value:08X}")
    return words
def interesting_printable_markers(data: bytes) -> list[str]:
    """Return up to eight distinct printable runs (min length 3) that contain one of the marker tokens."""
    tokens = ("wx[", "wt$[", "t$t=t@", "$Q", "?\n", "?\r")
    matching = [
        run
        for run in iter_printable_runs(data, min_len=3)
        if any(token in run for token in tokens)
    ]
    # Drop repeats while keeping first-seen order, then cap the list.
    return list(dict.fromkeys(matching))[:8]
def write_island_graph(
    out_dir: pathlib.Path,
    output_name: str,
    title: str,
    center_labels: set[str],
    descriptor_chunks: list[ExtractedChunk],
    chunk_by_index: dict[int, ExtractedChunk],
    total_chunks: int,
    window: int = 5,
) -> None:
    """Write a Markdown node/edge report for the table neighbourhood of selected descriptors.

    Centers are the descriptor chunks whose label is in ``center_labels``. Every
    chunk within ``window`` table positions of a center becomes a node, and each
    (center, neighbour) pair becomes an edge. Nothing is written when no center
    is found.
    """
    anchors = [item for item in descriptor_chunks if item.primary_label in center_labels]
    if not anchors:
        return

    def window_around(center_index: int) -> range:
        # Neighbourhood clamped to the valid chunk index range.
        return range(max(0, center_index - window), min(total_chunks, center_index + window + 1))

    member_indices: set[int] = set()
    for anchor in anchors:
        member_indices.update(window_around(anchor.index))

    report = [
        f"# {title}",
        "",
        "## Nodes",
        "",
        "| Index | Label | Role | Fields | Event Evidence |",
        "|---:|---|---|---|---|",
    ]
    for member_index in sorted(member_indices):
        member = chunk_by_index[member_index]
        label = member.primary_label or ""
        role = chunk_role(member)
        fields = ",".join(member.field_names) or "-"
        evidence = ",".join(get_event_evidence(member)) or "-"
        report.append(f"| {member_index} | {label} | {role} | {fields} | {evidence} |")

    report += ["", "## Edges", "", "| Source | Relation | Target | Evidence |", "|---|---|---|---|"]
    for anchor in anchors:
        for other_index in window_around(anchor.index):
            if other_index == anchor.index:
                continue
            other = chunk_by_index[other_index]
            delta = other.index - anchor.index
            other_evidence = get_event_evidence(other)
            # Neighbours that mention events are flagged as possible attachments.
            relation_name = "possible-event-attachment" if other_evidence else "table-neighbor"
            relation = f"{relation_name}({delta:+d})"
            target = other.primary_label or ""
            detail = ",".join(other_evidence) or "same local extraction neighborhood"
            report.append(
                f"| {anchor.primary_label} ({anchor.index}) | {relation} | {target} ({other.index}) | {detail} |"
            )

    (out_dir / output_name).write_text("\n".join(report) + "\n", encoding="utf-8")
def write_descriptor_compare(
    out_dir: pathlib.Path,
    output_name: str,
    labels: set[str],
    descriptor_chunks: list[ExtractedChunk],
) -> None:
    """Write a TSV comparing header words, printable markers and tags of selected descriptors.

    One row is emitted per descriptor chunk whose label is in ``labels``. The raw
    bytes are re-read from ``chunk.raw_path``, so the chunk files must already
    exist on disk.

    Args:
        out_dir: Directory receiving the report.
        output_name: File name of the report inside ``out_dir``.
        labels: Primary labels to include.
        descriptor_chunks: Candidate chunks, written in the given order.
    """

    def _tsv_safe(marker: str) -> str:
        # Markers are printable runs and are selected partly because they contain
        # "?\n" / "?\r"; they may also hold tabs. Written raw, they would split the
        # row or the cell, so control characters are emitted as backslash escapes
        # (the backslash itself is escaped first to keep the encoding unambiguous).
        return (
            marker.replace("\\", "\\\\")
            .replace("\t", "\\t")
            .replace("\r", "\\r")
            .replace("\n", "\\n")
        )

    compare_lines = [
        "entry_index\tlabel\trole\tdata_offset\tdeclared_size\theader_u16\theader_u32\tprintable_markers\tfield_tags"
    ]
    for chunk in descriptor_chunks:
        if chunk.primary_label not in labels:
            continue
        raw_data = pathlib.Path(chunk.raw_path).read_bytes()
        cells = [
            str(chunk.index),
            str(chunk.primary_label),
            chunk_role(chunk),
            f"0x{chunk.data_offset:X}",
            f"0x{chunk.declared_size:X}",
            ",".join(header_u16_words(raw_data)),
            ",".join(header_u32_words(raw_data)),
            "|".join(_tsv_safe(marker) for marker in interesting_printable_markers(raw_data)),
            ",".join(chunk.field_tags),
        ]
        compare_lines.append("\t".join(cells))
    (out_dir / output_name).write_text("\n".join(compare_lines) + "\n", encoding="utf-8")
def write_event_family_reports(
    out_dir: pathlib.Path,
    descriptor_chunks: list[ExtractedChunk],
    chunk_by_index: dict[int, ExtractedChunk],
    total_chunks: int,
) -> None:
    """Write ``event_family_index.tsv`` and ``event_family_summary.md``.

    Every descriptor chunk that ``classify_event_family`` assigns to a family gets
    one TSV row. The Markdown summary groups the same chunks per family, sorted by
    index within each family.

    Args:
        out_dir: Directory receiving both reports.
        descriptor_chunks: Chunks to classify, in TSV row order.
        chunk_by_index: Lookup of every chunk by its index, used for neighbour counts.
        total_chunks: Upper bound (exclusive) for neighbour indices.
    """

    def _count_event_neighbors(center_index: int) -> int:
        # Number of chunks within 5 table positions of the center (clamped to the
        # valid index range, center excluded) that carry an event tag.
        first = max(0, center_index - 5)
        stop = min(total_chunks, center_index + 6)
        return sum(
            1
            for neighbor_index in range(first, stop)
            if neighbor_index != center_index and event_tag_kind(chunk_by_index[neighbor_index])
        )

    family_lines = [
        "entry_index\tlabel\tfamily\ttag_kind\trole\tfield_count\tfield_names\tfield_tags\tdata_offset\tdeclared_size\tlocal_event_neighbors"
    ]
    families: dict[str, list[ExtractedChunk]] = {}
    # Neighbour counts are needed by both reports; compute each once.
    neighbor_counts: dict[int, int] = {}
    for chunk in descriptor_chunks:
        family = classify_event_family(chunk)
        if not family:
            continue
        families.setdefault(family, []).append(chunk)
        if chunk.index not in neighbor_counts:
            neighbor_counts[chunk.index] = _count_event_neighbors(chunk.index)
        cells = [
            str(chunk.index),
            chunk.primary_label or "",
            family,
            event_tag_kind(chunk),
            chunk_role(chunk),
            str(len(chunk.field_names)),
            ",".join(chunk.field_names),
            ",".join(chunk.field_tags),
            f"0x{chunk.data_offset:X}",
            f"0x{chunk.declared_size:X}",
            str(neighbor_counts[chunk.index]),
        ]
        family_lines.append("\t".join(cells))
    (out_dir / "event_family_index.tsv").write_text("\n".join(family_lines) + "\n", encoding="utf-8")

    family_order = [
        "event-hub",
        "boot-event-core",
        "npc-trigger",
        "minimal-event-core",
        "environmental-event",
        "specialized-event",
        "callback-eventtrigger",
    ]
    # Families missing from the preferred order are appended alphabetically so a
    # newly introduced family still shows up in the summary.
    family_order += sorted(name for name in families if name not in family_order)

    summary_lines = ["# Event Family Summary", ""]
    for family in family_order:
        family_chunks = families.get(family, [])
        if not family_chunks:
            continue
        summary_lines += [
            f"## {family}",
            "",
            "| Index | Label | Tag Kind | Fields | Size | Local Event Neighbors |",
            "|---:|---|---|---|---:|---:|",
        ]
        for chunk in sorted(family_chunks, key=lambda value: value.index):
            label = chunk.primary_label or ""
            fields = ",".join(chunk.field_names) or "-"
            summary_lines.append(
                f"| {chunk.index} | {label} | {event_tag_kind(chunk)} | {fields} "
                f"| 0x{chunk.declared_size:X} | {neighbor_counts[chunk.index]} |"
            )
        summary_lines.append("")
    # The trailing empty entry already yields the final newline after joining.
    (out_dir / "event_family_summary.md").write_text("\n".join(summary_lines), encoding="utf-8")
def looks_text_like(data: bytes) -> bool:
    """Heuristically decide whether ``data`` is worth dumping as text.

    Requires at least 80% printable bytes, plus either a line feed somewhere in
    the data or fewer than 5% zero bytes. Empty input is never text-like.
    """
    if not data or printable_ratio(data) < 0.80:
        return False
    # A CRLF pair always contains LF, so a single LF test covers both line endings.
    return b"\n" in data or zero_ratio(data) < 0.05
def parse_flx_table(data: bytes, table_offset: int = 0x80, count_offset: int = 0x54) -> FlxTable:
    """Parse the FLX entry table and keep the rows that point at real payload.

    The u32 at ``count_offset`` gives the number of 8-byte rows starting at
    ``table_offset``; each row is ``<u32 data_offset, u32 declared_size>``.

    Args:
        data: Entire container file.
        table_offset: File offset of the first table row.
        count_offset: File offset of the little-endian entry count.

    Returns:
        A ``FlxTable`` whose ``entries`` holds the accepted rows in table order.
        Rows with a zero offset, a zero size, or an offset at or beyond end of
        file are skipped.

    Raises:
        ValueError: If the file is too short to hold the entry count, or the
            table would extend past end of file.
    """
    file_size = len(data)
    if count_offset + 4 > file_size:
        # Report a truncated header the same way as a truncated table, instead of
        # leaking a low-level struct.error.
        raise ValueError(
            f"FLX header truncated: count_offset=0x{count_offset:X} file_size=0x{file_size:X}"
        )
    entry_count = read_u32_le(data, count_offset)
    table_end = table_offset + entry_count * 8
    if table_end > file_size:
        raise ValueError(
            f"FLX table extends past EOF: entry_count={entry_count} table_end=0x{table_end:X} file_size=0x{file_size:X}"
        )

    entries: list[CandidateEntry] = []
    for slot in range(entry_count):
        row_offset = table_offset + slot * 8
        data_offset = read_u32_le(data, row_offset)
        declared_size = read_u32_le(data, row_offset + 4)
        # Both fields are unsigned, so "unused" simply means zero.
        if data_offset == 0 or declared_size == 0:
            continue
        # An offset equal to file_size leaves no byte to extract, so it is
        # rejected together with offsets past the end.
        if data_offset >= file_size:
            continue
        entries.append(CandidateEntry(row_offset, data_offset, declared_size))

    return FlxTable(
        entry_count=entry_count,
        table_offset=table_offset,
        table_end=table_end,
        entries=entries,
    )
def dump_chunk(
    base_dir: pathlib.Path, chunk_name: str, data: bytes
) -> tuple[str, str, str | None, bool, float, float, str, str | None, list[str], list[str]]:
    """Write one chunk and its sidecar files, and return what was learned about it.

    Always writes ``<chunk_name>.bin`` and ``<chunk_name>.strings.txt``; writes
    ``<chunk_name>.txt`` only when the data looks text-like.

    Returns:
        ``(raw_path, strings_path, text_path, text_like, printable_ratio,
        zero_ratio, preview, primary_label, field_names, field_tags)`` where
        ``text_path`` is ``None`` if no ``.txt`` file was written.
    """
    bin_file = base_dir / f"{chunk_name}.bin"
    bin_file.write_bytes(data)

    printable_runs = iter_printable_runs(data)
    strings_file = base_dir / f"{chunk_name}.strings.txt"
    # One run per line; an empty run list produces an empty file.
    strings_file.write_text("".join(f"{run}\n" for run in printable_runs), encoding="utf-8")

    label, names = summarize_descriptor(printable_runs)
    tags = extract_field_tag_records(data, names)

    is_text = looks_text_like(data)
    written_text: str | None = None
    if is_text:
        text_file = base_dir / f"{chunk_name}.txt"
        text_file.write_text(data.decode("latin-1", errors="replace"), encoding="utf-8")
        written_text = str(text_file)

    return (
        str(bin_file),
        str(strings_file),
        written_text,
        is_text,
        printable_ratio(data),
        zero_ratio(data),
        ascii_preview(data),
        label,
        names,
        tags,
    )
def extract_candidates(data: bytes, out_dir: pathlib.Path, entries: list[CandidateEntry]) -> list[ExtractedChunk]:
    """Dump every table entry under ``out_dir/chunks`` and describe each result.

    Chunks are cut at the declared size, clamped to end of file. An entry is
    flagged as overlapping when its declared span runs past the data offset of
    the next entry in offset order.
    """
    chunk_root = out_dir / "chunks"
    chunk_root.mkdir(parents=True, exist_ok=True)
    total_bytes = len(data)

    # Rank entries by data offset (table position breaks ties) to find each one's successor.
    by_offset = sorted(range(len(entries)), key=lambda position: (entries[position].data_offset, position))
    following_offset: dict[int, int | None] = {}
    for rank, entry_index in enumerate(by_offset):
        if rank + 1 < len(by_offset):
            following_offset[entry_index] = entries[by_offset[rank + 1]].data_offset
        else:
            following_offset[entry_index] = None

    results: list[ExtractedChunk] = []
    for index, entry in enumerate(entries):
        successor = following_offset[index]
        declared_end = entry.data_offset + entry.declared_size
        payload = data[entry.data_offset:min(total_bytes, declared_end)]
        overlaps = successor is not None and declared_end > successor
        stem = f"chunk_{index:03d}_table_{entry.table_offset:04X}_off_{entry.data_offset:06X}_len_{entry.declared_size:06X}"
        (
            raw_path,
            strings_path,
            text_path,
            text_like,
            ratio_printable,
            ratio_zero,
            preview,
            label,
            names,
            tags,
        ) = dump_chunk(chunk_root, stem, payload)
        results.append(
            ExtractedChunk(
                index=index,
                table_offset=entry.table_offset,
                data_offset=entry.data_offset,
                declared_size=entry.declared_size,
                next_offset=successor,
                extracted_size=len(payload),
                overlap_with_next=overlaps,
                text_like=text_like,
                printable_ratio=round(ratio_printable, 4),
                zero_ratio=round(ratio_zero, 4),
                preview=preview,
                raw_path=raw_path,
                strings_path=strings_path,
                text_path=text_path,
                primary_label=label,
                field_names=names,
                field_tags=tags,
            )
        )
    return results
def write_summary(out_dir: pathlib.Path, input_path: pathlib.Path, data: bytes, entries: list[CandidateEntry], chunks: list[ExtractedChunk]) -> None:
    """Write every index, graph and README file that describes an extraction run.

    Files are produced in this order: ``summary.json``, ``entry_index.tsv``,
    ``descriptor_index.tsv``, ``descriptor_neighborhoods.tsv``,
    ``referent_anchor_event_graph.tsv``, the focused island-graph and compare
    reports, the event-family reports, ``README.md`` and ``all_strings.txt``.
    """

    def save_lines(file_name: str, rows: list[str]) -> None:
        # All line-oriented reports end with a single trailing newline.
        (out_dir / file_name).write_text("\n".join(rows) + "\n", encoding="utf-8")

    chunk_total = len(chunks)
    header_ascii = ascii_preview(data[:128], 128)

    # --- summary.json -----------------------------------------------------
    payload = {
        "input_path": str(input_path),
        "file_size": len(data),
        "header_preview_hex": data[:128].hex(),
        "header_preview_ascii": header_ascii,
        "candidate_entries": [asdict(entry) for entry in entries],
        "chunks": [asdict(chunk) for chunk in chunks],
    }
    (out_dir / "summary.json").write_text(json.dumps(payload, indent=2), encoding="utf-8")

    # --- entry_index.tsv --------------------------------------------------
    index_rows = [
        "entry_index\ttable_offset\tdata_offset\tdeclared_size\textracted_size\ttext_like\tprintable_ratio\tzero_ratio\toverlap_with_next\tprimary_label\tfield_names\tfield_tags\tpreview"
    ]
    for chunk in chunks:
        index_rows.append(
            "\t".join(
                [
                    str(chunk.index),
                    f"0x{chunk.table_offset:X}",
                    f"0x{chunk.data_offset:X}",
                    f"0x{chunk.declared_size:X}",
                    f"0x{chunk.extracted_size:X}",
                    str(int(chunk.text_like)),
                    f"{chunk.printable_ratio:.4f}",
                    f"{chunk.zero_ratio:.4f}",
                    str(int(chunk.overlap_with_next)),
                    chunk.primary_label or "",
                    ",".join(chunk.field_names),
                    ",".join(chunk.field_tags),
                    chunk.preview.replace("\t", " "),
                ]
            )
        )
    save_lines("entry_index.tsv", index_rows)

    # --- descriptor_index.tsv ---------------------------------------------
    descriptor_chunks = [chunk for chunk in chunks if chunk.primary_label or chunk.field_names]
    descriptor_rows = [
        "entry_index\tprimary_label\tfield_names\tfield_tags\tdata_offset\tdeclared_size"
    ]
    for chunk in descriptor_chunks:
        descriptor_rows.append(
            "\t".join(
                [
                    str(chunk.index),
                    chunk.primary_label or "",
                    ",".join(chunk.field_names),
                    ",".join(chunk.field_tags),
                    f"0x{chunk.data_offset:X}",
                    f"0x{chunk.declared_size:X}",
                ]
            )
        )
    save_lines("descriptor_index.tsv", descriptor_rows)

    # --- descriptor_neighborhoods.tsv -------------------------------------
    chunk_by_index = {chunk.index: chunk for chunk in chunks}
    interesting = {"JELYHACK", "JELYH2", "NPCTRIG", "CRUZTRIG", "TRIGPAD", "SPECIAL", "EVENT", "SFXTRIG"}
    neighborhood_rows = [
        "center_index\tneighbor_index\tprimary_label\tfield_names\tfield_tags"
    ]
    emitted_pairs: set[tuple[int, int]] = set()
    for center_index in [chunk.index for chunk in chunks if chunk.primary_label in interesting]:
        for neighbor_index in range(max(0, center_index - 4), min(chunk_total, center_index + 5)):
            if (center_index, neighbor_index) in emitted_pairs:
                continue
            emitted_pairs.add((center_index, neighbor_index))
            nearby = chunk_by_index[neighbor_index]
            neighborhood_rows.append(
                "\t".join(
                    [
                        str(center_index),
                        str(neighbor_index),
                        nearby.primary_label or "",
                        ",".join(nearby.field_names),
                        ",".join(nearby.field_tags),
                    ]
                )
            )
    save_lines("descriptor_neighborhoods.tsv", neighborhood_rows)

    # --- referent_anchor_event_graph.tsv ----------------------------------
    anchor_rows = [
        "anchor_index\tanchor_label\tanchor_fields\tneighbor_index\tdistance\tneighbor_label\tneighbor_fields\tneighbor_role\tevent_evidence"
    ]
    for anchor in descriptor_chunks:
        if not (anchor.primary_label and has_referent_field(anchor)):
            continue
        for neighbor_index in range(max(0, anchor.index - 5), min(chunk_total, anchor.index + 6)):
            if neighbor_index == anchor.index:
                continue
            neighbor = chunk_by_index[neighbor_index]
            evidence = get_event_evidence(neighbor)
            if not evidence:
                continue
            anchor_rows.append(
                "\t".join(
                    [
                        str(anchor.index),
                        str(anchor.primary_label),
                        ",".join(anchor.field_names),
                        str(neighbor.index),
                        f"{neighbor.index - anchor.index:+d}",
                        neighbor.primary_label or "",
                        ",".join(neighbor.field_names),
                        chunk_role(neighbor),
                        ",".join(evidence),
                    ]
                )
            )
    save_lines("referent_anchor_event_graph.tsv", anchor_rows)

    # --- focused graph / compare reports (kept in their original order) ---
    # Each row: (kind, file name, labels, graph title, explicit window or None for the default).
    report_plan = [
        ("graph", "jelyhack_island_graph.md", {"JELYHACK", "JELYH2"}, "JELYHACK Island Graph", None),
        ("compare", "jelyhack_descriptor_compare.tsv", {"JELYHACK", "JELYH2", "REE_BOOT", "SURCAMEW", "SFXTRIG"}, None, None),
        ("graph", "event_island_graph.md", {"EVENT", "COR_BOOT", "NPCTRIG", "ROLL_NS", "CRUZTRIG"}, "EVENT Cluster Graph", None),
        ("compare", "event_descriptor_compare.tsv", {"ROLL_NS", "COR_BOOT", "EVENT", "NPCTRIG", "CRUZTRIG", "NPC_ONLY", "VMAIL"}, None, None),
        ("graph", "boot_frontier_graph.md", {"AND_BOOT", "BRO_BOOT"}, "AND/BRO Boot Frontier Graph", 6),
        ("compare", "boot_family_compare.tsv", {"AND_BOOT", "BRO_BOOT", "COR_BOOT", "VAR_BOOT", "REE_BOOT"}, None, None),
        ("graph", "environmental_event_graph.md", {"FLAMEBOX", "NOSTRIL", "STEAMBOX"}, "Environmental Event Graph", 5),
        ("compare", "environmental_family_compare.tsv", {"FLAMEBOX", "NOSTRIL", "STEAMBOX"}, None, None),
        ("compare", "callback_trigger_compare.tsv", {"SURCAMNS", "SURCAMEW"}, None, None),
    ]
    for kind, file_name, label_set, graph_title, graph_window in report_plan:
        if kind == "graph":
            window_kwargs = {} if graph_window is None else {"window": graph_window}
            write_island_graph(
                out_dir,
                file_name,
                graph_title,
                label_set,
                descriptor_chunks,
                chunk_by_index,
                chunk_total,
                **window_kwargs,
            )
        else:
            write_descriptor_compare(out_dir, file_name, label_set, descriptor_chunks)
    write_event_family_reports(out_dir, descriptor_chunks, chunk_by_index, chunk_total)

    # --- README.md --------------------------------------------------------
    readme = [
        "# EUSECODE.FLX First-Pass Extraction",
        "",
        f"Input: {input_path}",
        f"File size: 0x{len(data):X} ({len(data)} bytes)",
        f"Candidate entries: {len(entries)}",
        "",
        "## Header Preview",
        "",
        f"ASCII: `{header_ascii}`",
        "",
        "## Chunks",
        "",
        "| # | Table Off | Data Off | Declared Size | Next Off | Text | Overlap | Preview |",
        "|---:|---:|---:|---:|---:|:---:|:---:|---|",
    ]
    for chunk in chunks:
        next_off = "-" if chunk.next_offset is None else f"0x{chunk.next_offset:X}"
        text_flag = "yes" if chunk.text_like else "no"
        overlap_flag = "yes" if chunk.overlap_with_next else "no"
        # Pipes would break the Markdown table, so they are swapped for slashes.
        safe_preview = chunk.preview.replace("|", "/")
        readme.append(
            f"| {chunk.index} | 0x{chunk.table_offset:X} | 0x{chunk.data_offset:X} | 0x{chunk.declared_size:X} "
            f"| {next_off} | {text_flag} | {overlap_flag} | {safe_preview} |"
        )
    readme += [
        "",
        "## Notes",
        "",
        "- The extractor now parses the validated FLX table directly: entry count at `0x54`, table at `0x80`, 8 bytes per entry.",
        "- Overlapping declared sizes likely mean some entries are counts or record spans rather than exact chunk lengths.",
        "- `.strings.txt` files are the main human-readable output for now; `.txt` files are emitted only for chunks that look text-like.",
        "- `descriptor_index.tsv` summarizes guessed class labels, field names, and compact tag patterns for descriptor-like chunks.",
        "- `descriptor_neighborhoods.tsv` captures local table neighborhoods around trigger/event-related classes such as `JELYHACK`, `NPCTRIG`, `CRUZTRIG`, `TRIGPAD`, and `SPECIAL`.",
        "- `referent_anchor_event_graph.tsv` groups referent-bearing descriptors with nearby event-bearing neighbors so the attachment model can be inspected without ad hoc grepping.",
        "- `jelyhack_island_graph.md` renders the first focused graph view for the `JELYHACK` / `JELYH2` neighborhood, marking likely event-bearing attachments such as `REE_BOOT`, `SURCAMEW`, and `SFXTRIG` when they appear within the local table window.",
        "- `jelyhack_descriptor_compare.tsv` captures the first 16 header words, first 8 dwords, and a few odd printable markers for the core JELYHACK-island descriptors so structural similarity can be compared without raw hex dumps.",
        "- `event_island_graph.md` renders the denser `EVENT` / `COR_BOOT` / `NPCTRIG` / `ROLL_NS` / `CRUZTRIG` island, which currently looks like the strongest event-explicit neighborhood outside the JELYHACK anchor case.",
        "- `event_descriptor_compare.tsv` captures the same header-word and printable-marker comparison for the `EVENT` island so large event-bearing descriptors can be contrasted with neighboring trigger and referent records.",
        "- `boot_frontier_graph.md` renders the upstream referent neighborhood feeding `AND_BOOT` / `BRO_BOOT`, which is currently the clearest unexplored boot-event frontier.",
        "- `boot_family_compare.tsv` compares the five `_BOOT` event cores (`AND_BOOT`, `BRO_BOOT`, `COR_BOOT`, `VAR_BOOT`, `REE_BOOT`) by header words, markers, and field tags.",
        "- `environmental_event_graph.md` renders the three hazard/event islands centered on `FLAMEBOX`, `NOSTRIL`, and `STEAMBOX`, each surrounded by its own referent-heavy local neighborhood.",
        "- `environmental_family_compare.tsv` compares the environmental event trio so the shared hazard pattern (`referent,event,<hazard>,<hazard2>,direction,count`) can be contrasted directly.",
        "- `callback_trigger_compare.tsv` compares `SURCAMNS` and `SURCAMEW` directly so the callback-only `eventTrigger` lane can be checked against the active `event` families without raw hex dumps.",
        "- `event_family_index.tsv` and `event_family_summary.md` classify all current `event` and `eventTrigger` descriptors into reusable families such as boot-event cores, minimal event cores, environmental events, and callback-only surveillance triggers.",
    ]
    save_lines("README.md", readme)

    # --- all_strings.txt --------------------------------------------------
    # One printable run per line; no runs means an empty file.
    whole_file_runs = iter_printable_runs(data)
    (out_dir / "all_strings.txt").write_text("".join(f"{run}\n" for run in whole_file_runs), encoding="utf-8")
def parse_args() -> argparse.Namespace:
    """Parse the two optional positional arguments ``input`` and ``output``.

    Both are converted to ``pathlib.Path`` and fall back to the module defaults
    when omitted.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    for dest, fallback in (("input", DEFAULT_INPUT), ("output", DEFAULT_OUTPUT)):
        cli.add_argument(dest, nargs="?", type=pathlib.Path, default=fallback)
    return cli.parse_args()
def main() -> int:
    """Run the extractor end to end and return the process exit status (always 0)."""
    options = parse_args()
    source_path, target_dir = options.input, options.output

    blob = source_path.read_bytes()
    target_dir.mkdir(parents=True, exist_ok=True)

    table = parse_flx_table(blob)
    extracted = extract_candidates(blob, target_dir, table.entries)
    write_summary(target_dir, source_path, blob, table.entries, extracted)

    print(
        f"Parsed {table.entry_count} table slots with {len(extracted)} non-zero entries; extracted to {target_dir}"
    )
    return 0
# Run the extractor only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())