"""Compare event-handler bodies across families of related classes.

The script reads two tab-separated indexes (``class_layout_index.tsv`` and
``class_event_index.tsv``) plus the extracted chunk binaries under
``USECODE/EUSECODE_extracted``.  For every class listed in ``FAMILIES`` it
slices each event body out of the class's chunk, then prints, per family
and slot, one summary line per class followed by the groups of identical
bodies, the common prefix/suffix lengths and the first differing offsets.
"""

import csv
import glob
import hashlib
import json
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
LAYOUT_PATH = ROOT / "USECODE" / "EUSECODE_extracted" / "class_layout_index.tsv"
EVENT_PATH = ROOT / "USECODE" / "EUSECODE_extracted" / "class_event_index.tsv"
CHUNKS_DIR = ROOT / "USECODE" / "EUSECODE_extracted" / "chunks"

# Families of classes to compare, and the event slots compared in each.
FAMILIES = {
    "BOOT": {
        "classes": ["AND_BOOT", "BRO_BOOT", "COR_BOOT", "REE_BOOT", "VAR_BOOT"],
        "slots": [0x0A, 0x0F, 0x10],
    },
    "SURCAM": {
        "classes": ["SURCAMNS", "SURCAMEW"],
        "slots": [0x20, 0x21, 0x22],
    },
    "JELY": {
        "classes": ["JELYHACK", "JELYH2"],
        "slots": [0x01],
    },
}


def parse_hex(value: str) -> int:
    """Parse *value* as a base-16 integer (an optional ``0x`` prefix is accepted)."""
    return int(value, 16)


def common_prefix_length(blobs: list[bytes]) -> int:
    """Return how many leading bytes are identical across all *blobs*.

    An empty list yields 0; the result never exceeds the shortest blob.
    """
    if not blobs:
        return 0
    limit = min(len(blob) for blob in blobs)
    first, rest = blobs[0], blobs[1:]
    for index in range(limit):
        current = first[index]
        if any(blob[index] != current for blob in rest):
            return index
    return limit


def common_suffix_length(blobs: list[bytes]) -> int:
    """Return how many trailing bytes are identical across all *blobs*."""
    # A common suffix is the common prefix of the reversed blobs.
    return common_prefix_length([blob[::-1] for blob in blobs])


def first_diff_positions(blobs: list[bytes], limit: int = 8) -> list[int]:
    """Return the first offsets at which the *blobs* disagree.

    Offsets past the end of a shorter blob count as a difference against any
    longer blob.  At most *limit* offsets are returned; an empty *blobs* list
    or a non-positive *limit* yields an empty list.
    """
    positions: list[int] = []
    # default=0 keeps an empty input from raising ValueError in max().
    max_len = max((len(blob) for blob in blobs), default=0)
    for index in range(max_len):
        # Check before appending so that limit <= 0 really returns nothing.
        if len(positions) >= limit:
            break
        values = {blob[index] if index < len(blob) else None for blob in blobs}
        if len(values) > 1:
            positions.append(index)
    return positions


def load_layouts(targets: set[str]) -> dict[str, dict[str, str]]:
    """Read ``LAYOUT_PATH`` and return its rows keyed by ``class_name_hint``.

    Only rows whose ``class_name_hint`` is in *targets* are kept; if a class
    appears more than once, the last row wins.
    """
    layouts: dict[str, dict[str, str]] = {}
    with LAYOUT_PATH.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        for row in reader:
            if row["class_name_hint"] in targets:
                layouts[row["class_name_hint"]] = row
    return layouts


def load_events(targets: set[str]) -> dict[str, list[dict[str, str]]]:
    """Read ``EVENT_PATH`` and return event rows grouped by ``class_name_hint``.

    Only classes in *targets* are kept.  Each class's rows are sorted by
    their ``slot`` column, interpreted as hexadecimal.
    """
    events: dict[str, list[dict[str, str]]] = {}
    with EVENT_PATH.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        for row in reader:
            class_name = row["class_name_hint"]
            if class_name in targets:
                events.setdefault(class_name, []).append(row)
    for rows in events.values():
        rows.sort(key=lambda row: parse_hex(row["slot"]))
    return events


def resolve_chunk(data_offset: int) -> Path:
    """Return the single chunk file in ``CHUNKS_DIR`` for *data_offset*.

    Chunk files are matched by the name pattern
    ``chunk_*_off_<OFFSET>_len_*.bin`` where ``<OFFSET>`` is *data_offset*
    as six upper-case hex digits.

    Raises:
        RuntimeError: if zero or more than one file matches.
    """
    # Escape the directory so glob metacharacters in the checkout path
    # ("[", "]", "*", "?") are matched literally; only the file name is a
    # pattern.
    directory = Path(glob.escape(str(CHUNKS_DIR)))
    pattern = directory / f"chunk_*_off_{data_offset:06X}_len_*.bin"
    matches = sorted(glob.glob(str(pattern)))
    if len(matches) != 1:
        raise RuntimeError(f"chunk lookup failed for 0x{data_offset:06X}: {matches}")
    return Path(matches[0])


def build_rows() -> dict[tuple[str, int], dict[str, object]]:
    """Slice every non-empty event body of the ``FAMILIES`` classes.

    For each class with a layout row, the chunk is read and each event row
    with a non-zero ``raw_code_offset`` becomes one entry.  A body starts at
    ``code_base_minus_one + raw_code_offset`` and ends where the next
    greater code offset of the same class starts, or at the end of the chunk
    for the last one.

    Returns:
        A mapping from ``(class_name, slot)`` to a record holding the
        event's index columns, the ``start``/``end``/``length`` of the body,
        its SHA-1 hex digest, a hex preview of the first 16 bytes, the chunk
        path (with forward slashes) and the raw ``body`` bytes.  A class
        with a layout row but no event rows contributes no entries.

    Raises:
        RuntimeError: propagated from ``resolve_chunk`` when a chunk file
            cannot be identified uniquely.
    """
    targets = {name for family in FAMILIES.values() for name in family["classes"]}
    layouts = load_layouts(targets)
    events = load_events(targets)

    rows_by_key: dict[tuple[str, int], dict[str, object]] = {}
    for class_name, layout in layouts.items():
        chunk_path = resolve_chunk(parse_hex(layout["data_offset"]))
        blob = chunk_path.read_bytes()
        code_base_minus_one = parse_hex(layout["code_base_minus_one"])

        # Parse each offset once; rows with a zero offset are not sliced.
        located = [
            (parse_hex(row["raw_code_offset"]), row)
            for row in events.get(class_name, [])
        ]
        located = [(offset, row) for offset, row in located if offset != 0]

        # Map every distinct offset to the next greater one so each body's
        # end is a dictionary lookup instead of a rescan of all offsets.
        offsets = sorted({offset for offset, _ in located})
        following = dict(zip(offsets, offsets[1:]))

        for code_offset, row in located:
            slot = parse_hex(row["slot"])
            start = code_base_minus_one + code_offset
            next_offset = following.get(code_offset)
            if next_offset is None:
                # Highest offset in the class: the body runs to chunk end.
                end = len(blob)
            else:
                end = code_base_minus_one + next_offset
            body = blob[start:end]
            rows_by_key[(class_name, slot)] = {
                "class_name": class_name,
                "slot": slot,
                "event_name_hint": row["event_name_hint"],
                "raw_event_entry_word": row["raw_event_entry_word"],
                "raw_code_offset": row["raw_code_offset"],
                "start": start,
                "end": end,
                "length": len(body),
                "sha1": hashlib.sha1(body).hexdigest(),
                "preview": body[:16].hex(" "),
                "chunk_path": str(chunk_path).replace("\\", "/"),
                "body": body,
            }
    return rows_by_key


def main() -> None:
    """Print the per-family, per-slot comparison report to stdout.

    Raises:
        KeyError: if a class listed in ``FAMILIES`` has no extracted body
            for one of its family's slots.
    """
    rows_by_key = build_rows()
    for family_name, family in FAMILIES.items():
        print(f"## {family_name}")
        for slot in family["slots"]:
            print(f"SLOT 0x{slot:02X}")

            subset = []
            for class_name in family["classes"]:
                key = (class_name, slot)
                if key not in rows_by_key:
                    # Same exception type as a bare lookup, but it names
                    # the class and slot that are missing.
                    raise KeyError(
                        f"no event body for class {class_name} slot 0x{slot:02X}"
                    )
                subset.append(rows_by_key[key])

            for row in subset:
                print(
                    "\t".join(
                        [
                            str(row["class_name"]),
                            str(row["event_name_hint"]),
                            str(row["raw_event_entry_word"]),
                            str(row["raw_code_offset"]),
                            f"{row['start']:04X}-{row['end']:04X}",
                            str(row["length"]),
                            str(row["sha1"])[:12],
                            str(row["preview"]),
                            str(row["chunk_path"]),
                        ]
                    )
                )

            # Classes whose bodies hash identically end up in the same group.
            groups: dict[str, list[str]] = {}
            for row in subset:
                groups.setdefault(str(row["sha1"]), []).append(str(row["class_name"]))
            blobs = [row["body"] for row in subset]

            print("identical_groups=" + json.dumps(list(groups.values())))
            print(
                f"common_prefix_len={common_prefix_length(blobs)} "
                f"common_suffix_len={common_suffix_length(blobs)}"
            )
            print("first_diff_positions=" + json.dumps(first_diff_positions(blobs)))
            print()


if __name__ == "__main__":
    main()