Crusader_Decomp/tmp_immortality_scan.py

import csv
import pathlib
import struct

ROOT = pathlib.Path(r"K:/ghidra/Crusader_Decomp")
TARGETS = {189, 190, 191, 272, 273, 283, 285}
TARGET_COMPARE_CLASSES = {"NPCTRIG", "COR_BOOT", "REE_BOOT", "SFXTRIG"}


def find_all(haystack: bytes, needle: bytes) -> list[int]:
    """Return every offset at which *needle* occurs in *haystack*.

    Offsets are reported in ascending order. The search resumes one byte
    after each hit, so overlapping occurrences are all included.
    """
    positions: list[int] = []
    cursor = haystack.find(needle)
    while cursor >= 0:
        positions.append(cursor)
        # Step a single byte forward (not len(needle)) to keep overlaps.
        cursor = haystack.find(needle, cursor + 1)
    return positions

def lcp(left: bytes, right: bytes) -> int:
    """Return the length of the longest common prefix of *left* and *right*."""
    for index, (left_byte, right_byte) in enumerate(zip(left, right)):
        if left_byte != right_byte:
            return index
    # No mismatch inside the overlap: the shorter input is a full prefix.
    return min(len(left), len(right))


def lcs(left: bytes, right: bytes) -> int:
    """Return the length of the longest common suffix of *left* and *right*."""
    matched = 0
    # Walk both inputs from their last byte towards the front in lockstep;
    # zip stops at the shorter one, which bounds the count.
    for left_byte, right_byte in zip(reversed(left), reversed(right)):
        if left_byte != right_byte:
            break
        matched += 1
    return matched


# Load the class/event index and group the rows of interest by entry index.
# A row is kept only when its entry index is in TARGETS and it carries a
# non-empty derived_body_start value.
index_path = ROOT / "USECODE/EUSECODE_extracted/class_event_index.tsv"
# The context manager closes the handle once the rows are materialised
# (the original left it open); newline="" lets the csv module do its own
# line-ending handling, as its documentation requires.
with index_path.open("r", encoding="utf-8", newline="") as index_file:
    rows = list(csv.DictReader(index_file, delimiter="\t"))

rows_by_entry: dict[int, list[dict[str, object]]] = {}
for row in rows:
    entry_index = int(row["entry_index"])
    if entry_index not in TARGETS:
        continue
    if not row["derived_body_start"]:
        continue
    rows_by_entry.setdefault(entry_index, []).append(
        {
            "class_name": row["class_name_hint"],
            # Base 0 makes int() honour a 0x/0o/0b prefix and otherwise
            # parse the text as decimal.
            "slot": int(row["slot"], 0),
            "event_name_hint": row["event_name_hint"],
            "body_start": int(row["derived_body_start"], 0),
            "body_end": int(row["derived_body_end"], 0),
        }
    )

# Map each targeted entry index to its extracted chunk file on disk.
chunk_files: dict[int, pathlib.Path] = {}
for chunk_path in (ROOT / "USECODE/EUSECODE_extracted/chunks").glob("chunk_*.bin"):
    # Take the index from the stem rather than the full name: when the index
    # is the last "_"-separated token (e.g. "chunk_189.bin") the full name
    # would yield "189.bin", which int() rejects.
    try:
        entry_index = int(chunk_path.stem.split("_")[1])
    except ValueError:
        # The glob matched a file whose second token is not a number; it
        # cannot belong to any indexed entry, so ignore it.
        continue
    if entry_index in rows_by_entry:
        chunk_files[entry_index] = chunk_path

# Slice every targeted event body out of its chunk file, remember it under
# (class name, slot) for the comparison pass, and print one summary line per
# body listing where three fixed byte patterns occur.
bodies: dict[tuple[str, int], bytes] = {}

# (format-field suffix, needle) pairs. The needles are little-endian
# encodings of 0x0410 as 16-bit, 0x00000410 as 32-bit and 0x1004 as 16-bit;
# they never change, so they are packed once up front.
scan_patterns = (
    ("16", struct.pack("<H", 0x0410)),
    ("32", struct.pack("<I", 0x00000410)),
    ("1004", struct.pack("<H", 0x1004)),
)
body_template = "BODY class={class_name} slot=0x{slot:02X} hint={hint} start=0x{start:04X} end=0x{end:04X} len={length} le16_0410={count16}:{offs16} le32_00000410={count32}:{offs32} le16_1004={count1004}:{offs1004} first16={first16} last16={last16}"

for entry_index in sorted(rows_by_entry):
    chunk_path = chunk_files[entry_index]
    data = chunk_path.read_bytes()
    entry_rows = rows_by_entry[entry_index]
    # The entry header shows the class name of the first row recorded for it.
    class_name = str(entry_rows[0]["class_name"])
    print(f"ENTRY {entry_index} {class_name} FILE {chunk_path.name}")
    # Report the bodies in ascending start-offset order.
    for row in sorted(entry_rows, key=lambda entry_row: int(entry_row["body_start"])):
        body_start = int(row["body_start"])
        body_end = int(row["body_end"])
        body = data[body_start:body_end]
        class_name = str(row["class_name"])
        slot = int(row["slot"])
        bodies[(class_name, slot)] = body

        fields: dict[str, object] = {
            "class_name": class_name,
            "slot": slot,
            "hint": str(row["event_name_hint"] or "-"),
            "start": body_start,
            "end": body_end,
            "length": len(body),
            "first16": body[:16].hex(),
            "last16": body[-16:].hex(),
        }
        for suffix_label, needle in scan_patterns:
            hits = find_all(body, needle)
            fields["count" + suffix_label] = len(hits)
            # Only the first 16 offsets are listed; "-" stands for no hits.
            fields["offs" + suffix_label] = ",".join(f"0x{offset:04X}" for offset in hits[:16]) or "-"
        print(body_template.format(**fields))
    print()

# Rank every unordered pair of collected bodies whose class is in
# TARGET_COMPARE_CLASSES by shared prefix length plus shared suffix length,
# then print the twelve highest-ranked pairs.
print("TOP_STRUCTURAL_PAIRS")
compare_keys = [key for key in bodies if key[0] in TARGET_COMPARE_CLASSES]
comparisons: list[tuple[int, int, int, tuple[str, int], tuple[str, int], int, int]] = []
# enumerate(..., start=1) yields the index of the first key *after* the left
# one, so each unordered pair is visited exactly once.
for next_index, left_key in enumerate(compare_keys, start=1):
    left_body = bodies[left_key]
    for right_key in compare_keys[next_index:]:
        right_body = bodies[right_key]
        prefix = lcp(left_body, right_body)
        suffix = lcs(left_body, right_body)
        comparisons.append(
            (prefix + suffix, prefix, suffix, left_key, right_key, len(left_body), len(right_body))
        )
# Tuples sort by their first field (the combined score) and fall back to the
# remaining fields on ties; reverse=True puts the largest first.
comparisons = sorted(comparisons, reverse=True)
for total, prefix, suffix, left_key, right_key, left_len, right_len in comparisons[:12]:
    print(
        f"PAIR {left_key[0]}:0x{left_key[1]:02X} len={left_len} <-> {right_key[0]}:0x{right_key[1]:02X} len={right_len} prefix={prefix} suffix={suffix} total={total}"
    )
Add 'annotate-usecode' command to import USECODE IR JSON annotations

- Introduced a new command 'annotate-usecode' to import USECODE IR JSON annotation hints as Ghidra comments on compiled anchors.
- Added argument parsing for multiple IR JSON files, comment type selection, and a dry-run option.
- Implemented logic to read annotation records from the provided IR files and set comments on the corresponding addresses in Ghidra.
- Enhanced JSON schema to include response structure for the new command.

2026-03-24 18:14:20 +01:00
			`import csv`
			`import pathlib`
			`import struct`

			`ROOT = pathlib.Path(r"K:/ghidra/Crusader_Decomp")`
			`TARGETS = {189, 190, 191, 272, 273, 283, 285}`
			`TARGET_COMPARE_CLASSES = {"NPCTRIG", "COR_BOOT", "REE_BOOT", "SFXTRIG"}`


			`def find_all(haystack: bytes, needle: bytes) -> list[int]:`
			`offsets: list[int] = []`
			`start = 0`
			`while True:`
			`found = haystack.find(needle, start)`
			`if found < 0:`
			`return offsets`
			`offsets.append(found)`
			`start = found + 1`

			`def lcp(left: bytes, right: bytes) -> int:`
			`count = 0`
			`limit = min(len(left), len(right))`
			`while count < limit and left[count] == right[count]:`
			`count += 1`
			`return count`


			`def lcs(left: bytes, right: bytes) -> int:`
			`count = 0`
			`limit = min(len(left), len(right))`
			`while count < limit and left[-1 - count] == right[-1 - count]:`
			`count += 1`
			`return count`


			`rows = list(`
			`csv.DictReader(`
			`(ROOT / "USECODE/EUSECODE_extracted/class_event_index.tsv").open("r", encoding="utf-8"),`
			`delimiter="\t",`
			`)`
			`)`
			`rows_by_entry: dict[int, list[dict[str, object]]] = {}`
			`for row in rows:`
			`entry_index = int(row["entry_index"])`
			`if entry_index not in TARGETS:`
			`continue`
			`if not row["derived_body_start"]:`
			`continue`
			`rows_by_entry.setdefault(entry_index, []).append(`
			`{`
			`"class_name": row["class_name_hint"],`
			`"slot": int(row["slot"], 0),`
			`"event_name_hint": row["event_name_hint"],`
			`"body_start": int(row["derived_body_start"], 0),`
			`"body_end": int(row["derived_body_end"], 0),`
			`}`
			`)`

			`chunk_files: dict[int, pathlib.Path] = {}`
			`for chunk_path in (ROOT / "USECODE/EUSECODE_extracted/chunks").glob("chunk_*.bin"):`
			`entry_index = int(chunk_path.name.split("_")[1])`
			`if entry_index in rows_by_entry:`
			`chunk_files[entry_index] = chunk_path`

			`bodies: dict[tuple[str, int], bytes] = {}`
			`for entry_index in sorted(rows_by_entry):`
			`chunk_path = chunk_files[entry_index]`
			`data = chunk_path.read_bytes()`
			`class_name = str(rows_by_entry[entry_index][0]["class_name"])`
			`print(f"ENTRY {entry_index} {class_name} FILE {chunk_path.name}")`
			`for row in sorted(rows_by_entry[entry_index], key=lambda item: int(item["body_start"])):`
			`body = data[int(row["body_start"]):int(row["body_end"])]`
			`class_name = str(row["class_name"])`
			`slot = int(row["slot"])`
			`bodies[(class_name, slot)] = body`
			`hits_0410_16 = find_all(body, struct.pack("<H", 0x0410))`
			`hits_0410_32 = find_all(body, struct.pack("<I", 0x00000410))`
			`hits_1004_16 = find_all(body, struct.pack("<H", 0x1004))`
			`print(`
			`"BODY class={class_name} slot=0x{slot:02X} hint={hint} start=0x{start:04X} end=0x{end:04X} len={length} le16_0410={count16}:{offs16} le32_00000410={count32}:{offs32} le16_1004={count1004}:{offs1004} first16={first16} last16={last16}".format(`
			`class_name=class_name,`
			`slot=slot,`
			`hint=str(row["event_name_hint"] or "-"),`
			`start=int(row["body_start"]),`
			`end=int(row["body_end"]),`
			`length=len(body),`
			`count16=len(hits_0410_16),`
			`offs16=",".join(f"0x{offset:04X}" for offset in hits_0410_16[:16]) or "-",`
			`count32=len(hits_0410_32),`
			`offs32=",".join(f"0x{offset:04X}" for offset in hits_0410_32[:16]) or "-",`
			`count1004=len(hits_1004_16),`
			`offs1004=",".join(f"0x{offset:04X}" for offset in hits_1004_16[:16]) or "-",`
			`first16=body[:16].hex(),`
			`last16=body[-16:].hex(),`
			`)`
			`)`
			`print()`

			`print("TOP_STRUCTURAL_PAIRS")`
			`comparisons: list[tuple[int, int, int, tuple[str, int], tuple[str, int], int, int]] = []`
			`compare_keys = [key for key in bodies if key[0] in TARGET_COMPARE_CLASSES]`
			`for left_index, left_key in enumerate(compare_keys):`
			`for right_key in compare_keys[left_index + 1:]:`
			`left_body = bodies[left_key]`
			`right_body = bodies[right_key]`
			`prefix = lcp(left_body, right_body)`
			`suffix = lcs(left_body, right_body)`
			`comparisons.append((prefix + suffix, prefix, suffix, left_key, right_key, len(left_body), len(right_body)))`
			`comparisons.sort(reverse=True)`
			`for total, prefix, suffix, left_key, right_key, left_len, right_len in comparisons[:12]:`
			`print(`
			`f"PAIR {left_key[0]}:0x{left_key[1]:02X} len={left_len} <-> {right_key[0]}:0x{right_key[1]:02X} len={right_len} prefix={prefix} suffix={suffix} total={total}"`
			`)`