Add 'annotate-usecode' command to import USECODE IR JSON annotations

- Introduced a new command 'annotate-usecode' to import USECODE IR JSON annotation hints as Ghidra comments on compiled anchors.
- Added argument parsing for multiple IR JSON files, comment type selection, and a dry-run option.
- Implemented logic to read annotation records from the provided IR files and set comments on the corresponding addresses in Ghidra.
- Enhanced JSON schema to include response structure for the new command.
2026-03-24 18:14:20 +01:00 · 2026-03-24 18:14:20 +01:00 · daa363c3d2
commit daa363c3d2
parent 4d3c8cd81b
39 changed files with 41450 additions and 871 deletions
--- a/tmp_immortality_scan.py
+++ b/tmp_immortality_scan.py
@ -0,0 +1,113 @@
+import csv
+import pathlib
+import struct
+
+ROOT = pathlib.Path(r"K:/ghidra/Crusader_Decomp")
+TARGETS = {189, 190, 191, 272, 273, 283, 285}
+TARGET_COMPARE_CLASSES = {"NPCTRIG", "COR_BOOT", "REE_BOOT", "SFXTRIG"}
+
+
+def find_all(haystack: bytes, needle: bytes) -> list[int]:
+    """Return every offset at which ``needle`` occurs in ``haystack``.
+
+    Matches may overlap: after each hit the search resumes one byte past the
+    start of that hit rather than past its end.  Offsets are returned in
+    ascending order; the list is empty when there is no match.
+    """
+    positions: list[int] = []
+    cursor = haystack.find(needle)
+    while cursor >= 0:
+        positions.append(cursor)
+        cursor = haystack.find(needle, cursor + 1)
+    return positions
+
+def lcp(left: bytes, right: bytes) -> int:
+    """Return the length of the longest common prefix of ``left`` and ``right``."""
+    matched = 0
+    # zip stops at the shorter input, so the count never exceeds either length.
+    for left_byte, right_byte in zip(left, right):
+        if left_byte != right_byte:
+            break
+        matched += 1
+    return matched
+
+
+def lcs(left: bytes, right: bytes) -> int:
+    """Return the length of the longest common suffix of ``left`` and ``right``."""
+    matched = 0
+    # Walk both inputs from their last byte backwards; zip stops at the
+    # shorter one, so the count never exceeds either length.
+    for left_byte, right_byte in zip(reversed(left), reversed(right)):
+        if left_byte != right_byte:
+            break
+        matched += 1
+    return matched
+
+
+# Load the class/event index and keep, per target entry, the rows that carry
+# a derived body range.
+index_path = ROOT / "USECODE/EUSECODE_extracted/class_event_index.tsv"
+# A context manager closes the handle as soon as the rows are materialised
+# (the original left it open), and newline="" is what the csv module asks for
+# when it is handed a file object.
+with index_path.open("r", encoding="utf-8", newline="") as index_file:
+    rows = list(csv.DictReader(index_file, delimiter="\t"))
+
+rows_by_entry: dict[int, list[dict[str, object]]] = {}
+for row in rows:
+    entry_index = int(row["entry_index"])
+    if entry_index not in TARGETS:
+        continue
+    # Rows with an empty derived_body_start have no body range to slice.
+    if not row["derived_body_start"]:
+        continue
+    rows_by_entry.setdefault(entry_index, []).append(
+        {
+            "class_name": row["class_name_hint"],
+            # Base 0 lets int() accept prefixed literals such as 0x1F as well
+            # as plain decimal.
+            "slot": int(row["slot"], 0),
+            "event_name_hint": row["event_name_hint"],
+            "body_start": int(row["derived_body_start"], 0),
+            "body_end": int(row["derived_body_end"], 0),
+        }
+    )
+
+# Map each selected entry index to its extracted chunk file.
+chunk_files: dict[int, pathlib.Path] = {}
+for chunk_path in (ROOT / "USECODE/EUSECODE_extracted/chunks").glob("chunk_*.bin"):
+    # The second "_"-separated field of the file name is read as the entry
+    # index.  Splitting the stem instead of the full name keeps the ".bin"
+    # extension out of that field, so a bare "chunk_<index>.bin" parses as
+    # well as a name that has further "_" parts after the index.
+    entry_index = int(chunk_path.stem.split("_")[1])
+    if entry_index in rows_by_entry:
+        chunk_files[entry_index] = chunk_path
+
+def _hex_offsets(offsets: list[int], limit: int = 16) -> str:
+    """Render at most ``limit`` offsets as comma-joined 0xNNNN, or "-" if none."""
+    return ",".join(f"0x{offset:04X}" for offset in offsets[:limit]) or "-"
+
+
+# Byte patterns searched for in every body; packed once instead of per body.
+NEEDLE_0410_LE16 = struct.pack("<H", 0x0410)
+NEEDLE_0410_LE32 = struct.pack("<I", 0x00000410)
+NEEDLE_1004_LE16 = struct.pack("<H", 0x1004)
+BODY_REPORT = "BODY class={class_name} slot=0x{slot:02X} hint={hint} start=0x{start:04X} end=0x{end:04X} len={length} le16_0410={count16}:{offs16} le32_00000410={count32}:{offs32} le16_1004={count1004}:{offs1004} first16={first16} last16={last16}"
+
+# Slice every indexed body out of its chunk file, remember it under
+# (class name, slot), and print one report line per body.
+bodies: dict[tuple[str, int], bytes] = {}
+for entry_index in sorted(rows_by_entry):
+    # Raises KeyError when no chunk file was found for this entry.
+    chunk_path = chunk_files[entry_index]
+    data = chunk_path.read_bytes()
+    entry_rows = rows_by_entry[entry_index]
+    # The header line uses the class name of the entry's first indexed row.
+    header_class = str(entry_rows[0]["class_name"])
+    print(f"ENTRY {entry_index} {header_class} FILE {chunk_path.name}")
+    for row in sorted(entry_rows, key=lambda item: int(item["body_start"])):
+        start = int(row["body_start"])
+        end = int(row["body_end"])
+        body = data[start:end]
+        key = (str(row["class_name"]), int(row["slot"]))
+        bodies[key] = body
+        hits_0410_16 = find_all(body, NEEDLE_0410_LE16)
+        hits_0410_32 = find_all(body, NEEDLE_0410_LE32)
+        hits_1004_16 = find_all(body, NEEDLE_1004_LE16)
+        print(
+            BODY_REPORT.format(
+                class_name=key[0],
+                slot=key[1],
+                hint=str(row["event_name_hint"] or "-"),
+                start=start,
+                end=end,
+                length=len(body),
+                count16=len(hits_0410_16),
+                offs16=_hex_offsets(hits_0410_16),
+                count32=len(hits_0410_32),
+                offs32=_hex_offsets(hits_0410_32),
+                count1004=len(hits_1004_16),
+                offs1004=_hex_offsets(hits_1004_16),
+                first16=body[:16].hex(),
+                last16=body[-16:].hex(),
+            )
+        )
+    # Blank line between entries.
+    print()
+
+# Rank every pair of bodies from the compared classes by how many leading and
+# trailing bytes they share, then print the twelve best-scoring pairs.
+print("TOP_STRUCTURAL_PAIRS")
+comparisons: list[tuple[int, int, int, tuple[str, int], tuple[str, int], int, int]] = []
+compare_keys = [key for key in bodies if key[0] in TARGET_COMPARE_CLASSES]
+for left_index, left_key in enumerate(compare_keys):
+    left_body = bodies[left_key]
+    # Only pair with later keys so each unordered pair is scored once.
+    for right_key in compare_keys[left_index + 1:]:
+        right_body = bodies[right_key]
+        shorter_len = min(len(left_body), len(right_body))
+        prefix = lcp(left_body, right_body)
+        # The prefix and suffix scans are independent, so on identical or
+        # near-identical bodies they would claim the same bytes twice and
+        # push the total past the shorter body's length.  Cap the suffix at
+        # the bytes the prefix has not already covered.
+        suffix = min(lcs(left_body, right_body), shorter_len - prefix)
+        comparisons.append((prefix + suffix, prefix, suffix, left_key, right_key, len(left_body), len(right_body)))
+# Highest combined score first; ties fall through to the remaining tuple fields.
+comparisons.sort(reverse=True)
+for total, prefix, suffix, left_key, right_key, left_len, right_len in comparisons[:12]:
+    print(
+        f"PAIR {left_key[0]}:0x{left_key[1]:02X} len={left_len} <-> {right_key[0]}:0x{right_key[1]:02X} len={right_len} prefix={prefix} suffix={suffix} total={total}"
+    )