#!/usr/bin/env python3
"""
Ghidra Fixup Annotator — Batch-applies NE relocation fixup comments.

Reads ne_reloc_far_calls.tsv (internal far calls) and
ne_reloc_far_imports.tsv (imported far calls) and produces a disassembly
comment for each CALLF 0000:ffff call site identifying the resolved real
target.

Usage via Ghidra MCP: call set_disassembly_comment for each entry.
Usage standalone: generates a Ghidra Python script (.py) for the
GhidraScript runner.

This script generates the Ghidra-side .py script.
"""

import csv
import json
import os
from collections import Counter

BASE_DIR = r'k:\ghidra\Crusader_Decomp'
TSV_PATH = os.path.join(BASE_DIR, 'ne_reloc_far_calls.tsv')
IMPORT_TSV = os.path.join(BASE_DIR, 'ne_reloc_far_imports.tsv')
OUT_SCRIPT = os.path.join(BASE_DIR, 'ghidra_apply_fixup_comments.py')

# Fixed head of the generated Ghidra script: imports, counters and the
# set_comment() helper that every generated call line invokes.
_SCRIPT_PROLOGUE = (
    '# Auto-generated NE relocation fixup comments for CRUSADER-RAW.EXE',
    '# Resolves every CALLF 0x0000:ffff to its real NE target.',
    '# Run this in Ghidra Script Manager (Python/Jython).',
    '',
    'from ghidra.program.model.listing import CodeUnit',
    'from ghidra.program.model.address import AddressSet',
    '',
    'listing = currentProgram.getListing()',
    'space = currentProgram.getAddressFactory().getDefaultAddressSpace()',
    'count = 0',
    'errors = 0',
    '',
    'def set_comment(flat_addr, comment):',
    '    global count, errors',
    '    try:',
    '        addr = space.getAddress(flat_addr)',
    '        cu = listing.getCodeUnitAt(addr)',
    '        if cu is not None:',
    '            cu.setComment(CodeUnit.EOL_COMMENT, comment)',
    '            count += 1',
    '        else:',
    '            errors += 1',
    '    except Exception as e:',
    '        errors += 1',
    '        print("set_comment failed at 0x%X: %s" % (flat_addr, e))',
    '',
)


def ghidra_addr_to_flat(addr_str):
    """Convert 'SSSS:OOOO' to flat integer.

    The segment is placed in the upper bits (shifted left by 16) and the
    offset is added to it. Both halves are parsed as hexadecimal.

    Raises:
        ValueError: if *addr_str* does not contain exactly one ':' or
            either half is not valid hexadecimal.
    """
    seg, off = addr_str.split(':')
    return (int(seg, 16) << 16) + int(off, 16)


def _read_tsv(path):
    """Return every row of the tab-separated file at *path* as a dict."""
    # newline='' is what the csv module requires so that it can handle
    # line endings (and embedded newlines) itself.
    with open(path, 'r', newline='') as f:
        return list(csv.DictReader(f, delimiter='\t'))


def _format_comment_call(source_addr, comment):
    """Return one generated ``set_comment(...)`` line for a fixup.

    The TSV source address is the OPERAND address of the fixup, so the
    CALLF instruction itself (opcode byte 0x9A) sits one byte earlier.
    NOTE(review): for a source offset of 0000 the subtraction borrows from
    the segment part — confirm such rows cannot occur in the TSVs.
    """
    flat = ghidra_addr_to_flat(source_addr) - 1  # back up to the 0x9A opcode
    # json.dumps emits an ASCII-only, double-quoted literal with quotes,
    # backslashes and control characters escaped, so arbitrary label text
    # cannot break the syntax of the generated script.
    return f'set_comment(0x{flat:X}, {json.dumps(comment)})'


def build_script_lines(entries, imports):
    """Build the generated Ghidra script as a list of source lines.

    Args:
        entries: internal far-call rows; each needs the keys
            'source_ghidra', 'target_ghidra' and 'target_label'.
        imports: import far-call rows; each needs the keys
            'source_ghidra' and 'target'.

    Returns:
        The script lines, without line terminators.

    Raises:
        KeyError: if a row lacks one of the required columns.
    """
    lines = list(_SCRIPT_PROLOGUE)

    # Internal fixups: comment = "NE FIXUP -> target_ghidra (target_label)"
    for row in entries:
        comment = f"NE FIXUP -> {row['target_ghidra']} ({row['target_label']})"
        lines.append(_format_comment_call(row['source_ghidra'], comment))

    # Import fixups
    for row in imports:
        comment = f"NE IMPORT -> {row['target']}"
        lines.append(_format_comment_call(row['source_ghidra'], comment))

    lines.append('')
    lines.append(
        'println("Applied %d fixup comments (%d errors)" % (count, errors))'
    )
    return lines


def _print_target_stats(entries):
    """Print how many distinct internal targets exist and the top 20."""
    target_counts = Counter(row['target_ghidra'] for row in entries)

    # Remember the label of the first row seen for each target.
    first_label = {}
    for row in entries:
        first_label.setdefault(row['target_ghidra'], row['target_label'])

    print(f"\n Unique internal far-call targets: {len(target_counts)}")
    print("\n Top 20 most-called targets:")
    for tgt, cnt in target_counts.most_common(20):
        label = first_label.get(tgt, '?')
        print(f" {tgt} ({label}) — {cnt} call sites")


def main():
    """Read both fixup TSVs, write the Ghidra script and print a summary."""
    entries = _read_tsv(TSV_PATH)    # internal far call fixups
    imports = _read_tsv(IMPORT_TSV)  # import far call fixups

    print(f"Loaded {len(entries)} internal far-call fixups")
    print(f"Loaded {len(imports)} import far-call fixups")

    lines = build_script_lines(entries, imports)
    with open(OUT_SCRIPT, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
        f.write('\n')

    print(f"Generated Ghidra script: {OUT_SCRIPT}")
    print(f" {len(entries) + len(imports)} comment operations")
    print(" Copy to your Ghidra scripts directory and run from Script Manager.")

    # Also generate a quick stats summary
    _print_target_stats(entries)


if __name__ == '__main__':
    main()