Crusader_Decomp/ghidra_fixup_annotator.py

121 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""
Ghidra Fixup Annotator — Batch-applies NE relocation fixup comments.
Reads ne_reloc_far_calls.tsv and applies a disassembly comment at each
CALLF 0000:ffff call site identifying the resolved real target.
Usage via Ghidra MCP: call set_disassembly_comment for each entry.
Usage standalone: generates a Ghidra Python script (.py) for GhidraScript runner.
This script generates the Ghidra-side .py script.
"""
import csv
import json
import os
from collections import Counter
# Directory that holds the relocation TSV inputs and receives the generated
# Ghidra script.  NOTE(review): hard-coded Windows path — adjust per machine.
BASE_DIR = r'k:\ghidra\Crusader_Decomp'
# Input: internal far-call fixups, tab-separated.  main() reads the
# 'source_ghidra', 'target_ghidra' and 'target_label' columns.
TSV_PATH = os.path.join(BASE_DIR, 'ne_reloc_far_calls.tsv')
# Input: imported far-call fixups, tab-separated.  main() reads the
# 'source_ghidra' and 'target' columns.
IMPORT_TSV = os.path.join(BASE_DIR, 'ne_reloc_far_imports.tsv')
# Output: the Ghidra-side script that main() writes.
OUT_SCRIPT = os.path.join(BASE_DIR, 'ghidra_apply_fixup_comments.py')
def ghidra_addr_to_flat(addr_str: str) -> int:
    """Convert a 'SSSS:OOOO' segmented address string to a flat integer.

    The segment goes in the upper 16 bits and the offset in the lower 16
    bits, i.e. ``(segment << 16) + offset``.

    Args:
        addr_str: Hexadecimal segment and offset separated by a single
            colon, e.g. ``'0001:002a'``.

    Returns:
        The flat address as an int.

    Raises:
        ValueError: If the string is not exactly two colon-separated hex
            fields, or if either field is outside 0x0000-0xFFFF.
    """
    seg_text, off_text = addr_str.split(':')
    seg = int(seg_text, 16)
    off = int(off_text, 16)
    # An out-of-range field would carry into (or borrow from) the other
    # half and silently yield an address in a different segment.
    if not (0 <= seg <= 0xFFFF and 0 <= off <= 0xFFFF):
        raise ValueError(f"segment/offset out of 16-bit range: {addr_str!r}")
    return (seg << 16) + off
def _read_tsv_rows(path):
    """Return every row of the tab-separated file at *path* as a list of dicts."""
    # newline='' is what the csv module expects from its file object;
    # utf-8-sig reads UTF-8 with or without a leading byte-order mark.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        return list(csv.DictReader(f, delimiter='\t'))


def _script_literal(text):
    """Return *text* as a double-quoted, ASCII-only string literal.

    json.dumps escapes quotes, backslashes and control characters, so a TSV
    value can never terminate the literal early in the generated script.
    Non-ASCII characters come out as ``\\uXXXX`` sequences.
    """
    return json.dumps(text)


def _build_script_lines(entries, imports):
    """Return the lines of the generated Ghidra script.

    Args:
        entries: Rows with 'source_ghidra', 'target_ghidra', 'target_label'.
        imports: Rows with 'source_ghidra' and 'target'.
    """
    lines = [
        '# Auto-generated NE relocation fixup comments for CRUSADER-RAW.EXE',
        '# Resolves every CALLF 0x0000:ffff to its real NE target.',
        '# Run this in Ghidra Script Manager (Python/Jython).',
        '',
        'from ghidra.program.model.listing import CodeUnit',
        'from ghidra.program.model.address import AddressSet',
        '',
        'listing = currentProgram.getListing()',
        'space = currentProgram.getAddressFactory().getDefaultAddressSpace()',
        'count = 0',
        'errors = 0',
        '',
        'def set_comment(flat_addr, comment):',
        '    global count, errors',
        '    try:',
        '        addr = space.getAddress(flat_addr)',
        '        cu = listing.getCodeUnitAt(addr)',
        '        if cu is not None:',
        '            cu.setComment(CodeUnit.EOL_COMMENT, comment)',
        '            count += 1',
        '        else:',
        '            errors += 1',
        '    except Exception as e:',
        '        errors += 1',
        '        print("set_comment failed at 0x%X: %s" % (flat_addr, e))',
        '',
    ]

    # The TSV 'source_ghidra' column holds the address of the fixup operand,
    # which sits one byte after the 0x9A CALLF opcode.  Subtracting 1 puts
    # the comment on the instruction itself.
    for row in entries:
        flat = ghidra_addr_to_flat(row['source_ghidra']) - 1
        comment = f"NE FIXUP -> {row['target_ghidra']} ({row['target_label']})"
        lines.append(f'set_comment(0x{flat:X}, {_script_literal(comment)})')

    for row in imports:
        flat = ghidra_addr_to_flat(row['source_ghidra']) - 1
        comment = f"NE IMPORT -> {row['target']}"
        lines.append(f'set_comment(0x{flat:X}, {_script_literal(comment)})')

    lines.append('')
    lines.append('println("Applied %d fixup comments (%d errors)" % (count, errors))')
    return lines


def _print_target_stats(entries):
    """Print the number of distinct internal targets and the 20 most called."""
    target_counts = Counter(row['target_ghidra'] for row in entries)
    # Keep the first label seen for each target, in file order.
    first_label = {}
    for row in entries:
        first_label.setdefault(row['target_ghidra'], row['target_label'])

    print(f"\n Unique internal far-call targets: {len(target_counts)}")
    print("\n Top 20 most-called targets:")
    # most_common() orders equal counts by first appearance.
    for tgt, cnt in target_counts.most_common(20):
        label = first_label.get(tgt, '?')
        print(f" {tgt} ({label}) — {cnt} call sites")


def main():
    """Generate the Ghidra script that annotates every relocated far call.

    Reads the internal fixups from TSV_PATH and the import fixups from
    IMPORT_TSV, writes one set_comment(...) call per row to OUT_SCRIPT, and
    prints a short summary of the internal call targets.
    """
    entries = _read_tsv_rows(TSV_PATH)
    imports = _read_tsv_rows(IMPORT_TSV)
    print(f"Loaded {len(entries)} internal far-call fixups")
    print(f"Loaded {len(imports)} import far-call fixups")

    lines = _build_script_lines(entries, imports)
    with open(OUT_SCRIPT, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')

    print(f"Generated Ghidra script: {OUT_SCRIPT}")
    print(f" {len(entries) + len(imports)} comment operations")
    print(" Copy to your Ghidra scripts directory and run from Script Manager.")

    _print_target_stats(entries)


if __name__ == '__main__':
    main()