Add various scripts and JSON plans for Ghidra project

- Introduced `seg043_boundary_repair.json` to manage function boundaries in segment 043.
- Created `read_file.py` for reading and printing file content size.
- Added `resolve_bb4f.py` to resolve specific function call targets.
- Implemented `resolve_top_targets.py` to find resolved NE targets for top-called wrapper functions.
- Added `script_contents.txt` to summarize NE relocation far calls.
- Updated `tier4_ghidra.txt`, `tier4_ghidra_check.txt`, `tier4_output.txt`, and `tier4_result.txt` with function call statistics.
- Created `tier5_errors.txt` for error logging and `tier5_output.txt` for additional function call statistics.
- Established `tools` directory with helper scripts for the Ghidra project, including CLI and common functionalities.
- Implemented command-line interface in `cli.py` for various project operations.
- Added `common.py` for shared functions and configurations across tools.
- Introduced `validate_fixups.py` to validate NE relocation fixups against known addresses.
2026-03-20 23:50:39 +01:00 · 2026-03-20 23:50:39 +01:00 · 24d4416003
commit 24d4416003
parent 6b9eb205d4
36 changed files with 145712 additions and 14 deletions
--- a/ne_reloc_parser.py
+++ b/ne_reloc_parser.py
@ -0,0 +1,379 @@
+#!/usr/bin/env python3
+"""
+NE Relocation Table Parser for Crusader: No Remorse
+====================================================
+Reads the NE header + per-segment relocation entries from CRUSADER.EXE.
+Resolves each CALLF 0x0000:FFFF fixup to its real inter-segment target.
+Emits a mapping file suitable for Ghidra annotation.
+
+NE binary: CRUSADER.EXE (bound MZ+NE, NE header at 0x36F70)
+Raw import: Ghidra loads the whole file as flat RAM.
+  Ghidra flat address = file_offset  (since it's a raw binary import)
+  Ghidra seg:off      = (flat >> 16) : (flat & 0xFFFF)
+"""
+
+import struct, sys, os, json
+from collections import defaultdict
+
+# Input executable analysed by this script; main() writes its output files
+# into the same directory.
+EXE_PATH = r'k:\ghidra\Crusader_Decomp\CRUSADER.EXE'
+# File offset at which main() expects the NE header.
+NE_HEADER_OFFSET = 0x36F70
+
+# -- Address-type codes of an NE relocation entry --
+ADDR_LOBYTE = 0
+ADDR_SELECTOR = 2
+ADDR_FARPTR = 3      # 16:16 far pointer: the form used by CALLF targets
+ADDR_OFFSET = 5
+ADDR_48PTR = 11
+ADDR_OFFSET32 = 13
+
+# -- Relocation-type codes of an NE relocation entry --
+REL_INTERNAL = 0     # intra-module (segment:offset)
+REL_IMPORTORD = 1    # imported by ordinal
+REL_IMPORTNAM = 2    # imported by name
+REL_OSFIXUP = 3      # OS fixup
+
+# Printable names for the address-type codes, keyed by numeric code.
+ADDR_TYPE_NAMES = {
+    ADDR_LOBYTE: 'lobyte',
+    ADDR_SELECTOR: 'selector',
+    ADDR_FARPTR: 'far_ptr_16:16',
+    ADDR_OFFSET: 'offset16',
+    ADDR_48PTR: 'ptr_48',
+    ADDR_OFFSET32: 'offset32',
+}
+# Printable names for the relocation-type codes, keyed by numeric code.
+REL_TYPE_NAMES = {
+    REL_INTERNAL: 'internal',
+    REL_IMPORTORD: 'import_ordinal',
+    REL_IMPORTNAM: 'import_name',
+    REL_OSFIXUP: 'osfixup',
+}
+
+
+def read_u8(data, off):
+    """Return the element of ``data`` at index ``off`` (one byte value for bytes-like input)."""
+    byte = data[off]
+    return byte
+
+def read_u16(data, off):
+    """Decode the little-endian unsigned 16-bit integer stored in ``data`` at ``off``."""
+    (value,) = struct.unpack_from('<H', data, off)
+    return value
+
+def read_u32(data, off):
+    """Decode the little-endian unsigned 32-bit integer stored in ``data`` at ``off``."""
+    (value,) = struct.unpack_from('<I', data, off)
+    return value
+
+
+def parse_ne_header(data, ne_off):
+    """Parse the NE header fields used by this tool.
+
+    Args:
+        data: Complete executable image (bytes-like).
+        ne_off: File offset of the two-byte ``NE`` signature.
+
+    Returns:
+        dict mapping field names to integers.  Table offsets that the header
+        stores relative to itself (``entry_table_off``, ``seg_table_off``,
+        ``resource_table_off``, ``resident_name_off``, ``module_ref_off``,
+        ``imported_name_off``) are rebased to absolute file offsets by adding
+        ``ne_off``; ``nonresident_name_off`` is returned exactly as stored.
+
+    Raises:
+        ValueError: If the two bytes at ``ne_off`` are not ``b'NE'``.
+    """
+    magic = data[ne_off:ne_off + 2]
+    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
+    if magic != b'NE':
+        raise ValueError(f"Bad NE magic at 0x{ne_off:X}: {magic}")
+
+    # Every field is read exactly once, from its offset relative to the
+    # signature.  Key order matches what earlier versions produced.
+    hdr = {
+        'linker_ver': read_u8(data, ne_off + 0x02),
+        'linker_rev': read_u8(data, ne_off + 0x03),
+        'entry_table_off': read_u16(data, ne_off + 0x04) + ne_off,
+        'entry_table_len': read_u16(data, ne_off + 0x06),
+        'flags': read_u16(data, ne_off + 0x0C),
+        'auto_data_seg': read_u16(data, ne_off + 0x0E),
+        'seg_table_off': read_u16(data, ne_off + 0x22) + ne_off,
+        'resource_table_off': read_u16(data, ne_off + 0x24) + ne_off,
+        'resident_name_off': read_u16(data, ne_off + 0x26) + ne_off,
+        'module_ref_off': read_u16(data, ne_off + 0x28) + ne_off,
+        'imported_name_off': read_u16(data, ne_off + 0x2A) + ne_off,
+        'nonresident_name_off': read_u32(data, ne_off + 0x2C),
+        'moveable_entries': read_u16(data, ne_off + 0x30),
+        'alignment_shift': read_u16(data, ne_off + 0x32),
+        'num_resource_segs': read_u16(data, ne_off + 0x34),
+        'target_os': read_u8(data, ne_off + 0x36),
+        'num_segments': read_u16(data, ne_off + 0x1C),
+        'num_module_refs': read_u16(data, ne_off + 0x1E),
+    }
+
+    return hdr
+
+
+def parse_segment_table(data, hdr):
+    """Decode every 8-byte record of the NE segment table.
+
+    Reads ``hdr['num_segments']`` records starting at ``hdr['seg_table_off']``
+    and returns one dict per segment with the keys ``index`` (1-based),
+    ``file_offset``, ``length``, ``flags``, ``min_alloc`` and ``has_reloc``.
+    """
+    table_start = hdr['seg_table_off']
+    shift = hdr['alignment_shift']
+    segments = []
+
+    for number in range(1, hdr['num_segments'] + 1):
+        rec_off = table_start + 8 * (number - 1)
+        sector = read_u16(data, rec_off)
+        length = read_u16(data, rec_off + 2)
+        flags = read_u16(data, rec_off + 4)
+        min_alloc = read_u16(data, rec_off + 6)
+
+        # A stored length of 0 stands for a full 64K segment, but only when
+        # the record has a non-zero sector offset.
+        if sector != 0 and length == 0:
+            length = 0x10000
+
+        segments.append({
+            'index': number,
+            # The sector offset is scaled by the alignment shift; 0 stays 0.
+            'file_offset': (sector << shift) if sector != 0 else 0,
+            'length': length,
+            'flags': flags,
+            'min_alloc': min_alloc,
+            # Flag bit 0x0100 marks a segment that carries relocation data.
+            'has_reloc': bool(flags & 0x0100),
+        })
+
+    return segments
+
+
+def parse_module_refs(data, hdr):
+    """Return the imported module names listed by the module reference table.
+
+    Each of the ``hdr['num_module_refs']`` table slots is a 16-bit offset into
+    the imported-names table, where a length-prefixed name is stored.
+    """
+    table_off = hdr['module_ref_off']
+    names_base = hdr['imported_name_off']
+
+    def module_name(slot):
+        # Slot -> offset of a length byte followed by that many name bytes.
+        entry = names_base + read_u16(data, table_off + slot * 2)
+        length = read_u8(data, entry)
+        raw = data[entry + 1: entry + 1 + length]
+        return raw.decode('ascii', errors='replace')
+
+    return [module_name(slot) for slot in range(hdr['num_module_refs'])]
+
+
+def parse_relocations(data, seg):
+    """Parse the relocation table stored after one segment's data.
+
+    Args:
+        data: Complete executable image (bytes-like).
+        seg: Segment record as built by parse_segment_table(); the keys
+            ``has_reloc``, ``file_offset``, ``length`` and ``index`` are read.
+
+    Returns:
+        A list with one dict per 8-byte relocation entry.  Every dict has
+        ``addr_type``, ``addr_type_name``, ``rel_type`` (low two bits of the
+        flag byte), ``rel_type_name``, ``additive``, ``seg_offset``,
+        ``seg_index`` and ``target_type``, plus target fields that depend on
+        the relocation type.  Returns ``[]`` when the segment has no
+        relocation flag or no data in the file.
+    """
+    # parse_segment_table() reports file_offset 0 for segments without file
+    # data; reading a "table" at 0 + length would decode unrelated bytes.
+    if not seg['has_reloc'] or seg['file_offset'] == 0:
+        return []
+
+    # The table starts right after the segment data: a 16-bit entry count
+    # followed by that many 8-byte entries.
+    table_off = seg['file_offset'] + seg['length']
+    num_relocs = read_u16(data, table_off)
+    first_entry = table_off + 2
+
+    entries = []
+    for entry_off in range(first_entry, first_entry + 8 * num_relocs, 8):
+        addr_type = read_u8(data, entry_off)
+        flags = read_u8(data, entry_off + 1)
+        rel_type = flags & 0x03
+
+        entry = {
+            'addr_type': addr_type,
+            'addr_type_name': ADDR_TYPE_NAMES.get(addr_type, f'unk_{addr_type}'),
+            'rel_type': rel_type,
+            'rel_type_name': REL_TYPE_NAMES.get(rel_type, f'unk_{rel_type}'),
+            # Bit 2 of the flag byte is the additive flag.
+            'additive': bool(flags & 0x04),
+            # Offset within the segment where the fixup applies.
+            'seg_offset': read_u16(data, entry_off + 2),
+            'seg_index': seg['index'],
+        }
+
+        if rel_type == REL_INTERNAL:
+            # Layout: target segment byte, one reserved byte, 16-bit value.
+            target_seg = read_u8(data, entry_off + 4)
+            target_off = read_u16(data, entry_off + 6)
+            if target_seg == 0xFF:
+                # Moveable segment: the value is an entry-table ordinal.
+                entry['target_type'] = 'moveable_entry'
+                entry['entry_ordinal'] = target_off
+            else:
+                entry['target_type'] = 'fixed'
+                entry['target_seg'] = target_seg   # 1-based segment number
+                entry['target_offset'] = target_off
+        elif rel_type == REL_IMPORTORD:
+            entry['target_type'] = 'import_ordinal'
+            entry['module_index'] = read_u16(data, entry_off + 4)  # 1-based
+            entry['ordinal'] = read_u16(data, entry_off + 6)
+        elif rel_type == REL_IMPORTNAM:
+            entry['target_type'] = 'import_name'
+            entry['module_index'] = read_u16(data, entry_off + 4)  # 1-based
+            entry['name_offset'] = read_u16(data, entry_off + 6)
+        elif rel_type == REL_OSFIXUP:
+            entry['target_type'] = 'osfixup'
+            entry['osfixup_type'] = read_u16(data, entry_off + 4)
+
+        entries.append(entry)
+
+    return entries
+
+
+def follow_reloc_chain(data, seg, first_offset, addr_type):
+    """
+    Collect every segment offset linked into one relocation's source chain.
+
+    Starting at ``first_offset``, the 16-bit word found at each visited
+    offset gives the next offset that needs the same fixup.  The walk ends
+    at 0xFFFF, at an offset outside the segment length, at an offset already
+    visited, or when the word would lie past the end of ``data``.
+    ``addr_type`` is accepted but not used by the walk.
+    Returns the visited offsets in chain order.
+    """
+    base = seg['file_offset']
+    limit = seg['length']
+
+    chain = []
+    seen = set()
+    link = first_offset
+    # Stop on the terminator, an out-of-segment link, or a repeated link
+    # (the latter guards against cycles).
+    while not (link == 0xFFFF or link >= limit or link in seen):
+        seen.add(link)
+        chain.append(link)
+
+        # The word at the fixup site holds the next link of the chain.
+        word_pos = base + link
+        if word_pos + 2 > len(data):
+            break
+        link = read_u16(data, word_pos)
+
+    return chain
+
+
+def file_offset_to_ghidra(file_off):
+    """Format a flat file offset as a ``ssss:oooo`` string (upper 16 bits : lower 16 bits)."""
+    return '{:04x}:{:04x}'.format(file_off >> 16, file_off & 0xFFFF)
+
+
+def _module_name(modules, mod_idx):
+    """Return the module name for a 1-based index, or a ``modN`` placeholder."""
+    # Index 0 must not wrap around to the last module via modules[-1].
+    if 1 <= mod_idx <= len(modules):
+        return modules[mod_idx - 1]
+    return f'mod{mod_idx}'
+
+
+def _resolve_target(data, hdr, segments, modules, reloc):
+    """Build the target fields of a fixup record for one relocation entry."""
+    kind = reloc.get('target_type')
+
+    if kind == 'fixed':
+        seg_idx = reloc['target_seg']
+        off = reloc['target_offset']
+        label = f'seg{seg_idx:03d}:{off:04x}'
+        # Segment numbers are 1-based; anything outside the table cannot be
+        # mapped to a file offset, so it is reported as unresolved.
+        if not 1 <= seg_idx <= len(segments):
+            return {'target': label, 'target_ghidra': '?'}
+        file_off = segments[seg_idx - 1]['file_offset'] + off
+        return {
+            'target': label,
+            'target_ghidra': file_offset_to_ghidra(file_off),
+            'target_file_offset': file_off,
+        }
+
+    if kind == 'moveable_entry':
+        label = f'entry_ordinal_{reloc["entry_ordinal"]}'
+    elif kind == 'import_ordinal':
+        label = f'{_module_name(modules, reloc["module_index"])}.{reloc["ordinal"]}'
+    elif kind == 'import_name':
+        # The name is a length-prefixed string in the imported-names table.
+        name_off = hdr['imported_name_off'] + reloc['name_offset']
+        name_len = read_u8(data, name_off)
+        name = data[name_off + 1:name_off + 1 + name_len].decode('ascii', errors='replace')
+        label = f'{_module_name(modules, reloc["module_index"])}.{name}'
+    elif kind == 'osfixup':
+        label = f'osfixup_{reloc["osfixup_type"]}'
+    else:
+        label = '???'
+    return {'target': label, 'target_ghidra': '?'}
+
+
+def _write_tsv(path, header, rows):
+    """Write a header line followed by pre-formatted rows to ``path``."""
+    # Decoded names may contain U+FFFD, which a platform default code page
+    # may be unable to encode; UTF-8 always can.
+    with open(path, 'w', encoding='utf-8') as f:
+        f.write(header)
+        f.writelines(rows)
+
+
+def _print_far_call_sample(heading, far_calls, seg_no, limit=30):
+    """Print up to ``limit`` far-call fixups whose source is segment ``seg_no``."""
+    print(heading)
+    picked = [f for f in far_calls if f['source_seg'] == seg_no]
+    for rec in sorted(picked, key=lambda r: r['source_offset_in_seg'])[:limit]:
+        print(f"  {rec['source_ghidra']}  →  {rec['target_ghidra']}  ({rec['target']})")
+
+
+def main():
+    """Parse all NE relocations of EXE_PATH and write the fixup tables.
+
+    Writes ``ne_reloc_fixups.json``, ``ne_reloc_far_calls.tsv`` and
+    ``ne_reloc_far_imports.tsv`` into the directory of EXE_PATH and prints a
+    summary plus two per-segment samples.
+
+    Raises:
+        ValueError: If the file does not start with the ``MZ`` signature.
+    """
+    print(f"Reading {EXE_PATH}...")
+    with open(EXE_PATH, 'rb') as f:
+        data = f.read()
+    print(f"  File size: {len(data)} bytes (0x{len(data):X})")
+
+    # Check the MZ header first (explicit raise: asserts vanish under -O).
+    if data[0:2] != b'MZ':
+        raise ValueError("Not an MZ executable")
+    lfanew = read_u32(data, 0x3C)
+    print(f"  e_lfanew from MZ header: 0x{lfanew:X}")
+    # The known NE offset is used; e_lfanew is printed for comparison only.
+    ne_off = NE_HEADER_OFFSET
+    print(f"  Using NE header at: 0x{ne_off:X}")
+
+    hdr = parse_ne_header(data, ne_off)
+    print(f"  Segments: {hdr['num_segments']}")
+    print(f"  Alignment shift: {hdr['alignment_shift']}")
+    print(f"  Module refs: {hdr['num_module_refs']}")
+
+    modules = parse_module_refs(data, hdr)
+    print(f"  Imported modules: {modules}")
+
+    segments = parse_segment_table(data, hdr)
+
+    all_fixups = []     # one record per resolved fixup site
+    stats = defaultdict(int)
+
+    for seg in segments:
+        for reloc in parse_relocations(data, seg):
+            if reloc['additive']:
+                # An additive entry applies once at seg_offset: the word
+                # stored there is an addend, not a link to another fixup
+                # site, so it must not be walked as a chain.
+                site = reloc['seg_offset']
+                chain = [site] if site < seg['length'] else []
+            else:
+                # Follow the chain to find ALL offsets needing this fixup.
+                chain = follow_reloc_chain(data, seg, reloc['seg_offset'], reloc['addr_type'])
+
+            for fixup_off in chain:
+                fixup_file_off = seg['file_offset'] + fixup_off
+                rec = {
+                    'source_seg': seg['index'],
+                    'source_offset_in_seg': fixup_off,
+                    'source_file_offset': fixup_file_off,
+                    'source_ghidra': file_offset_to_ghidra(fixup_file_off),
+                    'addr_type': reloc['addr_type_name'],
+                    'rel_type': reloc['rel_type_name'],
+                }
+                rec.update(_resolve_target(data, hdr, segments, modules, reloc))
+
+                all_fixups.append(rec)
+                stats[reloc['addr_type_name']] += 1
+
+    print(f"\n  Total resolved fixup points: {len(all_fixups)}")
+    print(f"  By address type: {dict(stats)}")
+
+    # Far-pointer fixups with a resolved internal target are the ones that
+    # decompile as CALLF 0000:ffff in Ghidra; every other far-pointer fixup
+    # goes to the "imports" table.
+    far_ptr = [f for f in all_fixups if f['addr_type'] == 'far_ptr_16:16']
+    far_calls = [f for f in far_ptr if f.get('target_ghidra', '?') != '?']
+    far_imports = [f for f in far_ptr if f.get('target_ghidra', '?') == '?']
+    print(f"  Far-call internal fixups: {len(far_calls)}")
+    print(f"  Far-call import fixups:   {len(far_imports)}")
+
+    out_dir = os.path.dirname(EXE_PATH)
+
+    # Save full results
+    out_path = os.path.join(out_dir, 'ne_reloc_fixups.json')
+    with open(out_path, 'w', encoding='utf-8') as f:
+        json.dump(all_fixups, f, indent=2)
+    print(f"\n  Full fixup table written to: {out_path}")
+
+    # Save a focused far-call table (TSV) for easy use
+    tsv_path = os.path.join(out_dir, 'ne_reloc_far_calls.tsv')
+    _write_tsv(
+        tsv_path,
+        "source_ghidra\ttarget_ghidra\ttarget_label\tsource_seg\tsource_off_in_seg\n",
+        (
+            f"{rec['source_ghidra']}\t{rec['target_ghidra']}\t{rec['target']}\t"
+            f"seg{rec['source_seg']:03d}\t0x{rec['source_offset_in_seg']:04x}\n"
+            for rec in sorted(far_calls, key=lambda r: r['source_file_offset'])
+        ),
+    )
+    print(f"  Far-call internal TSV:     {tsv_path}")
+
+    # Also save import far-calls
+    imp_path = os.path.join(out_dir, 'ne_reloc_far_imports.tsv')
+    _write_tsv(
+        imp_path,
+        "source_ghidra\ttarget\tsource_seg\tsource_off_in_seg\n",
+        (
+            f"{rec['source_ghidra']}\t{rec['target']}\t"
+            f"seg{rec['source_seg']:03d}\t0x{rec['source_offset_in_seg']:04x}\n"
+            for rec in sorted(far_imports, key=lambda r: r['source_file_offset'])
+        ),
+    )
+    print(f"  Far-call import TSV:       {imp_path}")
+
+    # Sample of game-segment far calls (seg039=seg001 region in raw, file offset 0x6E200)
+    _print_far_call_sample(
+        "\n── Sample: seg039 (NE seg 39, game seg001 area) far-call fixups ──",
+        far_calls, 39)
+
+    # Sample around the entity_ai_update_loop / entity_animation area
+    _print_far_call_sample(
+        "\n── Sample: seg059 (NE seg 59, game 0007: area) far-call fixups ──",
+        far_calls, 59)
+
+
+# Run the parser only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()