Add various scripts and JSON plans for Ghidra project
- Introduced `seg043_boundary_repair.json` to manage function boundaries in segment 043. - Created `read_file.py` for reading and printing file content size. - Added `resolve_bb4f.py` to resolve specific function call targets. - Implemented `resolve_top_targets.py` to find resolved NE targets for top-called wrapper functions. - Added `script_contents.txt` to summarize NE relocation far calls. - Updated `tier4_ghidra.txt`, `tier4_ghidra_check.txt`, `tier4_output.txt`, and `tier4_result.txt` with function call statistics. - Created `tier5_errors.txt` for error logging and `tier5_output.txt` for additional function call statistics. - Established `tools` directory with helper scripts for the Ghidra project, including CLI and common functionalities. - Implemented command-line interface in `cli.py` for various project operations. - Added `common.py` for shared functions and configurations across tools. - Introduced `validate_fixups.py` to validate NE relocation fixups against known addresses.
This commit is contained in:
parent
6b9eb205d4
commit
24d4416003
36 changed files with 145712 additions and 14 deletions
379
ne_reloc_parser.py
Normal file
379
ne_reloc_parser.py
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
NE Relocation Table Parser for Crusader: No Remorse
====================================================
Reads the NE header + per-segment relocation entries from CRUSADER.EXE.
Resolves each CALLF 0x0000:FFFF fixup to its real inter-segment target.
Emits a mapping file suitable for Ghidra annotation.

NE binary:  CRUSADER.EXE (bound MZ+NE, NE header at 0x36F70)
Raw import: Ghidra loads the whole file as flat RAM.
    Ghidra flat address = file_offset (since it's a raw binary import)
    Ghidra seg:off      = (flat >> 16) : (flat & 0xFFFF)
"""
|
||||
|
||||
import struct, sys, os, json
|
||||
from collections import defaultdict
|
||||
|
||||
# Input binary and the offset of its NE header inside the file.
EXE_PATH = r'k:\ghidra\Crusader_Decomp\CRUSADER.EXE'
NE_HEADER_OFFSET = 0x36F70  # e_lfanew from MZ header

# ── NE relocation entry address-type codes ──
ADDR_LOBYTE = 0
ADDR_SELECTOR = 2
ADDR_FARPTR = 3  # 16:16 far pointer ← this is CALLF target
ADDR_OFFSET = 5
ADDR_48PTR = 11
ADDR_OFFSET32 = 13

# ── NE relocation entry relocation-type codes ──
REL_INTERNAL = 0   # intra-module (segment:offset)
REL_IMPORTORD = 1  # imported by ordinal
REL_IMPORTNAM = 2  # imported by name
REL_OSFIXUP = 3    # OS fixup

# Human-readable names, keyed by the codes above so the two cannot drift apart.
ADDR_TYPE_NAMES = {
    ADDR_LOBYTE: 'lobyte',
    ADDR_SELECTOR: 'selector',
    ADDR_FARPTR: 'far_ptr_16:16',
    ADDR_OFFSET: 'offset16',
    ADDR_48PTR: 'ptr_48',
    ADDR_OFFSET32: 'offset32',
}
REL_TYPE_NAMES = {
    REL_INTERNAL: 'internal',
    REL_IMPORTORD: 'import_ordinal',
    REL_IMPORTNAM: 'import_name',
    REL_OSFIXUP: 'osfixup',
}
|
||||
|
||||
|
||||
def read_u8(data, off):
    """Return the item at index ``off`` of ``data``.

    For a ``bytes``/``bytearray`` buffer this is the unsigned byte value.
    """
    return data[off]
|
||||
|
||||
def read_u16(data, off):
    """Return the little-endian unsigned 16-bit value at ``off`` in ``data``."""
    (value,) = struct.unpack_from('<H', data, off)
    return value
|
||||
|
||||
def read_u32(data, off):
    """Return the little-endian unsigned 32-bit value at ``off`` in ``data``."""
    (value,) = struct.unpack_from('<I', data, off)
    return value
|
||||
|
||||
|
||||
def parse_ne_header(data, ne_off):
    """Parse key fields from the NE header.

    Args:
        data: The executable image as a bytes-like object.
        ne_off: File offset of the ``NE`` signature.

    Returns:
        dict mapping field names to integers. Table offsets that the header
        stores relative to the NE signature (``entry_table_off``,
        ``seg_table_off``, ``resource_table_off``, ``resident_name_off``,
        ``module_ref_off``, ``imported_name_off``) are rebased by ``ne_off``
        so they can be used directly as file offsets.
        ``nonresident_name_off`` is returned exactly as stored.

    Raises:
        ValueError: if the two bytes at ``ne_off`` are not ``b'NE'``.
        struct.error: if ``data`` is too short to hold a field.
    """
    magic = data[ne_off:ne_off + 2]
    if magic != b'NE':
        # Explicit raise instead of assert so the check survives ``python -O``.
        raise ValueError(f"Bad NE magic at 0x{ne_off:X}: {magic!r}")

    # (key, offset from the NE signature, struct format, rebase by ne_off?)
    layout = (
        ('linker_ver',           0x02, '<B', False),
        ('linker_rev',           0x03, '<B', False),
        ('entry_table_off',      0x04, '<H', True),
        ('entry_table_len',      0x06, '<H', False),
        ('flags',                0x0C, '<H', False),
        ('auto_data_seg',        0x0E, '<H', False),
        ('num_segments',         0x1C, '<H', False),
        ('num_module_refs',      0x1E, '<H', False),
        ('seg_table_off',        0x22, '<H', True),
        ('resource_table_off',   0x24, '<H', True),
        ('resident_name_off',    0x26, '<H', True),
        ('module_ref_off',       0x28, '<H', True),
        ('imported_name_off',    0x2A, '<H', True),
        ('nonresident_name_off', 0x2C, '<I', False),
        ('moveable_entries',     0x30, '<H', False),
        ('alignment_shift',      0x32, '<H', False),
        ('num_resource_segs',    0x34, '<H', False),
        ('target_os',            0x36, '<B', False),
    )

    hdr = {}
    for key, rel, fmt, rebase in layout:
        (value,) = struct.unpack_from(fmt, data, ne_off + rel)
        hdr[key] = value + ne_off if rebase else value
    return hdr
|
||||
|
||||
|
||||
def parse_segment_table(data, hdr):
    """Parse the NE segment table entries (8 bytes each).

    Args:
        data: The executable image as a bytes-like object.
        hdr: Header dict providing ``seg_table_off``, ``alignment_shift``
            and ``num_segments``.

    Returns:
        list of dicts, one per table entry in table order, with keys
        ``index`` (1-based segment number), ``file_offset`` (0 when the
        entry has no file data), ``length``, ``flags``, ``min_alloc`` and
        ``has_reloc`` (bit 0x0100 of the flags word).
    """
    # A stored shift count of 0 selects the format's default of 9
    # (512-byte sectors); shifting by 0 would yield raw sector numbers.
    shift = hdr['alignment_shift'] or 9
    table_off = hdr['seg_table_off']

    segments = []
    for number in range(1, hdr['num_segments'] + 1):
        entry_off = table_off + 8 * (number - 1)
        sector, length, flags, min_alloc = struct.unpack_from('<4H', data, entry_off)

        has_data = sector != 0
        # A stored length of 0 on a segment that has file data means 64 KiB.
        if has_data and length == 0:
            length = 0x10000

        segments.append({
            'index': number,  # 1-based segment number
            'file_offset': sector << shift if has_data else 0,
            'length': length,
            'flags': flags,
            'min_alloc': min_alloc,
            'has_reloc': bool(flags & 0x0100),
        })

    return segments
|
||||
|
||||
|
||||
def parse_module_refs(data, hdr):
    """Parse the module reference table → imported module names.

    The table at ``hdr['module_ref_off']`` holds ``hdr['num_module_refs']``
    16-bit offsets. Each offset is relative to ``hdr['imported_name_off']``
    and points at a length-prefixed name (one length byte, then the bytes).

    Returns:
        list of module names in table order, decoded as ASCII with
        undecodable bytes replaced.
    """
    count = hdr['num_module_refs']
    name_offsets = struct.unpack_from(f'<{count}H', data, hdr['module_ref_off'])
    names_base = hdr['imported_name_off']

    modules = []
    for rel in name_offsets:
        start = names_base + rel
        length = data[start]
        raw = data[start + 1:start + 1 + length]
        modules.append(raw.decode('ascii', errors='replace'))
    return modules
|
||||
|
||||
|
||||
def parse_relocations(data, seg):
    """Parse relocation entries for a single segment.

    The relocation table is read from ``seg['file_offset'] + seg['length']``:
    a 16-bit record count followed by that many 8-byte records.

    Args:
        data: The executable image as a bytes-like object.
        seg: Segment dict with ``has_reloc``, ``file_offset``, ``length``
            and ``index``.

    Returns:
        list of dicts, one per record. Every entry carries ``addr_type``,
        ``addr_type_name``, ``rel_type`` (low two bits of the flags byte),
        ``rel_type_name``, ``additive`` (bit 2 of the flags byte),
        ``seg_offset``, ``seg_index`` and ``target_type``, plus the
        target-specific keys set in the branches below. Returns ``[]`` when
        the segment has no relocation flag or no file data.
    """
    # file_offset == 0 means the segment has no data in the file, so there is
    # nothing for a relocation table to follow; reading at offset 0 + length
    # would misinterpret unrelated bytes as a table.
    if not seg['has_reloc'] or seg['file_offset'] == 0:
        return []

    # Relocation table starts right after the segment data in the file
    table_off = seg['file_offset'] + seg['length']
    num_relocs = read_u16(data, table_off)
    first_rec = table_off + 2

    entries = []
    for rec_off in range(first_rec, first_rec + 8 * num_relocs, 8):
        addr_type = read_u8(data, rec_off)
        flags = read_u8(data, rec_off + 1)
        rel_type = flags & 0x03

        entry = {
            'addr_type': addr_type,
            'addr_type_name': ADDR_TYPE_NAMES.get(addr_type, f'unk_{addr_type}'),
            'rel_type': rel_type,
            'rel_type_name': REL_TYPE_NAMES.get(rel_type, f'unk_{rel_type}'),
            'additive': bool(flags & 0x04),
            # offset within segment where fixup applies
            'seg_offset': read_u16(data, rec_off + 2),
            'seg_index': seg['index'],
        }

        if rel_type == REL_INTERNAL:
            # Byte 4 is the target segment, byte 5 is skipped, word 6 is the
            # offset (or the entry-table ordinal when the segment byte is 0xFF).
            target_seg = read_u8(data, rec_off + 4)
            value = read_u16(data, rec_off + 6)
            if target_seg == 0xFF:
                entry['target_type'] = 'moveable_entry'
                entry['entry_ordinal'] = value
            else:
                entry['target_type'] = 'fixed'
                entry['target_seg'] = target_seg  # 1-based segment number
                entry['target_offset'] = value
        elif rel_type == REL_IMPORTORD:
            entry['target_type'] = 'import_ordinal'
            entry['module_index'] = read_u16(data, rec_off + 4)  # 1-based
            entry['ordinal'] = read_u16(data, rec_off + 6)
        elif rel_type == REL_IMPORTNAM:
            entry['target_type'] = 'import_name'
            entry['module_index'] = read_u16(data, rec_off + 4)  # 1-based
            entry['name_offset'] = read_u16(data, rec_off + 6)
        elif rel_type == REL_OSFIXUP:
            entry['target_type'] = 'osfixup'
            entry['osfixup_type'] = read_u16(data, rec_off + 4)

        entries.append(entry)

    return entries
|
||||
|
||||
|
||||
def follow_reloc_chain(data, seg, first_offset, addr_type):
    """Return every segment offset on one relocation chain.

    NE relocations use a chain: the relocation entry points to an offset in
    the segment, and the 16-bit word stored at that offset is the next
    offset needing the same fixup. 0xFFFF terminates the chain.

    Args:
        data: The executable image as a bytes-like object.
        seg: Segment dict with ``file_offset`` and ``length``.
        first_offset: First offset of the chain, relative to the segment.
        addr_type: Accepted for call compatibility; not used by the walk.

    Returns:
        list of segment-relative offsets in chain order. The walk also stops
        at an offset that is not below the segment length, at an offset
        already visited (cycle protection), or when the link word would lie
        beyond the end of ``data``.
    """
    base = seg['file_offset']
    limit = seg['length']

    chain = []
    seen = set()
    current = first_offset
    while current != 0xFFFF and current < limit and current not in seen:
        seen.add(current)
        chain.append(current)

        # The word at the fixup site holds the next chain link.
        link_pos = base + current
        if link_pos + 2 > len(data):
            break
        (current,) = struct.unpack_from('<H', data, link_pos)

    return chain
|
||||
|
||||
|
||||
def file_offset_to_ghidra(file_off):
    """Convert file offset to Ghidra seg:off address string (raw import).

    The upper part (``file_off >> 16``) becomes the segment and the low
    16 bits the offset, each formatted as at least four lowercase hex digits.
    """
    seg, off = divmod(file_off, 0x10000)
    return f'{seg:04x}:{off:04x}'
|
||||
|
||||
|
||||
def _resolve_target(reloc, segments, modules, data, hdr):
    """Describe the target of one parsed relocation entry.

    Returns a dict with ``target`` (label) and ``target_ghidra`` (address
    string, or ``'?'`` when no address in the raw image can be given), plus
    ``target_file_offset`` for fixed internal targets.
    """
    kind = reloc.get('target_type')

    if kind == 'fixed':
        seg_idx = reloc['target_seg']
        seg_off = reloc['target_offset']
        label = f'seg{seg_idx:03d}:{seg_off:04x}'
        # Segment numbers are 1-based: 0 would index segments[-1] (the last
        # segment) and a number past the table would raise IndexError.
        if not 1 <= seg_idx <= len(segments):
            return {'target': label, 'target_ghidra': '?'}
        file_off = segments[seg_idx - 1]['file_offset'] + seg_off
        return {
            'target': label,
            'target_ghidra': file_offset_to_ghidra(file_off),
            'target_file_offset': file_off,
        }

    if kind == 'moveable_entry':
        return {'target': f'entry_ordinal_{reloc["entry_ordinal"]}', 'target_ghidra': '?'}

    if kind in ('import_ordinal', 'import_name'):
        mod_idx = reloc['module_index']
        # Module indices are 1-based; guard the lower bound too so that 0
        # does not silently pick the last module.
        if 1 <= mod_idx <= len(modules):
            mod_name = modules[mod_idx - 1]
        else:
            mod_name = f'mod{mod_idx}'
        if kind == 'import_ordinal':
            member = reloc['ordinal']
        else:
            # Read the length-prefixed imported name
            name_off = hdr['imported_name_off'] + reloc['name_offset']
            name_len = read_u8(data, name_off)
            member = data[name_off + 1:name_off + 1 + name_len].decode('ascii', errors='replace')
        return {'target': f'{mod_name}.{member}', 'target_ghidra': '?'}

    if kind == 'osfixup':
        return {'target': f'osfixup_{reloc["osfixup_type"]}', 'target_ghidra': '?'}

    return {'target': '???', 'target_ghidra': '?'}


def _write_tsv(path, header, rows):
    """Write ``header`` and ``rows`` (sequences of strings) to ``path`` as TSV."""
    # Explicit UTF-8: labels decoded with errors='replace' may contain U+FFFD,
    # which the platform default encoding cannot always represent.
    with open(path, 'w', encoding='utf-8') as f:
        f.write('\t'.join(header) + '\n')
        f.writelines('\t'.join(row) + '\n' for row in rows)


def main():
    """Parse EXE_PATH, resolve every relocation fixup and write the tables.

    Writes ``ne_reloc_fixups.json``, ``ne_reloc_far_calls.tsv`` and
    ``ne_reloc_far_imports.tsv`` next to the executable and prints a summary
    plus two per-segment samples.

    Raises:
        ValueError: if the file does not start with ``MZ`` or the NE
            signature is missing at ``NE_HEADER_OFFSET``.
    """
    print(f"Reading {EXE_PATH}...")
    with open(EXE_PATH, 'rb') as f:
        data = f.read()
    print(f" File size: {len(data)} bytes (0x{len(data):X})")

    # Verify NE header location
    # Check MZ header first
    if data[0:2] != b'MZ':
        raise ValueError("Not an MZ executable")
    lfanew = read_u32(data, 0x3C)
    print(f" e_lfanew from MZ header: 0x{lfanew:X}")
    # Use the known NE offset
    ne_off = NE_HEADER_OFFSET
    print(f" Using NE header at: 0x{ne_off:X}")
    if lfanew != ne_off:
        # The configured offset still wins, but a mismatch usually means a
        # different build of the executable is being parsed.
        print(f" WARNING: e_lfanew 0x{lfanew:X} differs from configured NE offset 0x{ne_off:X}")

    hdr = parse_ne_header(data, ne_off)
    print(f" Segments: {hdr['num_segments']}")
    print(f" Alignment shift: {hdr['alignment_shift']}")
    print(f" Module refs: {hdr['num_module_refs']}")

    modules = parse_module_refs(data, hdr)
    print(f" Imported modules: {modules}")

    segments = parse_segment_table(data, hdr)

    # Parse all relocations
    all_fixups = []  # list of resolved fixup records
    stats = defaultdict(int)

    for seg in segments:
        for reloc in parse_relocations(data, seg):
            if reloc['additive']:
                # Additive fixups are not chained: the word at the fixup site
                # is an addend, not a link to the next site.
                chain = [reloc['seg_offset']]
            else:
                # Follow the chain to find ALL offsets needing this fixup
                chain = follow_reloc_chain(data, seg, reloc['seg_offset'], reloc['addr_type'])

            # The target is the same for every site on the chain.
            target = _resolve_target(reloc, segments, modules, data, hdr)

            for fixup_off in chain:
                fixup_file_off = seg['file_offset'] + fixup_off
                all_fixups.append({
                    'source_seg': seg['index'],
                    'source_offset_in_seg': fixup_off,
                    'source_file_offset': fixup_file_off,
                    'source_ghidra': file_offset_to_ghidra(fixup_file_off),
                    'addr_type': reloc['addr_type_name'],
                    'rel_type': reloc['rel_type_name'],
                    **target,
                })
                stats[reloc['addr_type_name']] += 1

    print(f"\n Total resolved fixup points: {len(all_fixups)}")
    print(f" By address type: {dict(stats)}")

    # Filter to just far_ptr (CALLF) fixups with internal targets — these are the ones
    # that decompile as CALLF 0000:ffff in Ghidra.  Everything without a
    # resolvable address (imports, moveable entries, OS fixups) goes to the
    # "import" list.
    far_fixups = [f for f in all_fixups if f['addr_type'] == 'far_ptr_16:16']
    far_calls = [f for f in far_fixups if f['target_ghidra'] != '?']
    far_imports = [f for f in far_fixups if f['target_ghidra'] == '?']
    print(f" Far-call internal fixups: {len(far_calls)}")
    print(f" Far-call import fixups: {len(far_imports)}")

    out_dir = os.path.dirname(EXE_PATH)

    # Save full results
    out_path = os.path.join(out_dir, 'ne_reloc_fixups.json')
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(all_fixups, f, indent=2)
    print(f"\n Full fixup table written to: {out_path}")

    # Save a focused far-call table (TSV) for easy use
    tsv_path = os.path.join(out_dir, 'ne_reloc_far_calls.tsv')
    _write_tsv(
        tsv_path,
        ('source_ghidra', 'target_ghidra', 'target_label', 'source_seg', 'source_off_in_seg'),
        (
            (rec['source_ghidra'], rec['target_ghidra'], rec['target'],
             f"seg{rec['source_seg']:03d}", f"0x{rec['source_offset_in_seg']:04x}")
            for rec in sorted(far_calls, key=lambda r: r['source_file_offset'])
        ),
    )
    print(f" Far-call internal TSV: {tsv_path}")

    # Also save import far-calls
    imp_path = os.path.join(out_dir, 'ne_reloc_far_imports.tsv')
    _write_tsv(
        imp_path,
        ('source_ghidra', 'target', 'source_seg', 'source_off_in_seg'),
        (
            (rec['source_ghidra'], rec['target'],
             f"seg{rec['source_seg']:03d}", f"0x{rec['source_offset_in_seg']:04x}")
            for rec in sorted(far_imports, key=lambda r: r['source_file_offset'])
        ),
    )
    print(f" Far-call import TSV: {imp_path}")

    samples = (
        # Print a sample of game-segment far calls (seg039=seg001 region in raw, file offset 0x6E200)
        (39, "\n── Sample: seg039 (NE seg 39, game seg001 area) far-call fixups ──"),
        # Print a sample around the entity_ai_update_loop / entity_animation area
        (59, "\n── Sample: seg059 (NE seg 59, game 0007: area) far-call fixups ──"),
    )
    for seg_no, title in samples:
        print(title)
        seg_calls = [f for f in far_calls if f['source_seg'] == seg_no]
        for rec in sorted(seg_calls, key=lambda r: r['source_offset_in_seg'])[:30]:
            print(f" {rec['source_ghidra']} → {rec['target_ghidra']} ({rec['target']})")
|
||||
|
||||
|
||||
# Run the parser only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue