# Crusader_Decomp/scripts/analyze_weapons_table_region.py

import sys
from collections import Counter, defaultdict

# Path of the binary dump read by the __main__ block. It is relative, so it
# is resolved against the current working directory when the script runs.
FN = r"binary/Crusader - No Remorse Weapons Main Ram.bin"
# File offsets of interest: each one gets its own hex dump + analysis, and
# together they bound the "unified" window analysed at the end of the run.
OFFSETS = [0x133000, 0x133416, 0x1335d4]
# Number of bytes included before / after each offset in its per-offset
# window (the window is clamped to the bounds of the file).
WINDOW_BEFORE = 0x100
WINDOW_AFTER = 0x200

def hexdump(buf, base):
    """Format *buf* as a hex dump, 16 bytes per row.

    Each row is ``<address>: <hex bytes>  <printable text>`` where the
    address is ``base`` plus the row's offset into *buf* (8 hex digits),
    the hex column is padded to a fixed width of 47 characters, and bytes
    outside the printable ASCII range 0x20..0x7e are shown as ``.``.

    Returns the rows joined with newlines (an empty string for empty input).
    """
    rows = []
    row_start = 0
    size = len(buf)
    while row_start < size:
        row = buf[row_start:row_start + 16]
        hex_column = ' '.join(format(byte, '02x') for byte in row)
        text_column = ''.join(
            chr(byte) if 0x20 <= byte <= 0x7e else '.' for byte in row
        )
        rows.append(
            '{:08x}: {:<47}  {}'.format(base + row_start, hex_column, text_column)
        )
        row_start += 16
    return '\n'.join(rows)


def _score_strides(buf, min_records=3, probe=8):
    """Rank candidate record strides for *buf* by start-of-record similarity.

    For every stride in 4..128 that yields at least *min_records* whole
    records, the first *probe* bytes of each record are compared
    position-by-position with the first *probe* bytes of the next record.
    The score is the fraction of compared byte positions that are equal.

    Returns a list of ``(score, stride, record_count)`` tuples sorted in
    descending order; ties on score are therefore broken in favour of the
    larger stride.
    """
    ranked = []
    for stride in range(4, 129):
        n = len(buf) // stride
        if n < min_records:
            continue
        matches = 0
        total = 0
        for i in range(n - 1):
            a = buf[i * stride:i * stride + probe]
            b = buf[(i + 1) * stride:(i + 1) * stride + probe]
            # When stride < probe the second slice can be cut short by the
            # end of the buffer; count only the positions actually compared
            # so small strides are not penalised for bytes that don't exist.
            total += min(len(a), len(b))
            matches += sum(1 for x, y in zip(a, b) if x == y)
        if total:
            ranked.append((matches / total, stride, n))
    ranked.sort(reverse=True)
    return ranked


def analyze_region(buf, base):
    """Print a heuristic structural analysis of the byte region *buf*.

    The report (written to stdout) contains:

    * the 12 most common byte values,
    * the number and first positions of 0x0c and 0x0d bytes,
    * the best-scoring candidate record strides (see ``_score_strides``),
    * the first records when the region is split at the top stride, and
    * the offsets inside a record whose value changes most often from one
      record to the next.

    Parameters:
        buf: bytes-like region to analyse.
        base: file offset of ``buf[0]``; only used to label printed addresses.

    Returns None. Regions too short to hold three records of the smallest
    stride (fewer than 12 bytes) get the frequency report only.
    """
    print(f"\n-- Analysis for region base 0x{base:x}, length {len(buf):x} --")
    ctr = Counter(buf)
    print("Top byte frequencies:")
    for b, c in ctr.most_common(12):
        print(f"  0x{b:02x}: {c}")
    # positions of 0x0c/0x0d
    pos0c = [i for i, b in enumerate(buf) if b == 0x0c]
    pos0d = [i for i, b in enumerate(buf) if b == 0x0d]
    print(f"Count 0x0c: {len(pos0c)}, sample positions (rel): {pos0c[:12]}")
    print(f"Count 0x0d: {len(pos0d)}, sample positions (rel): {pos0d[:12]}")

    # stride detection via start-similarity
    best = _score_strides(buf)
    print("Top candidate strides (score, stride, record_count):")
    for s, stride, n in best[:8]:
        print(f"  {s:.3f}, {stride}, {n}")

    if not best:
        # Everything below needs a stride; without this guard a short
        # region would hit an unbound `top_stride`.
        print("  (none: region too short for stride analysis)")
        return

    top_stride = best[0][1]
    nrecs = len(buf) // top_stride
    print(f"\nSample records using stride {top_stride} (showing up to 12 bytes of each record):")
    for i in range(min(nrecs, 12)):
        rec = buf[i * top_stride:(i + 1) * top_stride]
        shown = ' '.join(f'{b:02x}' for b in rec[:12])
        print(f"  rec#{i:02d} @ {base + i * top_stride:08x}: {shown}")

    # For each of the first 32 offsets inside a record, count how often the
    # byte at that offset differs between consecutive records.
    inc_candidates = []
    for off in range(min(32, top_stride)):
        # One byte per whole record, taken at `off` within the record.
        vals = list(buf[off:nrecs * top_stride:top_stride])
        diffs = sum(1 for prev, cur in zip(vals, vals[1:]) if prev != cur)
        if diffs > 0:
            inc_candidates.append((diffs, off, vals[:16]))
    inc_candidates.sort(reverse=True)
    if inc_candidates:
        print('\nTop changing offsets within stride (changes, offset, sample_values):')
        for d, off, sample in inc_candidates[:8]:
            print(f"  {d}, {off}, {sample}")


if __name__ == '__main__':
    # Read the whole dump into memory; a missing file is reported and the
    # script exits with status 2.
    try:
        with open(FN, 'rb') as fh:
            data = fh.read()
    except FileNotFoundError:
        print('ERROR: file not found:', FN)
        sys.exit(2)

    separator = '\n' + '='*60

    # Pass 1: hex dump + analysis of a window around every offset of interest.
    for off in OFFSETS:
        lo = max(0, off - WINDOW_BEFORE)
        hi = min(len(data), off + WINDOW_AFTER)
        window = data[lo:hi]
        print(separator)
        print(f"Dump around 0x{off:08x} (file offsets 0x{lo:08x}-0x{hi:08x})")
        print(hexdump(window, lo))
        analyze_region(window, lo)

    # Pass 2: one larger window spanning all offsets, clamped to the file.
    big_start = max(0, min(OFFSETS) - 0x200)
    big_end = min(len(data), max(OFFSETS) + 0x300)
    big = data[big_start:big_end]
    print(separator)
    print(f"Unified window 0x{big_start:08x}-0x{big_end:08x}, length {len(big):x}")
    print('Unified top bytes:', Counter(big).most_common(12))

    # Stride search on the unified window: score each stride (4..128, at
    # least 4 whole records) by how many of the first 8 byte positions of
    # consecutive records hold equal values.
    ranked = []
    for step in range(4, 129):
        count = len(big) // step
        if count < 4:
            continue
        same = 0
        for k in range(count - 1):
            head = big[k*step:k*step + 8]
            following = big[(k + 1)*step:(k + 1)*step + 8]
            same += sum(x == y for x, y in zip(head, following))
        ranked.append((same / (8 * (count - 1)), step, count))
    ranked.sort(reverse=True)
    print('Unified top candidate strides (score, stride, n):')
    for score, step, count in ranked[:12]:
        print(f"  {score:.3f}, {step}, {count}")

    # Show the first records of the unified window split at the best stride.
    if ranked:
        top = ranked[0][1]
        print(f"\nUnified sample records with stride {top}:")
        for idx in range(min(len(big)//top, 12)):
            rec_start = idx * top
            record = big[rec_start:rec_start + top]
            shown = ' '.join(f'{b:02x}' for b in record[:16])
            print(f"  rec#{idx:02d} @ {big_start + rec_start:08x}: {shown}")

    print('\nDone')