Crusader_Decomp/tools/psx_extract_wdl.py

1087 lines
38 KiB
Python
Raw Permalink Normal View History

2026-03-30 00:19:01 +02:00
from __future__ import annotations
import argparse
import csv
import json
import struct
import zlib
from pathlib import Path
# Flag words (the u32 that follows the 0x10 magic) that carve_tim accepts for a
# candidate header. carve_tim treats bit 0x8 as "a CLUT block precedes the
# image block"; the meaning of the low bits is not used anywhere in this file.
KNOWN_TIM_FLAGS = {0, 1, 2, 3, 8, 9, 10, 11}
def u32(data: bytes, offset: int) -> int:
    """Return the little-endian unsigned 32-bit integer stored at ``offset`` in ``data``."""
    (value,) = struct.unpack_from("<I", data, offset)
    return value
def carve_tim(data: bytes, offset: int) -> dict[str, int] | None:
    """Validate a TIM-style candidate at ``offset`` and measure its extent.

    A candidate is accepted when its first word is 0x10, its flag word is one
    of ``KNOWN_TIM_FLAGS`` and every size-prefixed block that follows fits in
    ``data``. When flag bit 0x8 is set a CLUT block is expected before the
    image block.

    Returns a dict with ``offset``, ``size``, ``flags`` and ``has_clut``, or
    ``None`` when any check fails.
    """
    total = len(data)

    def skip_block(position: int) -> int | None:
        # Each block starts with its own u32 byte size; it must be at least
        # 12 bytes long and end inside the buffer.
        if position + 12 > total:
            return None
        block_size = u32(data, position)
        if block_size < 12 or position + block_size > total:
            return None
        return position + block_size

    if offset + 12 > total:
        return None
    if u32(data, offset) != 0x10:
        return None
    flags = u32(data, offset + 4)
    if flags not in KNOWN_TIM_FLAGS:
        return None

    has_clut = bool(flags & 0x8)
    end = offset + 8
    if has_clut:
        end = skip_block(end)
        if end is None:
            return None
    end = skip_block(end)
    if end is None:
        return None
    return {
        "offset": offset,
        "size": end - offset,
        "flags": flags,
        "has_clut": has_clut,
    }
def scan_tims(data: bytes) -> list[dict[str, int]]:
    """Walk ``data`` and collect every candidate accepted by ``carve_tim``.

    The scan advances four bytes after a miss and jumps past the carved image
    (at least four bytes) after a hit.
    """
    found: list[dict[str, int]] = []
    last_start = len(data) - 12
    position = 0
    while position <= last_start:
        candidate = carve_tim(data, position)
        if candidate is not None:
            found.append(candidate)
            position = candidate["offset"] + max(candidate["size"], 4)
        else:
            position += 4
    return found
def write_blob(path: Path, data: bytes) -> None:
    """Write ``data`` to ``path``, creating missing parent directories first."""
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    with path.open("wb") as handle:
        handle.write(data)
def write_u16x6_csv(path: Path, data: bytes) -> None:
    """Dump ``data`` as a CSV of 12-byte records with six little-endian u16 fields each.

    Trailing bytes that do not fill a whole record are ignored, and no file is
    created when ``data`` holds fewer than 12 bytes.
    """
    record_size = 12
    record_count = len(data) // record_size
    if record_count == 0:
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    header = ["index", "offset", "field0", "field1", "field2", "field3", "field4", "field5"]
    records = struct.iter_unpack("<6H", data[: record_count * record_size])
    with path.open("w", newline="", encoding="ascii") as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        for index, words in enumerate(records):
            cells = [f"0x{word:04X}" for word in words]
            writer.writerow([index, f"0x{index * record_size:X}", *cells])
def write_u16x12_csv(path: Path, data: bytes) -> None:
    """Dump ``data`` as a CSV of 24-byte records with twelve little-endian u16 fields each.

    Trailing bytes that do not fill a whole record are ignored, and no file is
    created when ``data`` holds fewer than 24 bytes.
    """
    record_size = 24
    field_count = 12
    record_count = len(data) // record_size
    if record_count == 0:
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    header = ["index", "offset"] + [f"field{slot}" for slot in range(field_count)]
    with path.open("w", newline="", encoding="ascii") as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        for index in range(record_count):
            offset = index * record_size
            words = struct.unpack_from("<12H", data, offset)
            writer.writerow([index, f"0x{offset:X}"] + [f"0x{word:04X}" for word in words])
def write_u32x5_csv(path: Path, data: bytes) -> None:
    """Dump ``data`` as a CSV of 20-byte records with five little-endian u32 fields each.

    Trailing bytes that do not fill a whole record are ignored, and no file is
    created when ``data`` holds fewer than 20 bytes.
    """
    record_size = 20
    whole_bytes = (len(data) // record_size) * record_size
    if whole_bytes == 0:
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", newline="", encoding="ascii") as handle:
        writer = csv.writer(handle)
        writer.writerow(["index", "offset", "field0", "field1", "field2", "field3", "field4"])
        offset = 0
        for index, words in enumerate(struct.iter_unpack("<5I", data[:whole_bytes])):
            row = [index, f"0x{offset:X}"]
            row.extend(f"0x{word:08X}" for word in words)
            writer.writerow(row)
            offset += record_size
def write_lset_stream_probe_json(path: Path, data: bytes) -> None:
    """Write a JSON probe of the count-prefixed 20-byte header table at the start of ``data``.

    The first little-endian u32 is taken as the stream count. At most 64
    headers are dumped, and only those that lie completely inside ``data``.
    Nothing is written when ``data`` is shorter than four bytes.
    """
    if len(data) < 4:
        return
    (stream_count,) = struct.unpack_from("<I", data, 0)
    chunk_size = 20
    # Number of whole 20-byte headers that fit after the 4-byte count.
    available = (len(data) - 4) // chunk_size
    headers: list[dict[str, object]] = []
    for index in range(min(stream_count, 64, available)):
        offset = 4 + index * chunk_size
        chunk = data[offset : offset + chunk_size]
        headers.append(
            {
                "index": index,
                "offset": offset,
                "hex": chunk.hex(),
                "u32le": list(struct.unpack("<5I", chunk)),
            }
        )
    document = {
        "stream_count_le": stream_count,
        "header_region_size": len(data),
        "header_chunk_size": 20,
        "note": "Raw loader-grounded probe for the LSET post-audio region that begins with the level_resource_stream_load count.",
        "headers": headers,
    }
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(document, indent=2) + "\n", encoding="ascii")
def write_paired_u16x6_json(path: Path, data: bytes) -> None:
    """Export ``data`` as JSON rows of two adjacent six-word (u16) records plus per-field stats.

    Each 24-byte row is split into a ``left`` and a ``right`` group of six
    little-endian u16 values. For every field the min, max and unique count
    are reported, together with the same figures restricted to values other
    than 0x0000 and 0xFFFF. Nothing is written for fewer than 24 bytes.
    """
    record_size = 24
    row_count = len(data) // record_size
    if row_count == 0:
        return

    rows: list[dict[str, object]] = []
    for index in range(row_count):
        offset = index * record_size
        words = struct.unpack_from("<12H", data, offset)
        rows.append(
            {
                "index": index,
                "offset": offset,
                "left": {f"u{slot}": word for slot, word in enumerate(words[:6])},
                "right": {f"u{slot}": word for slot, word in enumerate(words[6:])},
            }
        )

    field_ranges: dict[str, dict[str, int]] = {}
    for side in ("left", "right"):
        for slot in range(6):
            column = [row[side][f"u{slot}"] for row in rows]
            interesting = [word for word in column if word != 0x0000 and word != 0xFFFF]
            field_ranges[f"{side}.u{slot}"] = {
                "min": min(column),
                "max": max(column),
                "unique_count": len(set(column)),
                "nontrivial_min": min(interesting) if interesting else None,
                "nontrivial_max": max(interesting) if interesting else None,
                "nontrivial_unique_count": len(set(interesting)),
            }

    document = {
        "record_size": 24,
        "row_count": len(rows),
        "layout_note": "Each row is exported as two adjacent 6-word candidate placement records.",
        "field_ranges": field_ranges,
        "rows": rows,
    }
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(document, indent=2), encoding="ascii")
def write_pgm(path: Path, pixels: bytes, width: int, height: int) -> None:
    """Write ``pixels`` as a binary (P5) PGM with a maximum grey value of 255."""
    path.parent.mkdir(parents=True, exist_ok=True)
    preamble = f"P5\n{width} {height}\n255\n".encode("ascii")
    with path.open("wb") as handle:
        handle.write(preamble + pixels)
def write_png_grayscale(path: Path, pixels: bytes, width: int, height: int) -> None:
    """Write ``pixels`` (one byte per pixel, row-major) as an 8-bit greyscale PNG.

    Every scanline is stored with filter byte 0 and the image data is
    compressed with zlib level 9.
    """

    def make_chunk(kind: bytes, body: bytes) -> bytes:
        # The CRC covers the chunk type followed by the chunk data.
        checksum = zlib.crc32(kind + body) & 0xFFFFFFFF
        return b"".join((struct.pack(">I", len(body)), kind, body, struct.pack(">I", checksum)))

    path.parent.mkdir(parents=True, exist_ok=True)
    filtered_rows = b"".join(
        b"\x00" + pixels[row * width : row * width + width] for row in range(height)
    )
    header = struct.pack(">IIBBBBB", width, height, 8, 0, 0, 0, 0)
    parts = [
        b"\x89PNG\r\n\x1a\n",
        make_chunk(b"IHDR", header),
        make_chunk(b"IDAT", zlib.compress(filtered_rows, level=9)),
        make_chunk(b"IEND", b""),
    ]
    path.write_bytes(b"".join(parts))
def write_png_rgba(path: Path, pixels: bytes, width: int, height: int) -> None:
    """Write ``pixels`` (four bytes per pixel, row-major RGBA) as an 8-bit RGBA PNG.

    Every scanline is stored with filter byte 0 and the image data is
    compressed with zlib level 9.
    """

    def make_chunk(kind: bytes, body: bytes) -> bytes:
        # The CRC covers the chunk type followed by the chunk data.
        checksum = zlib.crc32(kind + body) & 0xFFFFFFFF
        return b"".join((struct.pack(">I", len(body)), kind, body, struct.pack(">I", checksum)))

    path.parent.mkdir(parents=True, exist_ok=True)
    row_size = width * 4
    filtered_rows = bytearray()
    for row in range(height):
        filtered_rows += b"\x00"
        filtered_rows += pixels[row * row_size : (row + 1) * row_size]
    header = struct.pack(">IIBBBBB", width, height, 8, 6, 0, 0, 0)
    parts = [
        b"\x89PNG\r\n\x1a\n",
        make_chunk(b"IHDR", header),
        make_chunk(b"IDAT", zlib.compress(bytes(filtered_rows), level=9)),
        make_chunk(b"IEND", b""),
    ]
    path.write_bytes(b"".join(parts))
def psx_555_to_rgba(color: int) -> tuple[int, int, int, int]:
    """Expand a 15-bit colour (red in the low five bits) to 8-bit RGBA.

    Each five-bit channel is rescaled to 0..255. Alpha is 0 when the low 15
    bits are all zero and 255 otherwise; bit 15 is ignored.
    """
    red, green, blue = (((color >> shift) & 0x1F) * 255 // 31 for shift in (0, 5, 10))
    if color & 0x7FFF:
        return red, green, blue, 255
    return red, green, blue, 0
def write_psx_16bpp_png(path: Path, data: bytes, width: int, height: int) -> None:
    """Render ``data`` as ``width`` x ``height`` little-endian 16-bit pixels into an RGBA PNG.

    Pixels are converted with ``psx_555_to_rgba``. Pixels for which ``data``
    has no halfword stay fully transparent black; surplus data is ignored.
    """
    pixel_count = width * height
    available = min(len(data) // 2, pixel_count)
    rgba = bytearray(pixel_count * 4)
    halfwords = struct.iter_unpack("<H", data[: available * 2])
    for pixel_index, (color,) in enumerate(halfwords):
        rgba[pixel_index * 4 : pixel_index * 4 + 4] = bytes(psx_555_to_rgba(color))
    write_png_rgba(path, bytes(rgba), width, height)
def extract_palette_sets(data: bytes, summary: dict[str, object]) -> list[list[int]]:
    """Return the LSET palette blob split into 16-entry palettes of u16 colours.

    The blob location and size come from header words 2 and 3. An empty list
    is returned unless ``summary`` describes an ``lset`` file whose palette
    blob is exactly 0x1000 bytes and lies inside ``data``.
    """
    if summary["kind"] != "lset":
        return []
    header_words = summary["header_words"]
    if len(header_words) < 4:
        return []
    palette_offset, palette_size = header_words[2], header_words[3]
    if palette_size != 0x1000:
        return []
    if palette_offset + palette_size > len(data):
        return []
    blob = data[palette_offset : palette_offset + palette_size]
    entry_bytes = 0x20
    return [
        list(struct.unpack_from("<16H", blob, offset))
        for offset in range(0, len(blob) - entry_bytes + 1, entry_bytes)
    ]
def extract_palette_blocks_256(data: bytes, summary: dict[str, object]) -> list[list[int]]:
    """Return the LSET palette blob split into 256-entry palettes of u16 colours.

    The blob location and size come from header words 2 and 3. An empty list
    is returned unless ``summary`` describes an ``lset`` file whose palette
    blob is exactly 0x1000 bytes and lies inside ``data``.
    """
    if summary["kind"] != "lset":
        return []
    header_words = summary["header_words"]
    if len(header_words) < 4:
        return []
    palette_offset, palette_size = header_words[2], header_words[3]
    if palette_size != 0x1000:
        return []
    if palette_offset + palette_size > len(data):
        return []
    blob = data[palette_offset : palette_offset + palette_size]
    entry_bytes = 0x200
    blocks: list[list[int]] = []
    offset = 0
    while offset + entry_bytes <= len(blob):
        blocks.append(list(struct.unpack_from("<256H", blob, offset)))
        offset += entry_bytes
    return blocks
def choose_palette(palettes: list[list[int]], frames: list[dict[str, object]], mode: int) -> int | None:
    """Pick the 16-colour palette that looks most plausible for ``frames``.

    Only mode 2 is handled. Every nibble value used by the frames (except 0)
    is looked up in each palette; the palette with the most distinct non-zero
    colours wins, with the summed RGB channel values as a tie-breaker.

    Returns ``None`` when the mode is not 2, when there are no palettes, or
    when no palette yields a non-zero colour; returns 0 when the frames use no
    index other than 0.
    """
    if mode != 2 or not palettes:
        return None

    used: set[int] = set()
    for frame in frames:
        for packed in frame["pixels"]:
            used.update((packed & 0x0F, (packed >> 4) & 0x0F))
    used.discard(0)
    if not used:
        return 0

    winner: int | None = None
    top_score = -1
    for candidate, palette in enumerate(palettes):
        colors = {palette[slot] & 0x7FFF for slot in used} - {0}
        if not colors:
            continue
        brightness = sum(sum(psx_555_to_rgba(color)[:3]) for color in colors)
        score = len(colors) * 100000 + brightness
        if score > top_score:
            winner, top_score = candidate, score
    return winner
def colorize_indexed_pixels(
    raw: bytes,
    width: int,
    height: int,
    mode: int,
    palette: list[int],
    high_nibble_first: bool = False,
) -> bytes:
    """Convert indexed pixel data to row-major RGBA using ``palette``.

    Args:
        raw: Indexed pixel data. In mode 2 every row occupies
            ``(width + 1) // 2`` bytes holding two 4-bit indices per byte;
            in any other mode each byte is one index.
        width: Image width in pixels.
        height: Image height in pixels.
        mode: 2 selects the packed 4-bit layout, anything else the
            byte-per-pixel layout.
        palette: Colour table, converted with ``psx_555_to_rgba``.
        high_nibble_first: In mode 2, take the high nibble of each byte as the
            left pixel instead of the low nibble.

    Returns:
        ``width * height * 4`` bytes of RGBA. Index 0 is always fully
        transparent; pixels missing from ``raw`` stay transparent black.
    """
    rgba = bytearray(width * height * 4)
    if mode == 2:
        row_bytes = (width + 1) // 2
        flip = bool(high_nibble_first)
        for y in range(height):
            row = raw[y * row_bytes : (y + 1) * row_bytes]
            # Anchor each row at its own output offset and stop after `width`
            # pixels: for odd widths the last nibble of a row is padding and
            # must not spill into the next row, which would shear the image.
            dst = y * width * 4
            for x in range(min(width, len(row) * 2)):
                byte = row[x >> 1]
                first_of_pair = (x & 1) == 0
                if first_of_pair == flip:
                    index = (byte >> 4) & 0x0F
                else:
                    index = byte & 0x0F
                red, green, blue, _ = psx_555_to_rgba(palette[index])
                alpha = 0 if index == 0 else 255
                rgba[dst : dst + 4] = bytes((red, green, blue, alpha))
                dst += 4
        return bytes(rgba)
    for pixel_index, value in enumerate(raw[: width * height]):
        red, green, blue, _ = psx_555_to_rgba(palette[value])
        alpha = 0 if value == 0 else 255
        dst = pixel_index * 4
        rgba[dst : dst + 4] = bytes((red, green, blue, alpha))
    return bytes(rgba)
def write_bundle_atlas(path: Path, frames: list[dict[str, object]]) -> None:
    """Lay the RGBA ``frames`` out left to right, two pixels apart, and save them as one PNG.

    Each frame supplies ``width``, ``height`` and ``rgba``. Frames are aligned
    to the top edge and the atlas is as tall as the tallest frame. Nothing is
    written for an empty list.
    """
    if not frames:
        return
    gap = 2
    atlas_width = sum(frame["width"] for frame in frames) + gap * (len(frames) - 1)
    atlas_height = max(frame["height"] for frame in frames)
    atlas_stride = atlas_width * 4
    atlas = bytearray(atlas_stride * atlas_height)
    left = 0
    for frame in frames:
        span = frame["width"] * 4
        source = frame["rgba"]
        for y in range(frame["height"]):
            dst = y * atlas_stride + left * 4
            atlas[dst : dst + span] = source[y * span : y * span + span]
        left += frame["width"] + gap
    write_png_rgba(path, bytes(atlas), atlas_width, atlas_height)
def write_overview_grid(path: Path, entries: list[dict[str, object]], columns: int = 4) -> None:
    """Arrange RGBA ``entries`` on a grid of equally sized cells and save it as one PNG.

    Cells are as large as the widest and tallest entry, are separated by four
    pixels and are filled row by row, ``columns`` per row. Each entry is drawn
    at the top-left corner of its cell. Nothing is written for an empty list.
    """
    if not entries:
        return
    gap = 4
    cell_width = max(entry["width"] for entry in entries)
    cell_height = max(entry["height"] for entry in entries)
    row_total = (len(entries) + columns - 1) // columns
    atlas_width = columns * cell_width + gap * (columns - 1)
    atlas_height = row_total * cell_height + gap * (row_total - 1)
    atlas = bytearray(atlas_width * atlas_height * 4)
    for position, entry in enumerate(entries):
        grid_row, grid_col = divmod(position, columns)
        left = grid_col * (cell_width + gap)
        top = grid_row * (cell_height + gap)
        span = entry["width"] * 4
        source = entry["rgba"]
        for y in range(entry["height"]):
            dst = ((top + y) * atlas_width + left) * 4
            atlas[dst : dst + span] = source[y * span : y * span + span]
    write_png_rgba(path, bytes(atlas), atlas_width, atlas_height)
def write_palette_row_sweep(
    path: Path,
    frame: dict[str, object],
    palettes: list[list[int]],
    base_palette_index: int,
    mode: int,
) -> None:
    """Render ``frame`` with the palettes at ``base_palette_index % 16`` plus a multiple of 16.

    Palette indices ``column``, ``column + 16``, ... up to eight candidates are
    tried; indices beyond ``palettes`` are skipped. The variants are saved side
    by side through ``write_bundle_atlas``. Only mode 2 is handled.
    """
    if mode != 2:
        return
    column = base_palette_index % 16
    width = frame["width"]
    height = frame["height"]
    variants: list[dict[str, object]] = [
        {
            "width": width,
            "height": height,
            "rgba": colorize_indexed_pixels(frame["pixels"], width, height, mode, palettes[palette_index]),
        }
        for palette_index in range(column, 8 * 16, 16)
        if palette_index < len(palettes)
    ]
    write_bundle_atlas(path, variants)
def decode_indexed_pixels(
    raw: bytes, width: int, height: int, mode: int, high_nibble_first: bool = False
) -> bytes:
    """Turn indexed pixel data into one greyscale byte per pixel.

    Args:
        raw: Indexed pixel data. In mode 2 every row occupies
            ``(width + 1) // 2`` bytes holding two 4-bit indices per byte.
        width: Image width in pixels.
        height: Image height in pixels.
        mode: 2 selects the packed 4-bit layout; any other mode returns the
            first ``width * height`` bytes of ``raw`` unchanged.
        high_nibble_first: In mode 2, take the high nibble of each byte as the
            left pixel instead of the low nibble.

    Returns:
        For mode 2, ``width * height`` bytes with each 4-bit index scaled by
        17 (0..15 -> 0..255); pixels missing from ``raw`` stay 0.
    """
    if mode != 2:
        return raw[: width * height]
    row_bytes = (width + 1) // 2
    flip = bool(high_nibble_first)
    out = bytearray(width * height)
    for y in range(height):
        row = raw[y * row_bytes : (y + 1) * row_bytes]
        base = y * width
        # Stop after `width` pixels: for odd widths the last nibble of a row
        # is padding and must not spill into the next row, which would shear
        # the image.
        for x in range(min(width, len(row) * 2)):
            byte = row[x >> 1]
            first_of_pair = (x & 1) == 0
            index = (byte >> 4) & 0x0F if first_of_pair == flip else byte & 0x0F
            out[base + x] = index * 17
    return bytes(out)
def decode_rle_rows(data: bytes, start: int, width: int, height: int, mode: int) -> tuple[bytes, int] | None:
    """Decode ``height`` run-length encoded rows beginning at ``start``.

    Control bytes: 0 ends a row; a value with the top bit set copies
    ``control & 0x7F`` literal bytes; any other value repeats the next byte
    that many times. Row length is not checked per row, only the total.

    Returns ``(pixels, bytes_consumed)``, or ``None`` when the stream runs out
    of data or the decoded size differs from ``row_bytes * height`` (rows are
    ``(width + 1) // 2`` bytes in mode 2 and ``width`` bytes otherwise).
    """
    row_bytes = (width + 1) // 2 if mode == 2 else width
    expected_size = row_bytes * height
    limit = len(data)
    decoded = bytearray()
    position = start
    finished_rows = 0
    while finished_rows < height:
        if position >= limit:
            return None
        control = data[position]
        position += 1
        if control == 0:
            finished_rows += 1
            continue
        if control >= 0x80:
            # Literal run: the low seven bits give the byte count.
            length = control - 0x80
            if position + length > limit:
                return None
            decoded += data[position : position + length]
            position += length
        else:
            # Repeat run: one value byte repeated `control` times.
            if position >= limit:
                return None
            decoded += bytes([data[position]]) * control
            position += 1
        if len(decoded) > expected_size:
            return None
    if len(decoded) != expected_size:
        return None
    return bytes(decoded), position - start
def try_parse_sprite_bundle(data: bytes, base_offset: int) -> dict[str, object] | None:
    """Try to interpret the bytes at ``base_offset`` as a sprite bundle.

    Header fields read (little-endian u32, relative to ``base_offset``):
    0x10 mode (1 or 2), 0x14 palette index (at most 127), 0x1C pixel-data
    offset, 0x20 frame count (1..512). A table of 20-byte frame records starts
    at 0x34; each record holds flags at +0, a data offset at +8 and four u16
    values (width, height, origin x, origin y) at +12. Flag bit 0 marks
    run-length encoded pixels whose data offset counts 4-byte units.

    Only the first 12 frames are decoded. Returns the bundle description, or
    ``None`` as soon as any field or frame fails validation.
    """
    total = len(data)
    if base_offset + 0x34 > total:
        return None

    mode, palette_index = struct.unpack_from("<2I", data, base_offset + 0x10)
    data_offset, frame_count = struct.unpack_from("<2I", data, base_offset + 0x1C)
    if mode not in {1, 2}:
        return None
    if palette_index > 127:
        return None
    if not 1 <= frame_count <= 512:
        return None
    if data_offset < 0x34 + frame_count * 20:
        return None
    pixel_base = base_offset + data_offset
    if pixel_base >= total:
        return None

    frames: list[dict[str, object]] = []
    for frame_index in range(min(frame_count, 12)):
        record_offset = base_offset + 0x34 + frame_index * 20
        if record_offset + 20 > total:
            return None
        flags, data_rel, width, height, origin_x, origin_y = struct.unpack_from(
            "<I4xI4H", data, record_offset
        )
        if not (0 < width <= 512 and 0 < height <= 512):
            return None
        compressed = bool(flags & 1)
        data_start = pixel_base + (data_rel * 4 if compressed else data_rel)
        if data_start >= total:
            return None
        if compressed:
            decoded = decode_rle_rows(data, data_start, width, height, mode)
            if decoded is None:
                return None
            raw_pixels, consumed = decoded
        else:
            row_bytes = (width + 1) // 2 if mode == 2 else width
            consumed = row_bytes * height
            if data_start + consumed > total:
                return None
            raw_pixels = data[data_start : data_start + consumed]
        frames.append(
            {
                "index": frame_index,
                "flags": flags,
                "data_rel": data_rel,
                "width": width,
                "height": height,
                "origin_x": origin_x,
                "origin_y": origin_y,
                "data_start": data_start,
                "consumed": consumed,
                "pixels": raw_pixels,
            }
        )
    return {
        "offset": base_offset,
        "mode": mode,
        "palette_index": palette_index,
        "frame_count": frame_count,
        "data_offset": data_offset,
        "frames": frames,
    }
def scan_sprite_bundles(data: bytes, max_candidates: int | None = None) -> list[dict[str, object]]:
    """Probe every 4-byte aligned offset of ``data`` for a sprite bundle.

    A candidate is dropped when its offset falls inside the header/record area
    (offset up to offset + data_offset) of a bundle accepted earlier. Scanning
    stops once ``max_candidates`` bundles have been collected.
    """
    accepted: list[dict[str, object]] = []
    claimed: list[tuple[int, int]] = []
    for offset in range(0, len(data) - 0x34, 4):
        bundle = try_parse_sprite_bundle(data, offset)
        if bundle is None:
            continue
        if any(low <= offset < high for low, high in claimed):
            continue
        claimed.append((offset, offset + bundle["data_offset"]))
        accepted.append(bundle)
        if max_candidates is not None and len(accepted) >= max_candidates:
            break
    return accepted
def export_sprite_bundle(bundle: dict[str, object], target_dir: Path) -> dict[str, object]:
    """Write the frames of ``bundle`` below ``target_dir`` and return its metadata.

    A ``bundle_<offset>`` directory receives, per frame, the raw pixel bytes
    (``.bin``) and greyscale ``.pgm`` / ``.png`` renderings, plus a
    ``bundle.json`` holding the returned metadata (bundle header fields and
    one row per exported frame, without the pixel data).
    """
    bundle_dir = target_dir / f"bundle_{bundle['offset']:08X}"
    bundle_dir.mkdir(parents=True, exist_ok=True)
    mode = bundle["mode"]
    frame_keys = ("index", "flags", "width", "height", "origin_x", "origin_y", "data_start", "consumed")

    frame_rows: list[dict[str, object]] = []
    for frame in bundle["frames"]:
        stem = f"frame_{frame['index']:03d}"
        width, height = frame["width"], frame["height"]
        (bundle_dir / f"{stem}.bin").write_bytes(frame["pixels"])
        image = decode_indexed_pixels(frame["pixels"], width, height, mode)
        write_pgm(bundle_dir / f"{stem}.pgm", image, width, height)
        write_png_grayscale(bundle_dir / f"{stem}.png", image, width, height)
        frame_rows.append({key: frame[key] for key in frame_keys})

    metadata = {
        key: bundle[key]
        for key in ("offset", "mode", "palette_index", "frame_count", "data_offset")
    }
    metadata["exported_frames"] = frame_rows
    (bundle_dir / "bundle.json").write_text(json.dumps(metadata, indent=2), encoding="ascii")
    return metadata
def export_colored_sprite_bundle(
    bundle: dict[str, object],
    target_dir: Path,
    palettes_16: list[list[int]],
    palettes_256: list[list[int]],
) -> dict[str, object] | None:
    """Write colour renderings of ``bundle`` and return the palette metadata used.

    Mode 2 bundles use ``palettes_16``: the bundle's own palette index when it
    is in range, otherwise the index suggested by ``choose_palette``. Mode 1
    bundles use ``palettes_256``, falling back to index 0. Every frame is
    written twice (low-nibble-first and high-nibble-first), followed by an
    atlas for each variant, a palette row sweep for mode 2 and a
    ``palette.json``.

    Returns ``None`` when no palette is available or the mode is unsupported.
    """
    mode = bundle["mode"]
    palette_index = bundle.get("palette_index")
    if mode == 2:
        if palette_index is None or palette_index >= len(palettes_16):
            palette_index = choose_palette(palettes_16, bundle["frames"], mode)
            if palette_index is None:
                return None
        palette = palettes_16[palette_index]
    elif mode == 1:
        if palette_index is None or palette_index >= len(palettes_256):
            if not palettes_256:
                return None
            palette_index = 0
        palette = palettes_256[palette_index]
    else:
        return None

    bundle_dir = target_dir / f"bundle_{bundle['offset']:08X}"
    bundle_dir.mkdir(parents=True, exist_ok=True)

    exported_frames: list[dict[str, object]] = []
    exported_frames_flip: list[dict[str, object]] = []
    for frame in bundle["frames"]:
        width, height = frame["width"], frame["height"]
        stem = f"frame_{frame['index']:03d}"
        rgba = colorize_indexed_pixels(frame["pixels"], width, height, mode, palette)
        write_png_rgba(bundle_dir / f"{stem}_color.png", rgba, width, height)
        rgba_flip = colorize_indexed_pixels(
            frame["pixels"], width, height, mode, palette, high_nibble_first=True
        )
        write_png_rgba(bundle_dir / f"{stem}_color_flip.png", rgba_flip, width, height)
        exported_frames.append({**frame, "rgba": rgba})
        exported_frames_flip.append({**frame, "rgba": rgba_flip})

    write_bundle_atlas(bundle_dir / "atlas_color.png", exported_frames)
    write_bundle_atlas(bundle_dir / "atlas_color_flip.png", exported_frames_flip)
    if exported_frames and mode == 2:
        write_palette_row_sweep(
            bundle_dir / "palette_row_sweep.png",
            exported_frames[0],
            palettes_16,
            palette_index,
            mode,
        )

    metadata = {
        "palette_index": palette_index,
        "palette_preview": [f"0x{value:04X}" for value in palette[:16]],
        "frame_count": len(exported_frames),
    }
    (bundle_dir / "palette.json").write_text(json.dumps(metadata, indent=2), encoding="ascii")
    return metadata
def annotate_region_tim_counts(
    regions: list[dict[str, int]], tim_hits: list[dict[str, int]]
) -> None:
    """Store in each region's ``tim_count`` how many hits start inside that region.

    A hit belongs to a region when ``offset <= hit offset < offset + size``.
    The region dicts are modified in place.
    """
    hit_offsets = [hit["offset"] for hit in tim_hits]
    for region in regions:
        first = region["offset"]
        limit = first + region["size"]
        region["tim_count"] = len([offset for offset in hit_offsets if first <= offset < limit])
def parse_lset_wdl(data: bytes) -> dict[str, object] | None:
    """Parse ``data`` as an LSET-style WDL whose first word is the header size 0x34.

    Header word 1 is taken as the size of the blob that follows the header.
    Later header words that point at or past the end of that blob (and inside
    the file) become the boundaries of the ``post_audio_region_NN`` regions.

    Returns the summary dict (kind ``lset``), or ``None`` when the file is
    shorter than 0x34 bytes or does not start with 0x34.
    """
    header_size = 0x34
    if len(data) < header_size or u32(data, 0) != header_size:
        return None

    header_words = list(struct.unpack_from("<13I", data, 0))
    audio_size = header_words[1]
    post_audio_start = header_size + audio_size
    file_size = len(data)
    high_boundaries = sorted(
        {word for word in header_words[2:] if post_audio_start <= word < file_size}
    )

    regions: list[dict[str, int | str]] = []
    if audio_size and post_audio_start <= file_size:
        regions.append({"name": "audio_or_spu_blob", "offset": header_size, "size": audio_size})

    boundaries = [post_audio_start, *high_boundaries, file_size]
    for index, (start, end) in enumerate(zip(boundaries, boundaries[1:])):
        # Empty or inverted spans are skipped, but still use up an index.
        if end > start:
            regions.append(
                {
                    "name": f"post_audio_region_{index:02d}",
                    "offset": start,
                    "size": end - start,
                }
            )

    tim_hits = scan_tims(data)
    annotate_region_tim_counts(regions, tim_hits)
    return {
        "kind": "lset",
        "header_size": header_size,
        "header_words": header_words,
        "audio_size": audio_size,
        "post_audio_start": post_audio_start,
        "high_offset_boundaries": high_boundaries,
        "regions": regions,
        "tim_hits": tim_hits,
    }
def parse_contiguous_section_wdl(data: bytes) -> dict[str, object] | None:
    """Parse ``data`` as a 0x38-byte table of section sizes followed by the sections back to back.

    Returns the summary dict (kind ``contiguous_sections``), or ``None`` when
    the file is shorter than the table or a section would run past its end.
    Zero-sized sections are listed at the current cursor position.
    """
    header_size = 0x38
    file_size = len(data)
    if file_size < header_size:
        return None

    section_sizes = list(struct.unpack_from("<14I", data, 0))
    regions: list[dict[str, int | str]] = []
    cursor = header_size
    for index, size in enumerate(section_sizes):
        # The cursor never passes the end of the file, so this check can only
        # fail for a non-empty section.
        if cursor + size > file_size:
            return None
        regions.append({"name": f"section_{index:02d}", "offset": cursor, "size": size})
        cursor += size

    tim_hits = scan_tims(data)
    annotate_region_tim_counts(regions, tim_hits)
    return {
        "kind": "contiguous_sections",
        "header_size": header_size,
        "section_sizes": section_sizes,
        "regions": regions,
        "tim_hits": tim_hits,
        "trailing_bytes": file_size - cursor,
    }
def parse_raw_blob_wdl(data: bytes) -> dict[str, object]:
    """Describe ``data`` as one ``raw_blob`` region covering the whole file, plus its TIM hits."""
    whole_file: dict[str, int | str] = {"name": "raw_blob", "offset": 0, "size": len(data)}
    regions: list[dict[str, int | str]] = [whole_file]
    tim_hits = scan_tims(data)
    annotate_region_tim_counts(regions, tim_hits)
    return {"kind": "raw_blob", "regions": regions, "tim_hits": tim_hits}
def summarize(path: Path, summary: dict[str, object]) -> str:
    """Format ``summary`` as a multi-line text report for ``path``.

    The report lists the kind-specific header fields, every region, every TIM
    hit (or ``none``) and the first six sprite bundles.
    """
    kind = summary["kind"]
    lines = [f"file: {path}", f"kind: {kind}"]
    if kind == "lset":
        boundaries = ", ".join(f"0x{value:X}" for value in summary["high_offset_boundaries"])
        lines += [
            f"header_size: 0x{summary['header_size']:X}",
            f"audio_size: 0x{summary['audio_size']:X}",
            f"post_audio_start: 0x{summary['post_audio_start']:X}",
            "high_offset_boundaries: " + boundaries,
        ]
    elif kind == "contiguous_sections":
        lines += [
            f"header_size: 0x{summary['header_size']:X}",
            f"trailing_bytes: 0x{summary['trailing_bytes']:X}",
        ]

    lines.append("regions:")
    lines.extend(
        " "
        + f"{region['name']}: offset=0x{region['offset']:X} size=0x{region['size']:X} tims={region.get('tim_count', 0)}"
        for region in summary["regions"]
    )

    lines.append("tim_hits:")
    tim_hits = summary["tim_hits"]
    lines.extend(
        " " + f"offset=0x{hit['offset']:X} size=0x{hit['size']:X} flags=0x{hit['flags']:X}"
        for hit in tim_hits
    )
    if len(tim_hits) == 0:
        lines.append(" none")

    sprite_bundles = summary.get("sprite_bundles", [])
    lines.append(f"sprite_bundles: {len(sprite_bundles)}")
    lines.extend(
        " "
        + f"offset=0x{bundle['offset']:X} mode={bundle['mode']} frames={bundle['frame_count']} data_offset=0x{bundle['data_offset']:X}"
        for bundle in sprite_bundles[:6]
    )
    return "\n".join(lines)
def extract(path: Path, output_dir: Path) -> dict[str, object]:
    """Parse the WDL at ``path`` and write everything carved from it below ``output_dir``.

    The file is tried as an LSET file, then as a contiguous-section file, and
    finally treated as a raw blob. Output goes to ``output_dir / path.stem``:
    the front VRAM page preview, one ``.bin`` per region, table probes for the
    first LSET post-audio regions, sprite bundles found in
    ``post_audio_region_04`` with overview sheets, ``summary.json`` and every
    carved TIM. Returns the summary dict.
    """
    data = path.read_bytes()
    # Each parser returns a non-empty dict or None, so `or` selects the first match.
    summary = (
        parse_lset_wdl(data)
        or parse_contiguous_section_wdl(data)
        or parse_raw_blob_wdl(data)
    )
    is_lset = summary["kind"] == "lset"
    target_dir = output_dir / path.stem
    target_dir.mkdir(parents=True, exist_ok=True)
    sprite_root = target_dir / "sprite_bundles"

    # The runtime loads the first 0x3520 bytes of each LSET/WDL directly into VRAM
    # as a 0x88 x 0x32 halfword image block before reading later stream sections.
    if len(data) >= 0x3520:
        write_psx_16bpp_png(target_dir / "front_vram_page_16bpp.png", data[:0x3520], 0x88, 0x32)

    # Extra table dumps written for specific LSET regions, in this order.
    region_probes = {
        "post_audio_region_00": (
            ("u16x6.csv", write_u16x6_csv),
            ("u16x12.csv", write_u16x12_csv),
            ("u32x5.csv", write_u32x5_csv),
            ("stream_probe.json", write_lset_stream_probe_json),
        ),
        "post_audio_region_01": (
            ("u16x6.csv", write_u16x6_csv),
            ("u16x12.csv", write_u16x12_csv),
            ("paired_u16x6.json", write_paired_u16x6_json),
        ),
        "post_audio_region_02": (("u16x12.csv", write_u16x12_csv),),
        "post_audio_region_03": (("u16x12.csv", write_u16x12_csv),),
    }
    for region in summary["regions"]:
        start = region["offset"]
        size = region["size"]
        if size <= 0:
            continue
        stem = f"{region['name']}_{start:08X}"
        region_bytes = data[start : start + size]
        write_blob(target_dir / f"{stem}.bin", region_bytes)
        if is_lset:
            for suffix, writer in region_probes.get(region["name"], ()):
                writer(target_dir / f"{stem}_{suffix}", region_bytes)

    palettes = extract_palette_sets(data, summary)
    palettes_256 = extract_palette_blocks_256(data, summary)

    sprite_bundles: list[dict[str, object]] = []
    graphics_region = None
    if is_lset:
        graphics_region = next(
            (region for region in summary["regions"] if region["name"] == "post_audio_region_04"),
            None,
        )
    if graphics_region is not None:
        region_base = graphics_region["offset"]
        region_data = data[region_base : region_base + graphics_region["size"]]
        for bundle in scan_sprite_bundles(region_data, max_candidates=160):
            exported = export_sprite_bundle(bundle, sprite_root)
            color_metadata = export_colored_sprite_bundle(bundle, sprite_root, palettes, palettes_256)
            # Bundles are scanned relative to the region; report file offsets.
            exported["offset"] += region_base
            if color_metadata is not None:
                exported["color_palette"] = color_metadata
            sprite_bundles.append(exported)

    if sprite_bundles:

        def size_rank(entry: dict[str, object]) -> tuple[int, int]:
            first_frames = entry["exported_frames"]
            area = first_frames[0]["width"] * first_frames[0]["height"] if first_frames else 0
            return area, entry["frame_count"]

        sprite_bundles.sort(key=size_rank, reverse=True)
        summary["sprite_bundles"] = sprite_bundles

        overview_gray: list[dict[str, object]] = []
        overview_color: list[dict[str, object]] = []
        overview_color_flip: list[dict[str, object]] = []
        for bundle in sprite_bundles[:48]:
            if not bundle["exported_frames"]:
                continue
            # Bundle directories are named after the region-relative offset.
            bundle_dir = sprite_root / f"bundle_{bundle['offset'] - graphics_region['offset']:08X}"
            frame = bundle["exported_frames"][0]
            width, height, mode = frame["width"], frame["height"], bundle["mode"]
            raw_frame = (bundle_dir / "frame_000.bin").read_bytes()

            gray = decode_indexed_pixels(raw_frame, width, height, mode)
            rgba_gray = b"".join(
                bytes((value, value, value, 255 if value != 0 else 0)) for value in gray
            )
            overview_gray.append({"width": width, "height": height, "rgba": rgba_gray})

            # Recolour from the raw frame bytes with the palette recorded at
            # export time instead of decoding the colour PNG written earlier.
            palette_json = bundle.get("color_palette")
            if palette_json is None or not (bundle_dir / "frame_000_color.png").exists():
                continue
            palette_bank = palettes_256 if mode == 1 else palettes
            palette_index = palette_json["palette_index"]
            if palette_index >= len(palette_bank):
                continue
            palette = palette_bank[palette_index]
            overview_color.append(
                {
                    "width": width,
                    "height": height,
                    "rgba": colorize_indexed_pixels(raw_frame, width, height, mode, palette),
                }
            )
            if mode == 2:
                overview_color_flip.append(
                    {
                        "width": width,
                        "height": height,
                        "rgba": colorize_indexed_pixels(
                            raw_frame, width, height, mode, palette, high_nibble_first=True
                        ),
                    }
                )

        for sheet_name, cells in (
            ("overview_gray.png", overview_gray),
            ("overview_color.png", overview_color),
            ("overview_color_flip.png", overview_color_flip),
        ):
            if cells:
                write_overview_grid(sprite_root / sheet_name, cells)

    (target_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="ascii")

    tim_dir = target_dir / "tims"
    for index, hit in enumerate(summary["tim_hits"]):
        first = hit["offset"]
        write_blob(tim_dir / f"tim_{index:03d}_{first:08X}.tim", data[first : first + hit["size"]])
    return summary
def main() -> int:
    """Command-line entry point: extract one WDL file and print its summary.

    Returns the process exit status (always 0).
    """
    parser = argparse.ArgumentParser(description="Inspect and carve Crusader PSX WDL files.")
    parser.add_argument("input", type=Path, help="Path to a .WDL file")
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("out") / "psx_wdl",
        help="Directory where carved blocks and metadata are written",
    )
    options = parser.parse_args()
    report = summarize(options.input, extract(options.input, options.output))
    print(report)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())