PSX Research

This commit is contained in:
Marco 2026-04-07 17:16:44 +02:00
commit 94c49ac5bd
16 changed files with 2052 additions and 16 deletions

View file

@ -1332,6 +1332,64 @@ def parse_field_tags(body: bytes, start: int) -> FieldTagParseResult | None:
return FieldTagParseResult(field_tags=field_tags, end_offset=end_offset, trailing_bytes=body[end_offset:])
def classify_post_ret_metadata(body: bytes, ops: list[dict[str, Any]]) -> dict[str, Any] | None:
last_ret_index = next((index for index in range(len(ops) - 1, -1, -1) if ops[index]["mnemonic"] == "ret"), None)
if last_ret_index is None:
return None
ret_end = ops[last_ret_index]["offset"] + (len(ops[last_ret_index]["raw_bytes"]) // 2)
if len(body) - ret_end <= 1:
return None
debug_result = parse_debug_symbols(body, ret_end)
if debug_result is not None and debug_result.end_offset == len(body):
return {
"ops": ops[:last_ret_index + 1],
"end_reason": "debug_symbols_then_end",
"unknown_tail": debug_result.trailing_bytes,
"debug_symbol_offset": ret_end,
"debug_symbols": [
{
"index": symbol.index,
"unknown1": symbol.unknown1,
"type_id": symbol.type_id,
"type_char": symbol.type_char,
"bp_offset": symbol.bp_offset,
"bp_repr": symbol.bp_repr,
"unknown3": symbol.unknown3,
"name": symbol.name,
}
for symbol in debug_result.debug_symbols
],
"field_tags": [],
"end_offset": debug_result.end_offset,
}
field_tag_result = parse_field_tags(body, ret_end)
if field_tag_result is not None and field_tag_result.end_offset == len(body):
return {
"ops": ops[:last_ret_index + 1],
"end_reason": "field_tags_then_end",
"unknown_tail": field_tag_result.trailing_bytes,
"debug_symbol_offset": None,
"debug_symbols": [],
"field_tags": [
{
"tag_id": tag.tag_id,
"bp_offset": tag.bp_offset,
"bp_repr": bp_repr(tag.bp_offset),
"value_kind": tag.value_kind,
"name": tag.name,
"tag_label": f"{tag.tag_id:02X}:{tag.bp_offset:02X}{tag.value_kind:02X}->{tag.name}",
}
for tag in field_tag_result.field_tags
],
"end_offset": field_tag_result.end_offset,
}
return None
def parse_body_ir(
event_row: dict[str, str],
layout_row: dict[str, str],
@ -1390,6 +1448,16 @@ def parse_body_ir(
and 0 <= operands["symbol_offset"] < len(body)
}
)
post_ret_metadata = classify_post_ret_metadata(body, ops)
if post_ret_metadata is not None:
ops = post_ret_metadata["ops"]
debug_symbol_offset = post_ret_metadata["debug_symbol_offset"]
debug_symbols = post_ret_metadata["debug_symbols"]
field_tags = post_ret_metadata["field_tags"]
end_reason = post_ret_metadata["end_reason"]
unknown_tail = post_ret_metadata["unknown_tail"]
offset = post_ret_metadata["end_offset"]
last_ret_index = next((index for index in range(len(ops) - 1, -1, -1) if ops[index]["mnemonic"] == "ret"), None)
if end_reason == "unknown_opcode" and last_ret_index is not None:
ret_end = ops[last_ret_index]["offset"] + (len(ops[last_ret_index]["raw_bytes"]) // 2)
@ -2973,6 +3041,17 @@ def render_pseudocode(ir: dict[str, Any], shape_catalog: ShapeCatalog | None = N
else:
lines.extend(render_partially_structured_blocks(rendered_blocks))
if ir["debug_symbols"] or ir["field_tags"]:
lines.append("")
lines.append(" /* post-return metadata (not executable):")
for symbol in ir["debug_symbols"]:
lines.append(
f" debug_symbol {sanitize_identifier(symbol['name'])} {symbol['bp_repr']} type=0x{symbol['type_id']:02X} unk1=0x{symbol['unknown1']:02X} unk3=0x{symbol['unknown3']:02X}"
)
for tag in ir["field_tags"]:
lines.append(f" field_tag {tag['tag_label']} ({tag['bp_repr']})")
lines.append(" */")
lines.append("}")
return apply_shape_catalog_to_pseudocode("\n".join(lines) + "\n", shape_catalog)

View file

@ -0,0 +1,76 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from psx_extract_wdl import extract, summarize
def find_wdl_files(root: Path) -> list[Path]:
    """Return every regular file under *root* whose suffix is ``.wdl``, sorted.

    The suffix comparison is case-insensitive and the search is recursive.
    """
    matches: list[Path] = []
    for candidate in root.rglob("*"):
        if not candidate.is_file():
            # Directories (even ones named "*.wdl") are not extraction inputs.
            continue
        if candidate.suffix.lower() != ".wdl":
            continue
        matches.append(candidate)
    matches.sort()
    return matches
def build_index_entry(root: Path, path: Path, summary: dict[str, object]) -> dict[str, object]:
    """Build the index record for one extracted file.

    Args:
        root: Directory the batch was started from; *path* must lie under it.
        path: The extracted file.
        summary: Extraction summary. ``"kind"`` is required; ``"regions"``,
            ``"tim_hits"`` and ``"sprite_bundles"`` are counted and treated as
            empty when absent.

    Returns:
        A dict with ``source`` (POSIX-style path relative to *root*), ``stem``,
        ``kind``, ``region_count``, ``tim_count`` and ``sprite_bundle_count``.
    """
    source = path.relative_to(root).as_posix()
    bundles = summary.get("sprite_bundles", [])

    def count_of(key: str) -> int:
        # Missing collections count as zero entries.
        return len(summary.get(key, []))

    return dict(
        source=source,
        stem=path.stem,
        kind=summary["kind"],
        region_count=count_of("regions"),
        tim_count=count_of("tim_hits"),
        sprite_bundle_count=len(bundles),
    )
def run_batch(input_root: Path, output_root: Path) -> list[dict[str, object]]:
    """Extract every ``.wdl`` file under *input_root* and write an index.

    For each file found, ``extract`` is called with a target directory that
    mirrors the file's parent directory relative to *input_root*, a
    ``summary.txt`` is written into ``<target>/<stem>/``, and an index entry
    is recorded. Finally ``index.json`` is written into *output_root*.

    Args:
        input_root: Directory tree to scan.
        output_root: Directory that receives the per-file outputs and the index.

    Returns:
        The list of index entries, in the order the files were processed.

    Raises:
        SystemExit: If no ``.wdl`` files are found under *input_root*.
    """
    files = find_wdl_files(input_root)
    if not files:
        raise SystemExit(f"no .WDL files found under {input_root}")

    index: list[dict[str, object]] = []
    for path in files:
        # Mirror the input directory layout below the output root.
        target_root = output_root / path.relative_to(input_root).parent
        summary = extract(path, target_root)

        log_path = target_root / path.stem / "summary.txt"
        # Do not rely on extract() having created the per-file directory.
        log_path.parent.mkdir(parents=True, exist_ok=True)
        # UTF-8 rather than ASCII: the output is byte-identical for ASCII text,
        # but a non-ASCII character in the summary no longer aborts the batch
        # with UnicodeEncodeError.
        log_path.write_text(summarize(path, summary) + "\n", encoding="utf-8")

        index.append(build_index_entry(input_root, path, summary))

    index_path = output_root / "index.json"
    index_path.parent.mkdir(parents=True, exist_ok=True)
    # json.dumps escapes non-ASCII by default, so ASCII is always sufficient here.
    index_path.write_text(json.dumps(index, indent=2), encoding="ascii")
    return index
def main() -> int:
    """Command-line entry point: run the batch extraction and print a report.

    Returns:
        ``0`` once the batch has completed and the report has been printed.
    """
    cli = argparse.ArgumentParser(
        description="Extract all Crusader PSX WDL files under a directory tree."
    )
    cli.add_argument(
        "input_root",
        type=Path,
        help="Root directory to scan for .WDL files",
    )
    default_output = Path("out") / "psx_wdl_disc"
    cli.add_argument(
        "--output",
        type=Path,
        default=default_output,
        help="Directory where extracted per-file outputs are written",
    )
    options = cli.parse_args()

    entries = run_batch(options.input_root, options.output)

    # Header lines first, then one line per extracted file.
    print(f"input_root: {options.input_root}")
    print(f"output_root: {options.output}")
    print(f"wdl_files: {len(entries)}")
    for record in entries:
        line = f" {record['source']}: kind={record['kind']} tims={record['tim_count']} sprite_bundles={record['sprite_bundle_count']}"
        print(line)
    return 0
# Script entry point: only runs when the module is executed directly; the
# value returned by main() is passed to SystemExit as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -3,9 +3,11 @@ from __future__ import annotations
import unittest
from tools.poc_crusader_usecode_parser import (
classify_post_ret_metadata,
format_target_event_reference,
get_intrinsic_hints,
intrinsic_display_name,
render_pseudocode,
render_partially_structured_blocks,
render_structured_pseudocode,
try_decode_loop_selector,
@ -14,6 +16,44 @@ from tools.poc_crusader_usecode_parser import (
class UsecodeStructuringTests(unittest.TestCase):
def test_post_ret_debug_symbols_are_classified_as_metadata(self) -> None:
    """Bytes after a lone ``ret`` that parse as a debug symbol are reported as metadata."""
    # The single decoded op covers byte 0, so the metadata is expected at offset 1.
    ret_op = {"mnemonic": "ret", "offset": 0, "raw_bytes": "50", "operands": {}}
    payload = b"\x50\x01\x01\x69\x00\x00" + b"referent\x00" + b"\x7a"

    result = classify_post_ret_metadata(payload, [ret_op])

    self.assertIsNotNone(result)
    self.assertEqual(result["end_reason"], "debug_symbols_then_end")
    self.assertEqual(result["debug_symbol_offset"], 1)
    symbols = result["debug_symbols"]
    self.assertEqual(len(symbols), 1)
    self.assertEqual(symbols[0]["name"], "referent")
def test_render_pseudocode_includes_post_ret_metadata_comment(self) -> None:
    """Rendered pseudocode mentions the post-return metadata and the debug symbol."""
    referent_symbol = {
        "index": 0,
        "unknown1": 0x01,
        "type_id": 0x69,
        "type_char": "i",
        "bp_offset": 0x00,
        "bp_repr": "[BP+00h]",
        "unknown3": 0x00,
        "name": "referent",
    }
    ret_op = {
        "mnemonic": "ret",
        "offset": 0,
        "absolute_body_offset": 0,
        "raw_bytes": "50",
        "operands": {},
    }
    ir = {
        "class": {"class_name": "JELYHACK", "entry_index": 277, "class_id": 0x04D3},
        "event": {"event_name_hint": "use", "slot": 0x01},
        "body": {"end_reason": "debug_symbols_then_end", "decoded_op_count": 1},
        "ops": [ret_op],
        "debug_symbols": [referent_symbol],
        "field_tags": [],
    }

    output = render_pseudocode(ir)

    expected_fragments = (
        "post-return metadata (not executable)",
        "debug_symbol referent [BP+00h] type=0x69",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, output)
def test_alarmbox_style_forward_flow_renders_without_block_labels(self) -> None:
blocks = [
("entry", ["set_info(0x0211, *(arg_06));", "process_exclude();", "if var goto block_0330;"]),