Crusader_Decomp/tools/export_usecode_pseudocode.py

194 lines
7.2 KiB
Python
Raw Normal View History

2026-03-25 23:32:36 +01:00
from __future__ import annotations
import argparse
import csv
import re
import sys
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from tools.poc_crusader_usecode_parser import (
EXTRACTED_ROOT,
default_shape_catalog_path,
load_shape_catalog,
parse_body_ir,
render_pseudocode,
)
2026-03-25 23:32:36 +01:00
2026-03-26 00:37:17 +01:00
def load_rows(class_event_index: Path) -> list[dict[str, str]]:
    """Read a tab-separated index file into a list of row dicts.

    Args:
        class_event_index: Path to a UTF-8 TSV file whose first line is the
            header row.

    Returns:
        One dict per data line, keyed by header name, in file order.
    """
    # newline="" leaves line-ending handling to the csv module.
    with class_event_index.open("r", encoding="utf-8", newline="") as stream:
        reader = csv.DictReader(stream, delimiter="\t")
        return [record for record in reader]
2026-03-26 00:37:17 +01:00
def load_layout_by_entry(class_layout_index: Path) -> dict[int, dict[str, str]]:
    """Index the rows of a tab-separated layout file by their entry number.

    Args:
        class_layout_index: Path to a UTF-8 TSV file with an ``entry_index``
            column.

    Returns:
        Mapping from the parsed ``entry_index`` value to the full row dict.
        Rows whose ``entry_index`` is missing or cannot be parsed are left
        out; when several rows share an index the last one in the file wins.
    """
    with class_layout_index.open("r", encoding="utf-8", newline="") as stream:
        records = list(csv.DictReader(stream, delimiter="\t"))

    by_entry: dict[int, dict[str, str]] = {}
    for record in records:
        try:
            key = parse_int(record["entry_index"])
        except (KeyError, TypeError, ValueError):
            # Best effort: a row without a usable index is simply ignored.
            continue
        else:
            by_entry[key] = record
    return by_entry
def parse_int(value: str) -> int:
    """Convert integer text to ``int``, inferring the base from its prefix.

    Base 0 applies Python's integer-literal rules: ``0x``, ``0o`` and ``0b``
    prefixes select hexadecimal, octal and binary, and unprefixed text is
    read as decimal.

    Raises:
        ValueError: If ``value`` is not a valid integer literal.
        TypeError: If ``value`` is not text (for example ``None``).
    """
    return int(value, base=0)
def safe_name(value: str) -> str:
    """Turn arbitrary text into a token usable as a file or folder name.

    Surrounding whitespace is removed, every run of characters other than
    ASCII letters, digits, ``_``, ``.`` and ``-`` becomes a single ``_``, and
    leading/trailing ``.`` and ``_`` characters are trimmed.

    Returns:
        The sanitized name, or ``"unknown"`` when nothing is left.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", value.strip())
    trimmed = collapsed.strip("._")
    if not trimmed:
        return "unknown"
    return trimmed
def describe_row(row: dict[str, str]) -> str:
    """Build a one-line, human-readable label for an index row.

    The label has the form
    ``entry <entry_index> <class>::<event> (slot 0x<SLOT>)`` and is meant for
    progress messages, so absent values fall back to placeholders instead of
    raising.

    Args:
        row: Index row. The keys ``class_name_hint``, ``slot``,
            ``event_name_hint`` and ``entry_index`` are read; any of them may
            be missing, empty, or ``None``.

    Returns:
        The formatted label.

    Raises:
        ValueError: If ``slot`` is non-empty but not a valid integer literal.
    """
    class_name = row.get("class_name_hint") or "unknown"
    # Use `or` rather than a .get() default: a TSV cell that is present but
    # empty (or None for a short line) must also fall back, otherwise
    # parse_int would raise on it.
    slot = parse_int(row.get("slot") or "0")
    event_name = row.get("event_name_hint") or f"slot_{slot:02X}"
    entry_index = row.get("entry_index") or "?"
    # Format directly instead of inserting "::$" and replacing it afterwards,
    # which also rewrote any "::$" already contained in the names.
    return f"entry {entry_index} {class_name}::{event_name} (slot 0x{slot:02X})"
2026-03-25 23:32:36 +01:00
def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path:
    """Compute the pseudocode file path for one index row.

    The result is ``<output_root>/<class>/slot_<SLOT>_<event>.txt`` where the
    class and event parts are sanitized with ``safe_name`` and ``<SLOT>`` is
    the slot number as at least two upper-case hex digits. When the row has
    no event name hint, ``slot_<SLOT>`` is used as the event part.

    Args:
        output_root: Root directory of the export.
        row: Index row; ``class_name_hint`` and ``slot`` are required,
            ``event_name_hint`` is optional.

    Returns:
        The destination path (nothing is created on disk).
    """
    class_name = row["class_name_hint"]
    slot = parse_int(row["slot"])
    event_name = row.get("event_name_hint")
    if not event_name:
        event_name = f"slot_{slot:02X}"
    return output_root / safe_name(class_name) / f"slot_{slot:02X}_{safe_name(event_name)}.txt"
def build_index_row(output_root: Path, row: dict[str, str], path: Path, ir: dict[str, object]) -> dict[str, str]:
    """Assemble the ``index.tsv`` record for one exported pseudocode file.

    Args:
        output_root: Root directory of the export; the recorded file path is
            made relative to this directory's parent.
        row: Source index row the body came from.
        path: Location of the written pseudocode file.
        ir: Parsed body; ``ir["body"]`` must provide ``decoded_op_count``,
            ``end_reason`` and ``debug_symbol_count``.

    Returns:
        A dict with one value per index column.
    """
    record = {
        "entry_index": row["entry_index"],
        "class_name": row["class_name_hint"],
        "slot": row["slot"],
        "event_name_hint": row.get("event_name_hint", ""),
    }
    # Body range columns are copied through unchanged.
    for column in ("derived_body_start", "derived_body_end", "derived_body_length"):
        record[column] = row[column]
    # Decoder statistics are stringified so every value in the record is text.
    body_summary = ir["body"]
    for column in ("decoded_op_count", "end_reason", "debug_symbol_count"):
        record[column] = str(body_summary[column])
    record["pseudocode_path"] = path.relative_to(output_root.parent).as_posix()
    return record
def write_index(output_root: Path, index_rows: list[dict[str, str]]) -> None:
    """Write ``index.tsv`` into the export root.

    The file is tab-separated UTF-8 with a header line followed by one line
    per entry of ``index_rows``, in the given order.

    Args:
        output_root: Existing directory that receives ``index.tsv``.
        index_rows: Records whose keys are the index column names.
    """
    columns = [
        "entry_index",
        "class_name",
        "slot",
        "event_name_hint",
        "derived_body_start",
        "derived_body_end",
        "derived_body_length",
        "decoded_op_count",
        "end_reason",
        "debug_symbol_count",
        "pseudocode_path",
    ]
    destination = output_root / "index.tsv"
    # newline="" leaves line-ending handling to the csv module.
    with destination.open("w", encoding="utf-8", newline="") as stream:
        tsv = csv.DictWriter(stream, fieldnames=columns, delimiter="\t")
        tsv.writeheader()
        tsv.writerows(index_rows)
def write_readme(output_root: Path, export_count: int) -> None:
    """Write a short ``README.md`` describing the export folder.

    Args:
        output_root: Existing directory that receives ``README.md``.
        export_count: Number of pseudocode files produced, quoted in the text.
    """
    content = (
        "# USECODE pseudocode export\n\n"
        f"Generated pseudocode files: {export_count}\n\n"
        "This folder is produced by tools/export_usecode_pseudocode.py using the current\n"
        "pseudocode renderer in tools/poc_crusader_usecode_parser.py.\n\n"
        "- Each class gets its own subfolder.\n"
        "- Each non-empty decoded slot body is emitted as one text file.\n"
        "- index.tsv records the body range, op count, end reason, and file path.\n"
    )
    destination = output_root / "README.md"
    destination.write_text(content, encoding="utf-8")
def main() -> None:
    """Command-line entry point: export pseudocode for every decoded body.

    Reads ``class_event_index.tsv`` and ``class_layout_index.tsv`` from the
    extracted root, keeps the event rows that have both a derived body start
    and end, and for each one that has a matching layout row decodes the
    body, renders it, and writes it to its per-class text file. Afterwards
    ``index.tsv`` and ``README.md`` are written into the output directory.
    Progress is printed (and flushed) for every row.
    """
    parser = argparse.ArgumentParser(description="Export pseudocode for all decoded Crusader USECODE bodies")
    parser.add_argument(
        "--extracted-root",
        default=str(EXTRACTED_ROOT),
        help="Extracted USECODE root containing class_event_index.tsv and chunks/",
    )
    parser.add_argument(
        "--output-dir",
        help="Output directory for pseudocode files (default: <extracted-root>/pseudocode)",
    )
    parser.add_argument(
        "--variant",
        choices=["auto", "regret", "remorse"],
        default="auto",
        help="Crusader intrinsic numbering to apply during export (default: auto, fallback regret)",
    )
    parser.add_argument(
        "--shape-csv",
        help=(
            "Shape catalog CSV to apply to pseudocode output "
            "(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
            "Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
        ),
    )
    args = parser.parse_args()

    # Resolve every input and output location from the parsed options.
    extracted_root = Path(args.extracted_root)
    class_event_index = extracted_root / "class_event_index.tsv"
    class_layout_index = extracted_root / "class_layout_index.tsv"
    if args.output_dir:
        output_root = Path(args.output_dir)
    else:
        output_root = extracted_root / "pseudocode"
    if args.shape_csv:
        shape_csv = Path(args.shape_csv)
    else:
        shape_csv = default_shape_catalog_path(extracted_root, args.variant)
    shape_catalog = load_shape_catalog(shape_csv)

    output_root.mkdir(parents=True, exist_ok=True)

    # Only rows that carry both ends of a derived body range are exported.
    rows = load_rows(class_event_index)
    work_rows = [row for row in rows if row.get("derived_body_start") and row.get("derived_body_end")]
    layout_by_entry = load_layout_by_entry(class_layout_index)

    # "auto" is passed to the parser as None; explicit variants pass through.
    forced_variant = None if args.variant == "auto" else args.variant

    index_rows: list[dict[str, str]] = []
    exported = 0
    print(
        f"Exporting pseudocode from {extracted_root} to {output_root} using variant={args.variant} and shape_csv={shape_csv}",
        flush=True,
    )

    for position, row in enumerate(work_rows, start=1):
        layout_row = layout_by_entry.get(parse_int(row["entry_index"]))
        if layout_row is None:
            print(
                f"[{position}/{len(work_rows)}] Skipping {describe_row(row)} because no layout row was found",
                flush=True,
            )
            continue

        label = describe_row(row)
        print(f"[{position}/{len(work_rows)}] Decoding {label}", flush=True)
        ir = parse_body_ir(row, layout_row, forced_variant, extracted_root)

        print(f"[{position}/{len(work_rows)}] Rendering {label}", flush=True)
        pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog)

        path = output_path_for_row(output_root, row)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(pseudocode, encoding="utf-8")

        index_rows.append(build_index_row(output_root, row, path, ir))
        exported += 1
        print(f"[{position}/{len(work_rows)}] Wrote {path.relative_to(output_root.parent).as_posix()}", flush=True)

    write_index(output_root, index_rows)
    write_readme(output_root, exported)
    print(f"Exported {exported} pseudocode files to {output_root}")
# Run the exporter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()