Crusader_Decomp/tools/export_usecode_pseudocode.py

from __future__ import annotations

import argparse
import csv
import re
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))


from tools.poc_crusader_usecode_parser import (
    EXTRACTED_ROOT,
    default_shape_catalog_path,
    load_shape_catalog,
    parse_body_ir,
    render_pseudocode,
)


def load_rows(class_event_index: Path) -> list[dict[str, str]]:
    """Read the class/event index TSV and return all of its data rows.

    The file is opened as UTF-8 with ``newline=""`` so the ``csv`` module does
    its own line-ending handling. Each returned dict is keyed by the file's
    header line.
    """
    with class_event_index.open("r", encoding="utf-8", newline="") as tsv_file:
        reader = csv.DictReader(tsv_file, delimiter="\t")
        records = list(reader)
    return records


def load_layout_by_entry(class_layout_index: Path) -> dict[int, dict[str, str]]:
    """Read the class layout TSV and key its rows by integer ``entry_index``.

    Rows whose ``entry_index`` column is missing or cannot be parsed by
    ``parse_int`` are left out of the result. When several rows share an
    entry index, the last one in the file is the one kept.
    """
    with class_layout_index.open("r", encoding="utf-8", newline="") as tsv_file:
        records = list(csv.DictReader(tsv_file, delimiter="\t"))

    by_entry: dict[int, dict[str, str]] = {}
    for record in records:
        try:
            key = parse_int(record["entry_index"])
        except (KeyError, TypeError, ValueError):
            # Best-effort load: a row without a usable entry index is dropped.
            continue
        else:
            by_entry[key] = record
    return by_entry


def parse_int(value: str) -> int:
    """Convert *value* to an int, letting the literal's prefix choose the base.

    Base 0 makes ``int`` accept the same forms as Python integer literals:
    plain decimal, ``0x`` hex, ``0o`` octal and ``0b`` binary. A consequence is
    that non-zero decimals with leading zeros (e.g. ``"010"``) raise
    ``ValueError``.
    """
    prefix_selects_base = 0
    return int(value, prefix_selects_base)


def safe_name(value: str) -> str:
    """Turn *value* into a string usable as a file or directory name.

    Surrounding whitespace is removed, every run of characters other than
    ASCII letters, digits, ``_``, ``.`` and ``-`` becomes a single ``_``, and
    leading/trailing ``.`` and ``_`` are trimmed. If nothing is left the
    result is ``"unknown"``.
    """
    trimmed = value.strip()
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", trimmed)
    candidate = collapsed.strip("._")
    if not candidate:
        return "unknown"
    return candidate


def describe_row(row: dict[str, str]) -> str:
    """Return a one-line label for an index row, for use in progress messages.

    The label has the form ``entry <entry_index> <class>::<event> (slot 0xNN)``.
    Missing or empty columns fall back to ``"?"`` for the entry index,
    ``"unknown"`` for the class, slot ``0`` for the slot, and ``slot_NN`` for
    the event name.

    Raises:
        ValueError: if the slot column is non-empty but not an integer literal
            accepted by ``parse_int``.
    """
    class_name = row.get("class_name_hint") or "unknown"
    # ``or`` rather than a ``get`` default: a present-but-empty (or None) slot
    # cell should fall back to 0 just like a missing column does.
    slot = parse_int(row.get("slot") or "0")
    event_name = row.get("event_name_hint") or f"slot_{slot:02X}"
    entry_index = row.get("entry_index") or "?"
    # Interpolate directly; no "$" placeholder that then has to be stripped
    # again (which would also mangle any field that really contains "::$").
    return f"entry {entry_index} {class_name}::{event_name} (slot 0x{slot:02X})"


def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path:
    """Return the pseudocode file path for *row* beneath *output_root*.

    The layout is ``<output_root>/<class>/slot_NN_<event>.txt``, where ``NN``
    is the slot as two upper-case hex digits and both name components are
    passed through ``safe_name``. A row without an event name hint uses
    ``slot_NN`` as the event name.
    """
    class_hint = row["class_name_hint"]
    slot_number = parse_int(row["slot"])
    slot_tag = f"slot_{slot_number:02X}"

    event_hint = row.get("event_name_hint")
    if not event_hint:
        event_hint = slot_tag

    return output_root.joinpath(
        safe_name(class_hint),
        f"{slot_tag}_{safe_name(event_hint)}.txt",
    )


def build_index_row(output_root: Path, row: dict[str, str], path: Path, ir: dict[str, object]) -> dict[str, str]:
    """Assemble the ``index.tsv`` record for one exported body.

    Identification and body-range columns are copied from *row*; the decode
    statistics come from ``ir["body"]`` and are stringified; the pseudocode
    path is stored relative to the parent of *output_root*, with forward
    slashes.
    """
    index_row: dict[str, str] = {
        "entry_index": row["entry_index"],
        "class_name": row["class_name_hint"],
        "slot": row["slot"],
        "event_name_hint": row.get("event_name_hint", ""),
    }

    # Body range columns are carried over under the same names.
    for column in ("derived_body_start", "derived_body_end", "derived_body_length"):
        index_row[column] = row[column]

    # Decode statistics may be ints or other objects; the index stores text.
    body_stats = ir["body"]
    for column in ("decoded_op_count", "end_reason", "debug_symbol_count"):
        index_row[column] = str(body_stats[column])

    index_row["pseudocode_path"] = path.relative_to(output_root.parent).as_posix()
    return index_row


def write_index(output_root: Path, index_rows: list[dict[str, str]]) -> None:
    """Write *index_rows* to ``<output_root>/index.tsv`` as tab-separated text.

    A header line is always written, followed by one line per row, with the
    columns in the fixed order listed below. The file is UTF-8 and any
    existing file is overwritten.
    """
    columns = [
        # Identification of the body.
        "entry_index",
        "class_name",
        "slot",
        "event_name_hint",
        # Body range.
        "derived_body_start",
        "derived_body_end",
        "derived_body_length",
        # Decode statistics.
        "decoded_op_count",
        "end_reason",
        "debug_symbol_count",
        # Location of the exported text file.
        "pseudocode_path",
    ]
    with (output_root / "index.tsv").open("w", encoding="utf-8", newline="") as tsv_file:
        tsv_writer = csv.DictWriter(tsv_file, fieldnames=columns, delimiter="\t")
        tsv_writer.writeheader()
        for record in index_rows:
            tsv_writer.writerow(record)


def write_readme(output_root: Path, export_count: int) -> None:
    """Write ``<output_root>/README.md`` describing the export folder.

    The text states how many pseudocode files were generated
    (*export_count*), which tools produced the folder, and how the folder is
    laid out. The file is UTF-8 and any existing README is overwritten.
    """
    lines = [
        "# USECODE pseudocode export",
        "",
        f"Generated pseudocode files: {export_count}",
        "",
        "This folder is produced by tools/export_usecode_pseudocode.py using the current",
        "pseudocode renderer in tools/poc_crusader_usecode_parser.py.",
        "",
        "- Each class gets its own subfolder.",
        "- Each non-empty decoded slot body is emitted as one text file.",
        "- index.tsv records the body range, op count, end reason, and file path.",
    ]
    # Every line, including the last, is newline-terminated.
    readme_text = "\n".join(lines) + "\n"
    (output_root / "README.md").write_text(readme_text, encoding="utf-8")


def main() -> None:
    """Command-line entry point: export pseudocode for every indexed body.

    Steps, in order:

    1. Parse the command line (extracted root, output directory, variant,
       shape catalog CSV).
    2. Load the shape catalog and create the output directory.
    3. Load ``class_event_index.tsv`` and keep the rows that have both
       ``derived_body_start`` and ``derived_body_end``; load
       ``class_layout_index.tsv`` keyed by entry index.
    4. For each kept row that has a layout row, decode it with
       ``parse_body_ir``, render it with ``render_pseudocode`` and write the
       text file; rows without a layout row are reported and skipped.
    5. Write ``index.tsv`` and ``README.md`` and print a summary.

    Progress lines are flushed as they are printed.
    """
    cli = argparse.ArgumentParser(description="Export pseudocode for all decoded Crusader USECODE bodies")
    cli.add_argument(
        "--extracted-root",
        default=str(EXTRACTED_ROOT),
        help="Extracted USECODE root containing class_event_index.tsv and chunks/",
    )
    cli.add_argument(
        "--output-dir",
        help="Output directory for pseudocode files (default: <extracted-root>/pseudocode)",
    )
    cli.add_argument(
        "--variant",
        choices=["auto", "regret", "remorse"],
        default="auto",
        help="Crusader intrinsic numbering to apply during export (default: auto, fallback regret)",
    )
    cli.add_argument(
        "--shape-csv",
        help=(
            "Shape catalog CSV to apply to pseudocode output "
            "(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
            "Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
        ),
    )
    options = cli.parse_args()

    extracted_root = Path(options.extracted_root)
    event_index_path = extracted_root / "class_event_index.tsv"
    layout_index_path = extracted_root / "class_layout_index.tsv"

    if options.output_dir:
        output_root = Path(options.output_dir)
    else:
        output_root = extracted_root / "pseudocode"

    if options.shape_csv:
        shape_csv = Path(options.shape_csv)
    else:
        shape_csv = default_shape_catalog_path(extracted_root, options.variant)
    shape_catalog = load_shape_catalog(shape_csv)
    output_root.mkdir(parents=True, exist_ok=True)

    # Only rows with both ends of a derived body range are exported.
    work_rows = [
        candidate
        for candidate in load_rows(event_index_path)
        if candidate.get("derived_body_start") and candidate.get("derived_body_end")
    ]
    layout_by_entry = load_layout_by_entry(layout_index_path)
    total = len(work_rows)
    # "auto" is passed to the decoder as None; an explicit variant is passed through.
    variant_override = None if options.variant == "auto" else options.variant
    index_rows: list[dict[str, str]] = []

    print(
        f"Exporting pseudocode from {extracted_root} to {output_root} using variant={options.variant} and shape_csv={shape_csv}",
        flush=True,
    )

    for position, row in enumerate(work_rows, start=1):
        layout_row = layout_by_entry.get(parse_int(row["entry_index"]))
        label = describe_row(row)
        if layout_row is None:
            print(
                f"[{position}/{total}] Skipping {label} because no layout row was found",
                flush=True,
            )
            continue

        print(f"[{position}/{total}] Decoding {label}", flush=True)
        ir = parse_body_ir(row, layout_row, variant_override, extracted_root)
        print(f"[{position}/{total}] Rendering {label}", flush=True)
        pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog)

        target = output_path_for_row(output_root, row)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(pseudocode, encoding="utf-8")
        index_rows.append(build_index_row(output_root, row, target, ir))
        print(f"[{position}/{total}] Wrote {target.relative_to(output_root.parent).as_posix()}", flush=True)

    # One index row is appended per written file, so the list length is the export count.
    exported = len(index_rows)
    write_index(output_root, index_rows)
    write_readme(output_root, exported)
    print(f"Exported {exported} pseudocode files to {output_root}")


if __name__ == "__main__":
    main()
Stuff 2026-03-25 23:32:36 +01:00			`from __future__ import annotations`

			`import argparse`
			`import csv`
			`import re`
			`import sys`
			`from pathlib import Path`


			`REPO_ROOT = Path(__file__).resolve().parents[1]`
			`if str(REPO_ROOT) not in sys.path:`
			`sys.path.insert(0, str(REPO_ROOT))`


Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`from tools.poc_crusader_usecode_parser import (`
			`EXTRACTED_ROOT,`
			`default_shape_catalog_path,`
			`load_shape_catalog,`
			`parse_body_ir,`
			`render_pseudocode,`
			`)`
Stuff 2026-03-25 23:32:36 +01:00

Usecode pseudocode 2026-03-26 00:37:17 +01:00			`def load_rows(class_event_index: Path) -> list[dict[str, str]]:`
			`with class_event_index.open("r", encoding="utf-8", newline="") as handle:`
Stuff 2026-03-25 23:32:36 +01:00			`return list(csv.DictReader(handle, delimiter="\t"))`


Usecode pseudocode 2026-03-26 00:37:17 +01:00			`def load_layout_by_entry(class_layout_index: Path) -> dict[int, dict[str, str]]:`
			`with class_layout_index.open("r", encoding="utf-8", newline="") as handle:`
Stuff 2026-03-25 23:32:36 +01:00			`rows = list(csv.DictReader(handle, delimiter="\t"))`
			`layout_by_entry: dict[int, dict[str, str]] = {}`
			`for row in rows:`
			`try:`
			`entry_index = parse_int(row["entry_index"])`
			`except (KeyError, TypeError, ValueError):`
			`continue`
			`layout_by_entry[entry_index] = row`
			`return layout_by_entry`


			`def parse_int(value: str) -> int:`
			`return int(value, 0)`


			`def safe_name(value: str) -> str:`
			`cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "_", value.strip())`
			`return cleaned.strip("._") or "unknown"`


Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`def describe_row(row: dict[str, str]) -> str:`
			`class_name = row.get("class_name_hint") or "unknown"`
			`slot = parse_int(row.get("slot", "0"))`
			`event_name = row.get("event_name_hint") or f"slot_{slot:02X}"`
			`entry_index = row.get("entry_index", "?")`
			`return f"entry {entry_index} {class_name}::${event_name}".replace("::$", "::") + f" (slot 0x{slot:02X})"`


Stuff 2026-03-25 23:32:36 +01:00			`def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path:`
			`class_name = row["class_name_hint"]`
			`slot = parse_int(row["slot"])`
			`event_name = row.get("event_name_hint") or f"slot_{slot:02X}"`
			`class_dir = output_root / safe_name(class_name)`
			`filename = f"slot_{slot:02X}_{safe_name(event_name)}.txt"`
			`return class_dir / filename`


			`def build_index_row(output_root: Path, row: dict[str, str], path: Path, ir: dict[str, object]) -> dict[str, str]:`
			`return {`
			`"entry_index": row["entry_index"],`
			`"class_name": row["class_name_hint"],`
			`"slot": row["slot"],`
			`"event_name_hint": row.get("event_name_hint", ""),`
			`"derived_body_start": row["derived_body_start"],`
			`"derived_body_end": row["derived_body_end"],`
			`"derived_body_length": row["derived_body_length"],`
			`"decoded_op_count": str(ir["body"]["decoded_op_count"]),`
			`"end_reason": str(ir["body"]["end_reason"]),`
			`"debug_symbol_count": str(ir["body"]["debug_symbol_count"]),`
			`"pseudocode_path": path.relative_to(output_root.parent).as_posix(),`
			`}`


			`def write_index(output_root: Path, index_rows: list[dict[str, str]]) -> None:`
			`index_path = output_root / "index.tsv"`
			`fieldnames = [`
			`"entry_index",`
			`"class_name",`
			`"slot",`
			`"event_name_hint",`
			`"derived_body_start",`
			`"derived_body_end",`
			`"derived_body_length",`
			`"decoded_op_count",`
			`"end_reason",`
			`"debug_symbol_count",`
			`"pseudocode_path",`
			`]`
			`with index_path.open("w", encoding="utf-8", newline="") as handle:`
			`writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter="\t")`
			`writer.writeheader()`
			`writer.writerows(index_rows)`


			`def write_readme(output_root: Path, export_count: int) -> None:`
			`readme_path = output_root / "README.md"`
			`readme_path.write_text(`
			`"# USECODE pseudocode export\n\n"`
			`f"Generated pseudocode files: {export_count}\n\n"`
			`"This folder is produced by tools/export_usecode_pseudocode.py using the current\n"`
			`"pseudocode renderer in tools/poc_crusader_usecode_parser.py.\n\n"`
			`"- Each class gets its own subfolder.\n"`
			`"- Each non-empty decoded slot body is emitted as one text file.\n"`
			`"- index.tsv records the body range, op count, end reason, and file path.\n",`
			`encoding="utf-8",`
			`)`


			`def main() -> None:`
			`parser = argparse.ArgumentParser(description="Export pseudocode for all decoded Crusader USECODE bodies")`
Usecode pseudocode 2026-03-26 00:37:17 +01:00			`parser.add_argument(`
			`"--extracted-root",`
			`default=str(EXTRACTED_ROOT),`
			`help="Extracted USECODE root containing class_event_index.tsv and chunks/",`
			`)`
Stuff 2026-03-25 23:32:36 +01:00			`parser.add_argument(`
			`"--output-dir",`
Usecode pseudocode 2026-03-26 00:37:17 +01:00			`help="Output directory for pseudocode files (default: <extracted-root>/pseudocode)",`
			`)`
			`parser.add_argument(`
			`"--variant",`
			`choices=["auto", "regret", "remorse"],`
			`default="auto",`
			`help="Crusader intrinsic numbering to apply during export (default: auto, fallback regret)",`
Stuff 2026-03-25 23:32:36 +01:00			`)`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`parser.add_argument(`
			`"--shape-csv",`
			`help=(`
			`"Shape catalog CSV to apply to pseudocode output "`
			`"(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "`
			`"Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"`
			`),`
			`)`
Stuff 2026-03-25 23:32:36 +01:00			`args = parser.parse_args()`

Usecode pseudocode 2026-03-26 00:37:17 +01:00			`extracted_root = Path(args.extracted_root)`
			`class_event_index = extracted_root / "class_event_index.tsv"`
			`class_layout_index = extracted_root / "class_layout_index.tsv"`
			`output_root = Path(args.output_dir) if args.output_dir else extracted_root / "pseudocode"`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`shape_csv = Path(args.shape_csv) if args.shape_csv else default_shape_catalog_path(extracted_root, args.variant)`
			`shape_catalog = load_shape_catalog(shape_csv)`
Stuff 2026-03-25 23:32:36 +01:00			`output_root.mkdir(parents=True, exist_ok=True)`

Usecode pseudocode 2026-03-26 00:37:17 +01:00			`rows = load_rows(class_event_index)`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`work_rows = [row for row in rows if row.get("derived_body_start") and row.get("derived_body_end")]`
Usecode pseudocode 2026-03-26 00:37:17 +01:00			`layout_by_entry = load_layout_by_entry(class_layout_index)`
Stuff 2026-03-25 23:32:36 +01:00			`index_rows: list[dict[str, str]] = []`
			`exported = 0`

Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`print(`
			`f"Exporting pseudocode from {extracted_root} to {output_root} using variant={args.variant} and shape_csv={shape_csv}",`
			`flush=True,`
			`)`
Stuff 2026-03-25 23:32:36 +01:00
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`for position, row in enumerate(work_rows, start=1):`
Stuff 2026-03-25 23:32:36 +01:00			`entry_index = parse_int(row["entry_index"])`
			`layout_row = layout_by_entry.get(entry_index)`
			`if layout_row is None:`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`print(`
			`f"[{position}/{len(work_rows)}] Skipping {describe_row(row)} because no layout row was found",`
			`flush=True,`
			`)`
Stuff 2026-03-25 23:32:36 +01:00			`continue`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00
			`label = describe_row(row)`
			`print(f"[{position}/{len(work_rows)}] Decoding {label}", flush=True)`
Usecode pseudocode 2026-03-26 00:37:17 +01:00			`ir = parse_body_ir(row, layout_row, None if args.variant == "auto" else args.variant, extracted_root)`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`print(f"[{position}/{len(work_rows)}] Rendering {label}", flush=True)`
			`pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog)`
Stuff 2026-03-25 23:32:36 +01:00
			`path = output_path_for_row(output_root, row)`
			`path.parent.mkdir(parents=True, exist_ok=True)`
			`path.write_text(pseudocode, encoding="utf-8")`
			`index_rows.append(build_index_row(output_root, row, path, ir))`
			`exported += 1`
Pseudocode decompilation improvements and docs 2026-03-26 22:10:48 +01:00			`print(f"[{position}/{len(work_rows)}] Wrote {path.relative_to(output_root.parent).as_posix()}", flush=True)`
Stuff 2026-03-25 23:32:36 +01:00
			`write_index(output_root, index_rows)`
			`write_readme(output_root, exported)`
			`print(f"Exported {exported} pseudocode files to {output_root}")`


			`if __name__ == "__main__":`
			`main()`