from __future__ import annotations import argparse import csv import re import sys from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[1] if str(REPO_ROOT) not in sys.path: sys.path.insert(0, str(REPO_ROOT)) from tools.poc_crusader_usecode_parser import EXTRACTED_ROOT, parse_body_ir, render_pseudocode def load_rows(class_event_index: Path) -> list[dict[str, str]]: with class_event_index.open("r", encoding="utf-8", newline="") as handle: return list(csv.DictReader(handle, delimiter="\t")) def load_layout_by_entry(class_layout_index: Path) -> dict[int, dict[str, str]]: with class_layout_index.open("r", encoding="utf-8", newline="") as handle: rows = list(csv.DictReader(handle, delimiter="\t")) layout_by_entry: dict[int, dict[str, str]] = {} for row in rows: try: entry_index = parse_int(row["entry_index"]) except (KeyError, TypeError, ValueError): continue layout_by_entry[entry_index] = row return layout_by_entry def parse_int(value: str) -> int: return int(value, 0) def safe_name(value: str) -> str: cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "_", value.strip()) return cleaned.strip("._") or "unknown" def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path: class_name = row["class_name_hint"] slot = parse_int(row["slot"]) event_name = row.get("event_name_hint") or f"slot_{slot:02X}" class_dir = output_root / safe_name(class_name) filename = f"slot_{slot:02X}_{safe_name(event_name)}.txt" return class_dir / filename def build_index_row(output_root: Path, row: dict[str, str], path: Path, ir: dict[str, object]) -> dict[str, str]: return { "entry_index": row["entry_index"], "class_name": row["class_name_hint"], "slot": row["slot"], "event_name_hint": row.get("event_name_hint", ""), "derived_body_start": row["derived_body_start"], "derived_body_end": row["derived_body_end"], "derived_body_length": row["derived_body_length"], "decoded_op_count": str(ir["body"]["decoded_op_count"]), "end_reason": str(ir["body"]["end_reason"]), "debug_symbol_count": str(ir["body"]["debug_symbol_count"]), "pseudocode_path": path.relative_to(output_root.parent).as_posix(), } def write_index(output_root: Path, index_rows: list[dict[str, str]]) -> None: index_path = output_root / "index.tsv" fieldnames = [ "entry_index", "class_name", "slot", "event_name_hint", "derived_body_start", "derived_body_end", "derived_body_length", "decoded_op_count", "end_reason", "debug_symbol_count", "pseudocode_path", ] with index_path.open("w", encoding="utf-8", newline="") as handle: writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter="\t") writer.writeheader() writer.writerows(index_rows) def write_readme(output_root: Path, export_count: int) -> None: readme_path = output_root / "README.md" readme_path.write_text( "# USECODE pseudocode export\n\n" f"Generated pseudocode files: {export_count}\n\n" "This folder is produced by tools/export_usecode_pseudocode.py using the current\n" "pseudocode renderer in tools/poc_crusader_usecode_parser.py.\n\n" "- Each class gets its own subfolder.\n" "- Each non-empty decoded slot body is emitted as one text file.\n" "- index.tsv records the body range, op count, end reason, and file path.\n", encoding="utf-8", ) def main() -> None: parser = argparse.ArgumentParser(description="Export pseudocode for all decoded Crusader USECODE bodies") parser.add_argument( "--extracted-root", default=str(EXTRACTED_ROOT), help="Extracted USECODE root containing class_event_index.tsv and chunks/", ) parser.add_argument( "--output-dir", help="Output directory for pseudocode files (default: /pseudocode)", ) parser.add_argument( "--variant", choices=["auto", "regret", "remorse"], default="auto", help="Crusader intrinsic numbering to apply during export (default: auto, fallback regret)", ) args = parser.parse_args() extracted_root = Path(args.extracted_root) class_event_index = extracted_root / "class_event_index.tsv" class_layout_index = extracted_root / "class_layout_index.tsv" output_root = Path(args.output_dir) if args.output_dir else extracted_root / "pseudocode" output_root.mkdir(parents=True, exist_ok=True) rows = load_rows(class_event_index) layout_by_entry = load_layout_by_entry(class_layout_index) index_rows: list[dict[str, str]] = [] exported = 0 for row in rows: if not row.get("derived_body_start") or not row.get("derived_body_end"): continue entry_index = parse_int(row["entry_index"]) layout_row = layout_by_entry.get(entry_index) if layout_row is None: continue ir = parse_body_ir(row, layout_row, None if args.variant == "auto" else args.variant, extracted_root) pseudocode = render_pseudocode(ir) path = output_path_for_row(output_root, row) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(pseudocode, encoding="utf-8") index_rows.append(build_index_row(output_root, row, path, ir)) exported += 1 write_index(output_root, index_rows) write_readme(output_root, exported) print(f"Exported {exported} pseudocode files to {output_root}") if __name__ == "__main__": main()