"""Export pseudocode for all decoded Crusader USECODE bodies.

Reads ``class_event_index.tsv`` and ``class_layout_index.tsv`` from an
extracted USECODE root, decodes every event row that carries a derived body
range, and writes one pseudocode text file per body (grouped into one folder
per class) together with an ``index.tsv`` and a ``README.md``.
"""

from __future__ import annotations

import argparse
import csv
import re
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
# Put the repository root on sys.path so the ``tools`` package import below
# resolves when this file is executed directly as a script.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from tools.poc_crusader_usecode_parser import (
    EXTRACTED_ROOT,
    default_shape_catalog_path,
    load_shape_catalog,
    parse_body_ir,
    render_pseudocode,
)


def load_rows(class_event_index: Path) -> list[dict[str, str]]:
    """Read a tab-separated file with a header row into a list of dict rows.

    Args:
        class_event_index: Path of the TSV file to read (UTF-8).

    Returns:
        One dict per data row, keyed by the header names.
    """
    with class_event_index.open("r", encoding="utf-8", newline="") as handle:
        return list(csv.DictReader(handle, delimiter="\t"))


def load_layout_by_entry(class_layout_index: Path) -> dict[int, dict[str, str]]:
    """Read the class layout TSV and key its rows by integer ``entry_index``.

    Rows whose ``entry_index`` column is missing or cannot be parsed as an
    integer are skipped. When several rows share an index, the last one wins.

    Args:
        class_layout_index: Path of the layout TSV file.

    Returns:
        Mapping from parsed entry index to the raw row.
    """
    layout_by_entry: dict[int, dict[str, str]] = {}
    for row in load_rows(class_layout_index):
        try:
            entry_index = parse_int(row["entry_index"])
        except (KeyError, TypeError, ValueError):
            continue
        layout_by_entry[entry_index] = row
    return layout_by_entry


def parse_int(value: str) -> int:
    """Parse an integer literal, honouring ``0x``/``0o``/``0b`` prefixes.

    Raises:
        ValueError: If ``value`` is not a valid integer literal.
        TypeError: If ``value`` is not a string.
    """
    return int(value, 0)


def safe_name(value: str) -> str:
    """Return ``value`` reduced to characters that are safe in a file name.

    Every run of characters outside ``A-Za-z0-9_.-`` becomes a single
    underscore, leading/trailing dots and underscores are removed, and an
    empty result is replaced by ``"unknown"``.
    """
    cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "_", value.strip())
    return cleaned.strip("._") or "unknown"


def describe_row(row: dict[str, str]) -> str:
    """Build the human-readable label used in progress messages.

    Missing or empty ``class_name_hint``, ``slot`` and ``event_name_hint``
    values fall back to ``"unknown"``, slot 0 and ``slot_XX`` respectively.

    Raises:
        ValueError: If ``slot`` is present but not an integer literal.
    """
    class_name = row.get("class_name_hint") or "unknown"
    # ``or`` (rather than a .get default) also covers a slot that is present
    # but None/empty, which csv.DictReader produces for short rows.
    slot = parse_int(row.get("slot") or "0")
    event_name = row.get("event_name_hint") or f"slot_{slot:02X}"
    entry_index = row.get("entry_index", "?")
    return f"entry {entry_index} {class_name}::{event_name} (slot 0x{slot:02X})"


def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path:
    """Return the pseudocode file path for one event row.

    The path is ``<output_root>/<class>/slot_<XX>_<event>.txt`` with the class
    and event names passed through :func:`safe_name`.

    Raises:
        KeyError: If ``class_name_hint`` or ``slot`` is missing from ``row``.
        ValueError: If ``slot`` is not an integer literal.
    """
    class_name = row["class_name_hint"]
    slot = parse_int(row["slot"])
    event_name = row.get("event_name_hint") or f"slot_{slot:02X}"
    class_dir = output_root / safe_name(class_name)
    filename = f"slot_{slot:02X}_{safe_name(event_name)}.txt"
    return class_dir / filename


def build_index_row(
    output_root: Path,
    row: dict[str, str],
    path: Path,
    ir: dict[str, object],
) -> dict[str, str]:
    """Assemble the ``index.tsv`` record for one exported body.

    Args:
        output_root: Export directory; the recorded path is relative to its
            parent.
        row: Source row from the class event index.
        path: Location of the written pseudocode file.
        ir: Decoded body; ``ir["body"]`` must provide ``decoded_op_count``,
            ``end_reason`` and ``debug_symbol_count``.

    Returns:
        A dict whose keys match the columns written by :func:`write_index`.
    """
    body = ir["body"]
    return {
        "entry_index": row["entry_index"],
        "class_name": row["class_name_hint"],
        "slot": row["slot"],
        "event_name_hint": row.get("event_name_hint", ""),
        "derived_body_start": row["derived_body_start"],
        "derived_body_end": row["derived_body_end"],
        "derived_body_length": row["derived_body_length"],
        "decoded_op_count": str(body["decoded_op_count"]),
        "end_reason": str(body["end_reason"]),
        "debug_symbol_count": str(body["debug_symbol_count"]),
        "pseudocode_path": path.relative_to(output_root.parent).as_posix(),
    }


def write_index(output_root: Path, index_rows: list[dict[str, str]]) -> None:
    """Write ``index.tsv`` (header plus one line per exported body)."""
    index_path = output_root / "index.tsv"
    fieldnames = [
        "entry_index",
        "class_name",
        "slot",
        "event_name_hint",
        "derived_body_start",
        "derived_body_end",
        "derived_body_length",
        "decoded_op_count",
        "end_reason",
        "debug_symbol_count",
        "pseudocode_path",
    ]
    with index_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        writer.writerows(index_rows)


def write_readme(output_root: Path, export_count: int) -> None:
    """Write a short ``README.md`` describing the export folder."""
    readme_path = output_root / "README.md"
    readme_path.write_text(
        "# USECODE pseudocode export\n\n"
        f"Generated pseudocode files: {export_count}\n\n"
        "This folder is produced by tools/export_usecode_pseudocode.py using the current\n"
        "pseudocode renderer in tools/poc_crusader_usecode_parser.py.\n\n"
        "- Each class gets its own subfolder.\n"
        "- Each non-empty decoded slot body is emitted as one text file.\n"
        "- index.tsv records the body range, op count, end reason, and file path.\n",
        encoding="utf-8",
    )


def main() -> None:
    """Parse the command line and export every decodable body."""
    parser = argparse.ArgumentParser(
        description="Export pseudocode for all decoded Crusader USECODE bodies"
    )
    parser.add_argument(
        "--extracted-root",
        default=str(EXTRACTED_ROOT),
        help="Extracted USECODE root containing class_event_index.tsv and chunks/",
    )
    parser.add_argument(
        "--output-dir",
        # The default is resolved below as <extracted-root>/pseudocode.
        help="Output directory for pseudocode files (default: <extracted-root>/pseudocode)",
    )
    parser.add_argument(
        "--variant",
        choices=["auto", "regret", "remorse"],
        default="auto",
        help="Crusader intrinsic numbering to apply during export (default: auto, fallback regret)",
    )
    parser.add_argument(
        "--shape-csv",
        help=(
            "Shape catalog CSV to apply to pseudocode output "
            "(default: Remorse uses /usecode_shape_catalog_remorse.csv; "
            "Regret uses /usecode_shape_catalog_regret.csv)"
        ),
    )
    args = parser.parse_args()

    extracted_root = Path(args.extracted_root)
    class_event_index = extracted_root / "class_event_index.tsv"
    class_layout_index = extracted_root / "class_layout_index.tsv"
    output_root = Path(args.output_dir) if args.output_dir else extracted_root / "pseudocode"
    shape_csv = (
        Path(args.shape_csv)
        if args.shape_csv
        else default_shape_catalog_path(extracted_root, args.variant)
    )
    shape_catalog = load_shape_catalog(shape_csv)
    # "auto" is passed to the parser as None; explicit variants pass through.
    variant = None if args.variant == "auto" else args.variant

    output_root.mkdir(parents=True, exist_ok=True)
    rows = load_rows(class_event_index)
    # Only rows that carry both ends of a derived body range can be decoded.
    work_rows = [
        row for row in rows if row.get("derived_body_start") and row.get("derived_body_end")
    ]
    layout_by_entry = load_layout_by_entry(class_layout_index)
    index_rows: list[dict[str, str]] = []
    exported = 0
    total = len(work_rows)

    print(
        f"Exporting pseudocode from {extracted_root} to {output_root} using variant={args.variant} and shape_csv={shape_csv}",
        flush=True,
    )

    for position, row in enumerate(work_rows, start=1):
        prefix = f"[{position}/{total}]"
        entry_index = parse_int(row["entry_index"])
        label = describe_row(row)
        layout_row = layout_by_entry.get(entry_index)
        if layout_row is None:
            print(f"{prefix} Skipping {label} because no layout row was found", flush=True)
            continue

        print(f"{prefix} Decoding {label}", flush=True)
        ir = parse_body_ir(row, layout_row, variant, extracted_root)
        print(f"{prefix} Rendering {label}", flush=True)
        pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog)

        path = output_path_for_row(output_root, row)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(pseudocode, encoding="utf-8")
        index_rows.append(build_index_row(output_root, row, path, ir))
        exported += 1
        print(
            f"{prefix} Wrote {path.relative_to(output_root.parent).as_posix()}",
            flush=True,
        )

    write_index(output_root, index_rows)
    write_readme(output_root, exported)
    print(f"Exported {exported} pseudocode files to {output_root}")


if __name__ == "__main__":
    main()