148 lines
5 KiB
Python
148 lines
5 KiB
Python
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import csv
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
# Repository root is the directory one level above the folder holding this
# script. It is put at the front of sys.path so the absolute ``tools.*``
# import below resolves when this file is executed directly.
REPO_ROOT = Path(__file__).resolve().parents[1]
_REPO_ROOT_ENTRY = str(REPO_ROOT)
if _REPO_ROOT_ENTRY not in sys.path:
    sys.path.insert(0, _REPO_ROOT_ENTRY)
|
||
|
|
|
||
|
|
|
||
|
|
from tools.poc_crusader_usecode_parser import CLASS_LAYOUT_INDEX, EXTRACTED_ROOT, parse_body_ir, render_pseudocode
|
||
|
|
|
||
|
|
|
||
|
|
CLASS_EVENT_INDEX = EXTRACTED_ROOT / "class_event_index.tsv"
|
||
|
|
|
||
|
|
|
||
|
|
def load_rows() -> list[dict[str, str]]:
    """Read the class/event index TSV and return all rows as dicts.

    The file at ``CLASS_EVENT_INDEX`` is parsed as tab-separated UTF-8 with
    a header line; each returned dict maps column name to cell text.
    """
    with CLASS_EVENT_INDEX.open("r", encoding="utf-8", newline="") as stream:
        reader = csv.DictReader(stream, delimiter="\t")
        # Materialize while the file is still open; the reader is lazy.
        return [*reader]
|
||
|
|
|
||
|
|
|
||
|
|
def load_layout_by_entry() -> dict[int, dict[str, str]]:
    """Read the class layout TSV and key its rows by integer ``entry_index``.

    Rows whose ``entry_index`` column is missing or cannot be parsed by
    ``parse_int`` are silently skipped. When several rows share the same
    entry index, the last one read wins.
    """
    with CLASS_LAYOUT_INDEX.open("r", encoding="utf-8", newline="") as stream:
        records = list(csv.DictReader(stream, delimiter="\t"))

    by_entry: dict[int, dict[str, str]] = {}
    for record in records:
        try:
            key = parse_int(record["entry_index"])
        except (KeyError, TypeError, ValueError):
            # No usable key for this row, so it cannot be indexed.
            continue
        else:
            by_entry[key] = record
    return by_entry
|
||
|
|
|
||
|
|
|
||
|
|
def parse_int(value: str) -> int:
    """Convert *value* to an int, inferring the base from its prefix.

    Base 0 makes ``int`` follow Python integer-literal rules: ``0x``/``0o``/
    ``0b`` prefixes select hex/octal/binary, anything else is decimal.
    Text that is not a valid literal raises ``ValueError``.
    """
    return int(value, base=0)
|
||
|
|
|
||
|
|
|
||
|
|
def safe_name(value: str) -> str:
    """Turn *value* into a string usable as a file or directory name.

    Surrounding whitespace is removed, every run of characters outside
    ``A-Z a-z 0-9 _ . -`` collapses to a single underscore, and leading or
    trailing dots/underscores are trimmed. If nothing is left, the result
    is ``"unknown"``.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", value.strip())
    trimmed = collapsed.strip("._")
    if trimmed:
        return trimmed
    return "unknown"
|
||
|
|
|
||
|
|
|
||
|
|
def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path:
    """Return the pseudocode file path for one class/event index row.

    The layout is ``<output_root>/<class>/slot_<XX>_<event>.txt`` where
    ``<XX>`` is the slot number as at least two upper-case hex digits and
    both name components are passed through ``safe_name``. When the row has
    no (or an empty) ``event_name_hint``, ``slot_<XX>`` stands in for the
    event name.
    """
    class_name = row["class_name_hint"]
    slot = parse_int(row["slot"])

    event_name = row.get("event_name_hint")
    if not event_name:
        event_name = f"slot_{slot:02X}"

    return output_root / safe_name(class_name) / f"slot_{slot:02X}_{safe_name(event_name)}.txt"
|
||
|
|
|
||
|
|
|
||
|
|
def build_index_row(output_root: Path, row: dict[str, str], path: Path, ir: dict[str, object]) -> dict[str, str]:
    """Assemble the ``index.tsv`` record describing one exported body.

    Identification and body-range columns are copied from *row*; the op
    count, end reason and debug symbol count are read from ``ir["body"]``
    and stringified. ``pseudocode_path`` is *path* expressed relative to
    the parent of *output_root*, using forward slashes.
    """
    record: dict[str, str] = {
        "entry_index": row["entry_index"],
        "class_name": row["class_name_hint"],
        "slot": row["slot"],
        "event_name_hint": row.get("event_name_hint", ""),
    }

    # Body range columns are passed through unchanged from the source row.
    for column in ("derived_body_start", "derived_body_end", "derived_body_length"):
        record[column] = row[column]

    # Decoder statistics live under the IR's "body" mapping.
    body = ir["body"]
    for column in ("decoded_op_count", "end_reason", "debug_symbol_count"):
        record[column] = str(body[column])

    record["pseudocode_path"] = path.relative_to(output_root.parent).as_posix()
    return record
|
||
|
|
|
||
|
|
|
||
|
|
def write_index(output_root: Path, index_rows: list[dict[str, str]]) -> None:
    """Write *index_rows* to ``<output_root>/index.tsv`` as tab-separated text.

    A header line is always emitted, followed by one line per row with the
    columns in the fixed order listed below. Any existing file is
    overwritten.
    """
    columns = (
        "entry_index",
        "class_name",
        "slot",
        "event_name_hint",
        "derived_body_start",
        "derived_body_end",
        "derived_body_length",
        "decoded_op_count",
        "end_reason",
        "debug_symbol_count",
        "pseudocode_path",
    )
    # newline="" leaves line termination entirely to the csv module.
    with (output_root / "index.tsv").open("w", encoding="utf-8", newline="") as stream:
        tsv = csv.DictWriter(stream, fieldnames=columns, delimiter="\t")
        tsv.writeheader()
        tsv.writerows(index_rows)
|
||
|
|
|
||
|
|
|
||
|
|
def write_readme(output_root: Path, export_count: int) -> None:
    """Write ``<output_root>/README.md`` describing the export.

    The text states how many pseudocode files were generated
    (*export_count*) and summarizes the folder layout. Any existing file is
    overwritten.
    """
    content = "".join(
        (
            "# USECODE pseudocode export\n\n",
            f"Generated pseudocode files: {export_count}\n\n",
            "This folder is produced by tools/export_usecode_pseudocode.py using the current\n",
            "pseudocode renderer in tools/poc_crusader_usecode_parser.py.\n\n",
            "- Each class gets its own subfolder.\n",
            "- Each non-empty decoded slot body is emitted as one text file.\n",
            "- index.tsv records the body range, op count, end reason, and file path.\n",
        )
    )
    (output_root / "README.md").write_text(content, encoding="utf-8")
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> None:
    """Export one pseudocode text file per decodable slot body.

    Steps:
      1. Parse ``--output-dir`` and create that directory if needed.
      2. Load the class/event index rows and the layout rows keyed by entry.
      3. For every event row that has both a body start and a body end and
         a matching layout row, build the IR, render it, and write it to
         the path chosen by ``output_path_for_row``.
      4. Write ``index.tsv`` and ``README.md`` and print a summary line.
    """
    parser = argparse.ArgumentParser(description="Export pseudocode for all decoded Crusader USECODE bodies")
    parser.add_argument(
        "--output-dir",
        default=str(EXTRACTED_ROOT / "pseudocode"),
        help="Output directory for pseudocode files (default: USECODE/EUSECODE_extracted/pseudocode)",
    )
    output_root = Path(parser.parse_args().output_dir)
    output_root.mkdir(parents=True, exist_ok=True)

    event_rows = load_rows()
    layout_by_entry = load_layout_by_entry()
    index_rows: list[dict[str, str]] = []

    for event_row in event_rows:
        # Rows without a derived body range have nothing to decode.
        has_body_range = event_row.get("derived_body_start") and event_row.get("derived_body_end")
        if not has_body_range:
            continue

        layout_row = layout_by_entry.get(parse_int(event_row["entry_index"]))
        if layout_row is None:
            # No layout information for this entry, so it cannot be parsed.
            continue

        ir = parse_body_ir(event_row, layout_row)
        pseudocode = render_pseudocode(ir)

        target = output_path_for_row(output_root, event_row)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(pseudocode, encoding="utf-8")
        index_rows.append(build_index_row(output_root, event_row, target, ir))

    # Exactly one index row is recorded per file written.
    exported = len(index_rows)
    write_index(output_root, index_rows)
    write_readme(output_root, exported)
    print(f"Exported {exported} pseudocode files to {output_root}")


if __name__ == "__main__":
    main()
|