Pseudocode decompilation improvements and docs
This commit is contained in:
parent
f869a181a3
commit
589bfc31ef
1898 changed files with 60634 additions and 6597 deletions
|
|
@ -12,7 +12,13 @@ if str(REPO_ROOT) not in sys.path:
|
|||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
|
||||
from tools.poc_crusader_usecode_parser import EXTRACTED_ROOT, parse_body_ir, render_pseudocode
|
||||
from tools.poc_crusader_usecode_parser import (
|
||||
EXTRACTED_ROOT,
|
||||
default_shape_catalog_path,
|
||||
load_shape_catalog,
|
||||
parse_body_ir,
|
||||
render_pseudocode,
|
||||
)
|
||||
|
||||
|
||||
def load_rows(class_event_index: Path) -> list[dict[str, str]]:
|
||||
|
|
@ -42,6 +48,14 @@ def safe_name(value: str) -> str:
|
|||
return cleaned.strip("._") or "unknown"
|
||||
|
||||
|
||||
def describe_row(row: dict[str, str]) -> str:
    """Return a short human-readable label for a class-event index row.

    The label has the form ``entry <index> <class>::<event> (slot 0xNN)``.

    Args:
        row: A class-event index row. ``class_name_hint``, ``event_name_hint``,
            ``slot`` and ``entry_index`` are read; all of them are optional.

    Returns:
        The label. A missing or empty class hint is shown as ``unknown``, a
        missing or empty event hint as ``slot_NN`` and a missing entry index
        as ``?``.
    """
    class_name = row.get("class_name_hint") or "unknown"
    slot = parse_int(row.get("slot", "0"))
    event_name = row.get("event_name_hint") or f"slot_{slot:02X}"
    entry_index = row.get("entry_index", "?")
    # Format the label directly. The previous version emitted a "$" placeholder
    # and removed it with a global replace of "::$", which also rewrote any
    # "::$" that happened to be part of the class name or entry index.
    return f"entry {entry_index} {class_name}::{event_name} (slot 0x{slot:02X})"
|
||||
|
||||
|
||||
def output_path_for_row(output_root: Path, row: dict[str, str]) -> Path:
|
||||
class_name = row["class_name_hint"]
|
||||
slot = parse_int(row["slot"])
|
||||
|
|
@ -119,35 +133,57 @@ def main() -> None:
|
|||
default="auto",
|
||||
help="Crusader intrinsic numbering to apply during export (default: auto, fallback regret)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--shape-csv",
|
||||
help=(
|
||||
"Shape catalog CSV to apply to pseudocode output "
|
||||
"(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
|
||||
"Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
|
||||
),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
extracted_root = Path(args.extracted_root)
|
||||
class_event_index = extracted_root / "class_event_index.tsv"
|
||||
class_layout_index = extracted_root / "class_layout_index.tsv"
|
||||
output_root = Path(args.output_dir) if args.output_dir else extracted_root / "pseudocode"
|
||||
shape_csv = Path(args.shape_csv) if args.shape_csv else default_shape_catalog_path(extracted_root, args.variant)
|
||||
shape_catalog = load_shape_catalog(shape_csv)
|
||||
output_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
rows = load_rows(class_event_index)
|
||||
work_rows = [row for row in rows if row.get("derived_body_start") and row.get("derived_body_end")]
|
||||
layout_by_entry = load_layout_by_entry(class_layout_index)
|
||||
index_rows: list[dict[str, str]] = []
|
||||
exported = 0
|
||||
|
||||
for row in rows:
|
||||
if not row.get("derived_body_start") or not row.get("derived_body_end"):
|
||||
continue
|
||||
print(
|
||||
f"Exporting pseudocode from {extracted_root} to {output_root} using variant={args.variant} and shape_csv={shape_csv}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
for position, row in enumerate(work_rows, start=1):
|
||||
entry_index = parse_int(row["entry_index"])
|
||||
layout_row = layout_by_entry.get(entry_index)
|
||||
if layout_row is None:
|
||||
print(
|
||||
f"[{position}/{len(work_rows)}] Skipping {describe_row(row)} because no layout row was found",
|
||||
flush=True,
|
||||
)
|
||||
continue
|
||||
|
||||
label = describe_row(row)
|
||||
print(f"[{position}/{len(work_rows)}] Decoding {label}", flush=True)
|
||||
ir = parse_body_ir(row, layout_row, None if args.variant == "auto" else args.variant, extracted_root)
|
||||
pseudocode = render_pseudocode(ir)
|
||||
print(f"[{position}/{len(work_rows)}] Rendering {label}", flush=True)
|
||||
pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog)
|
||||
|
||||
path = output_path_for_row(output_root, row)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(pseudocode, encoding="utf-8")
|
||||
index_rows.append(build_index_row(output_root, row, path, ir))
|
||||
exported += 1
|
||||
print(f"[{position}/{len(work_rows)}] Wrote {path.relative_to(output_root.parent).as_posix()}", flush=True)
|
||||
|
||||
write_index(output_root, index_rows)
|
||||
write_readme(output_root, exported)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import csv
|
|||
import hashlib
|
||||
import json
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
|
@ -457,6 +458,158 @@ VARIANT_INTRINSIC_CALLSITE_HINTS: dict[str, dict[tuple[int, int], str]] = {
|
|||
}
|
||||
|
||||
|
||||
# Display names keyed by (target class id, event slot).
# target_event_display_name consults this table before the hint carried by the
# op's own operands.
CLASS_EVENT_NAME_HINTS: dict[tuple[int, int], str] = {
    (0x0A0C, 0x32): "waitNTimerTicks",
}

# Keyword names for the third token of a four-token loop selector.
# try_decode_loop_selector renders the looked-up name as the keyword in
# "nearby_items(<name>=..., origin=...)".
LOOP_SELECTOR_FIELD_HINTS = {
    0x3A: "family",
    0x40: "shape",
}

# Catalog file name used by default_shape_catalog_path when the game variant has
# no entry in SHAPE_CATALOG_FILENAMES.
SHAPE_CATALOG_FILENAME = "usecode_shape_catalog.csv"
# Per-variant catalog file names, resolved relative to the extracted root.
SHAPE_CATALOG_FILENAMES = {
    "remorse": "usecode_shape_catalog_remorse.csv",
    "regret": "usecode_shape_catalog_regret.csv",
}
# Column names of a shape catalog CSV; these are the same keys load_shape_catalog
# reads from each row.
# NOTE(review): no use of this list is visible in this chunk — presumably it is
# used when writing catalogs; confirm.
SHAPE_CATALOG_FIELDNAMES = ["shape_code", "human_readable_id", "description"]
# Regex fragment for a numeric literal: "0x" followed by hex digits, or plain
# decimal digits.
NUMERIC_SHAPE_LITERAL_PATTERN = r"(?:0x[0-9A-Fa-f]+|\d+)"
# Patterns that locate numeric shape literals in rendered pseudocode. Each one
# exposes a "prefix" group (text kept as-is) and a "value" group (the literal),
# which is what iter_shape_code_matches and apply_shape_catalog_to_pseudocode
# rely on.
SHAPE_REFERENCE_PATTERNS = (
    # Keyword argument form: shape=<literal>
    re.compile(rf"(?P<prefix>\bshape=)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
    # Comparison of Item.getShape(...) / Item.getType(...) against a literal.
    re.compile(
        rf"(?P<prefix>\bItem\.(?:getShape|getType)\([^\)\n]*\)\s*(?:==|!=|<=|>=|<|>)\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"
    ),
    # Second argument of Item.create(...).
    re.compile(rf"(?P<prefix>\bItem\.create\(\s*[^,\n]+,\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
    # First argument of Item.legal_create(...).
    re.compile(rf"(?P<prefix>\bItem\.legal_create\(\s*)(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"),
)

# Integer shape code -> catalog row with the keys "shape_code",
# "human_readable_id" and "description" (as built by load_shape_catalog).
ShapeCatalog = dict[int, dict[str, str]]
|
||||
|
||||
|
||||
def infer_shape_catalog_variant(extracted_root: Path | str | None = None, game_variant: str | None = None) -> str | None:
    """Work out which game variant's shape catalog applies.

    Sources are tried in order and the first one that yields a variant wins:

    1. ``game_variant`` after ``normalize_game_variant``.
    2. ``infer_game_variant_from_path`` applied to the resolved extracted root.
    3. The position of the resolved root relative to ``REPO_ROOT``.

    Returns:
        The variant name, or ``None`` when no source produces one.
    """
    explicit_variant = normalize_game_variant(game_variant)
    if explicit_variant is not None:
        return explicit_variant

    root = resolve_extracted_root(extracted_root)
    path_variant = infer_game_variant_from_path(root)
    if path_variant is not None:
        return path_variant

    try:
        repo_relative = root.resolve().relative_to(REPO_ROOT.resolve())
    except ValueError:
        # The root is not located under the repository, so the layout-based
        # fallback below cannot apply.
        return None

    lowered_parts = tuple(part.lower() for part in repo_relative.parts)
    known_layouts = (
        (("usecode", "eusecode_extracted"), "remorse"),
        (("usecode", "regret", "regret_usecode_extracted"), "regret"),
    )
    for leading_parts, variant in known_layouts:
        if lowered_parts[: len(leading_parts)] == leading_parts:
            return variant
    return None
|
||||
|
||||
|
||||
def default_shape_catalog_path(
    extracted_root: Path | str | None = None,
    game_variant: str | None = None,
) -> Path:
    """Return the default shape catalog CSV path inside the extracted root.

    The file name comes from ``SHAPE_CATALOG_FILENAMES`` for the inferred
    variant; when the variant is unknown or has no entry there, the generic
    ``SHAPE_CATALOG_FILENAME`` is used instead.
    """
    base_dir = resolve_extracted_root(extracted_root)
    catalog_variant = infer_shape_catalog_variant(base_dir, game_variant)
    if catalog_variant in SHAPE_CATALOG_FILENAMES:
        return base_dir / SHAPE_CATALOG_FILENAMES[catalog_variant]
    return base_dir / SHAPE_CATALOG_FILENAME
|
||||
|
||||
|
||||
def format_shape_code(shape_code: int) -> str:
    """Render a shape code as ``0x`` plus at least four uppercase hex digits."""
    hex_digits = format(shape_code, "04X")
    return "0x" + hex_digits
|
||||
|
||||
|
||||
def load_shape_catalog(path: Path | str | None) -> ShapeCatalog:
    """Load a shape catalog CSV into a dict keyed by integer shape code.

    Args:
        path: Location of the catalog CSV, or ``None`` for "no catalog".

    Returns:
        A mapping from shape code to a row dict with the keys ``shape_code``
        (re-formatted via ``format_shape_code``), ``human_readable_id``
        (whitespace-stripped) and ``description`` (kept verbatim). The mapping
        is empty when ``path`` is ``None`` or does not exist. Rows whose
        ``shape_code`` cell is missing or is rejected by ``try_parse_int`` are
        skipped; when a code appears more than once, the last row wins.
    """
    if path is None:
        return {}

    shape_path = Path(path)
    if not shape_path.exists():
        return {}

    catalog: ShapeCatalog = {}
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark. With plain "utf-8" a BOM stays attached to the first header name, so
    # the "shape_code" column is never found and every row is silently skipped.
    with shape_path.open("r", encoding="utf-8-sig", newline="") as handle:
        for row in csv.DictReader(handle):
            shape_code = try_parse_int((row.get("shape_code") or "").strip())
            if shape_code is None:
                continue
            catalog[shape_code] = {
                "shape_code": format_shape_code(shape_code),
                "human_readable_id": (row.get("human_readable_id") or "").strip(),
                "description": row.get("description") or "",
            }
    return catalog
|
||||
|
||||
|
||||
def shape_catalog_identifier(shape_code: int, shape_catalog: ShapeCatalog | None = None) -> str | None:
    """Return the sanitized catalog name for ``shape_code``, if there is one.

    ``None`` is returned when no (or an empty) catalog is given, when the code
    has no catalog row, or when the row's ``human_readable_id`` is blank.
    """
    entry = shape_catalog.get(shape_code) if shape_catalog else None
    if entry is None:
        return None

    readable_id = (entry.get("human_readable_id") or "").strip()
    return sanitize_identifier(readable_id) if readable_id else None
|
||||
|
||||
|
||||
def format_shape_reference(
    shape_code: int,
    shape_catalog: ShapeCatalog | None = None,
    frame_expr: str | None = None,
) -> str:
    """Format a shape as its catalog name (or hex code), optionally with a frame.

    The catalog identifier is preferred; the ``0x....`` form from
    ``format_shape_code`` is the fallback. A non-blank ``frame_expr`` is
    appended in square brackets exactly as given.
    """
    shape_text = shape_catalog_identifier(shape_code, shape_catalog)
    if not shape_text:
        shape_text = format_shape_code(shape_code)

    if frame_expr is not None and frame_expr.strip():
        return f"{shape_text}[{frame_expr}]"
    return shape_text
|
||||
|
||||
|
||||
def iter_shape_code_matches(text: str):
    """Yield every shape code referenced in ``text``.

    Each pattern in ``SHAPE_REFERENCE_PATTERNS`` is scanned in turn; the
    ``value`` group of every match is parsed with ``try_parse_int`` and yielded
    when parsing succeeds. Codes may repeat.
    """
    for compiled in SHAPE_REFERENCE_PATTERNS:
        literals = (found.group("value") for found in compiled.finditer(text))
        yield from (code for code in map(try_parse_int, literals) if code is not None)
|
||||
|
||||
|
||||
def collect_shape_codes_from_pseudocode(text: str) -> set[int]:
    """Return the distinct shape codes that ``iter_shape_code_matches`` finds in ``text``."""
    codes: set[int] = set()
    codes.update(iter_shape_code_matches(text))
    return codes
|
||||
|
||||
|
||||
def apply_shape_catalog_to_pseudocode(text: str, shape_catalog: ShapeCatalog | None = None) -> str:
    """Replace numeric shape literals in ``text`` with catalog identifiers.

    Without a catalog (``None`` or empty) the text is returned untouched.
    Otherwise every ``SHAPE_REFERENCE_PATTERNS`` pattern is applied in order; a
    match is rewritten only when its literal parses and the catalog supplies an
    identifier for it, and the matched prefix is always preserved.
    """
    if not shape_catalog:
        return text

    def substitute(found: re.Match[str]) -> str:
        # Leave the match as it is unless the literal resolves to a name.
        code = try_parse_int(found.group("value"))
        name = None if code is None else shape_catalog_identifier(code, shape_catalog)
        if name is None:
            return found.group(0)
        return f"{found.group('prefix')}{name}"

    result = text
    for compiled in SHAPE_REFERENCE_PATTERNS:
        result = compiled.sub(substitute, result)
    return result
|
||||
|
||||
|
||||
def generic_loop_selector_call(name: str, arguments: list[tuple[str, str]]) -> str:
    """Render ``name(label=expr, ...)`` from ``(label, expr)`` pairs, in order."""
    pieces = []
    for label, expr in arguments:
        pieces.append("{}={}".format(label, expr))
    return "{}({})".format(name, ", ".join(pieces))
|
||||
|
||||
|
||||
def normalize_game_variant(value: str | None) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
|
|
@ -998,6 +1151,24 @@ def load_tsv_rows(path: Path) -> list[dict[str, str]]:
|
|||
return list(csv.DictReader(handle, delimiter="\t"))
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def load_class_name_hints_by_id(extracted_root_key: str) -> dict[int, str]:
    """Build a class id -> class name hint mapping from the class layout index.

    Rows without a parseable ``class_id`` or with a blank ``class_name_hint``
    are ignored; for a repeated id the last row wins.

    The result is memoized per ``extracted_root_key``, so every caller using the
    same key receives the same dict object and should treat it as read-only.
    """
    _, layout_index_path, _, _ = extracted_root_paths(Path(extracted_root_key))
    parsed_rows = (
        (try_parse_int(row.get("class_id", "")), (row.get("class_name_hint") or "").strip())
        for row in load_tsv_rows(layout_index_path)
    )
    return {
        class_id: class_name
        for class_id, class_name in parsed_rows
        if class_id is not None and class_name
    }
|
||||
|
||||
|
||||
def class_name_hints_by_id(extracted_root: Path | str | None = None) -> dict[int, str]:
    """Return the class id -> name hint mapping for ``extracted_root``.

    The root is resolved first and its string form is used as the key for the
    memoized ``load_class_name_hints_by_id``.
    """
    cache_key = str(resolve_extracted_root(extracted_root))
    return load_class_name_hints_by_id(cache_key)
|
||||
|
||||
|
||||
def find_chunk_file(entry_index: int, extracted_root: Path | str | None = None) -> Path:
|
||||
_, _, _, chunks_dir = extracted_root_paths(extracted_root)
|
||||
matches = sorted(chunks_dir.glob(f"chunk_{entry_index:03d}_*.bin"))
|
||||
|
|
@ -1174,6 +1345,7 @@ def parse_body_ir(
|
|||
resolved_game_variant = resolve_game_variant(game_variant, chunk_file)
|
||||
intrinsic_hints = get_intrinsic_hints(resolved_game_variant, chunk_file)
|
||||
intrinsic_callsite_hints = get_intrinsic_callsite_hints(resolved_game_variant, chunk_file)
|
||||
target_class_name_hints = class_name_hints_by_id(resolved_extracted_root)
|
||||
|
||||
body_start = parse_int(event_row["derived_body_start"])
|
||||
body_end = parse_int(event_row["derived_body_end"])
|
||||
|
|
@ -1190,6 +1362,10 @@ def parse_body_ir(
|
|||
while offset < len(body):
|
||||
result = parse_one_op(body, offset, intrinsic_hints, intrinsic_callsite_hints)
|
||||
if result.op is not None:
|
||||
operands = result.op["operands"]
|
||||
if "target_class_id" in operands:
|
||||
class_id = operands["target_class_id"]
|
||||
operands["target_class_name_hint"] = target_class_name_hints.get(class_id)
|
||||
result.op["absolute_body_offset"] = body_start + result.op["offset"]
|
||||
ops.append(result.op)
|
||||
if result.end_reason is not None:
|
||||
|
|
@ -1598,9 +1774,7 @@ def format_script_statement(op: dict[str, Any], label_map: dict[int, str], body_
|
|||
target = label_map.get(body_start + operands["target_offset"], f"0x{body_start + operands['target_offset']:04X}")
|
||||
return f"call {target}"
|
||||
if mnemonic == "call_class_event":
|
||||
event_hint = operands.get("target_event_name_hint")
|
||||
suffix = f" {event_hint}" if event_hint else ""
|
||||
return f"call class 0x{operands['target_class_id']:04X}.slot 0x{operands['target_event_slot']:02X}{suffix}"
|
||||
return f"call {format_target_event_reference(operands)}"
|
||||
if mnemonic in {"append_unique_inline", "append_unique_indirect", "remove_matching_indirect", "remove_matching_inline"}:
|
||||
return f"{mnemonic} size=0x{operands['element_size']:X}"
|
||||
if mnemonic == "create_list":
|
||||
|
|
@ -1617,17 +1791,13 @@ def format_script_statement(op: dict[str, Any], label_map: dict[int, str], body_
|
|||
target = label_map.get(body_start + operands["target_offset"], f"0x{body_start + operands['target_offset']:04X}")
|
||||
return f"{mnemonic} {operands['target_var']} elem_size=0x{operands['element_size']:X} -> {target}"
|
||||
if mnemonic == "spawn":
|
||||
event_hint = operands.get("target_event_name_hint")
|
||||
suffix = f" {event_hint}" if event_hint else ""
|
||||
return (
|
||||
f"spawn class 0x{operands['target_class_id']:04X}.slot 0x{operands['target_event_slot']:02X}{suffix} "
|
||||
f"spawn {format_target_event_reference(operands)} "
|
||||
f"args=0x{operands['arg_bytes']:02X} this_size=0x{operands['this_size']:02X}"
|
||||
)
|
||||
if mnemonic == "spawn_inline":
|
||||
event_hint = operands.get("target_event_name_hint")
|
||||
suffix = f" {event_hint}" if event_hint else ""
|
||||
return (
|
||||
f"spawn_inline class 0x{operands['target_class_id']:04X}.slot 0x{operands['target_event_slot']:02X}{suffix} "
|
||||
f"spawn_inline {format_target_event_reference(operands)} "
|
||||
f"inline=0x{operands['inline_offset']:04X} this_size=0x{operands['this_size']:02X} unk=0x{operands['unknown']:02X}"
|
||||
)
|
||||
if mnemonic == "line_number":
|
||||
|
|
@ -1701,6 +1871,20 @@ def sanitize_identifier(name: str) -> str:
|
|||
return identifier
|
||||
|
||||
|
||||
def target_event_display_name(operands: dict[str, Any]) -> str:
    """Pick the display name for the event targeted by ``operands``.

    Preference order: the ``CLASS_EVENT_NAME_HINTS`` entry for the
    (class id, slot) pair, then the operands' own ``target_event_name_hint``,
    then the generic ``slot_NN`` name.
    """
    class_id = operands["target_class_id"]
    slot = operands["target_event_slot"]
    candidates = (
        CLASS_EVENT_NAME_HINTS.get((class_id, slot)),
        operands.get("target_event_name_hint"),
    )
    for candidate in candidates:
        if candidate:
            return candidate
    return f"slot_{slot:02X}"
|
||||
|
||||
|
||||
def format_target_event_reference(operands: dict[str, Any]) -> str:
    """Format the class event targeted by ``operands`` as an identifier.

    With a class name hint the result is ``<Class>.<event>``; without one it is
    ``class_<ID as 4 hex digits>_<event>``. Both name parts go through
    ``sanitize_identifier``.
    """
    event_part = sanitize_identifier(target_event_display_name(operands))
    class_hint = operands.get("target_class_name_hint")
    if not class_hint:
        return f"class_{operands['target_class_id']:04X}_{event_part}"
    return f"{sanitize_identifier(class_hint)}.{event_part}"
|
||||
|
||||
|
||||
def build_local_name_map(ir: dict[str, Any]) -> dict[int, str]:
|
||||
return {
|
||||
symbol["bp_offset"]: sanitize_identifier(symbol["name"])
|
||||
|
|
@ -1782,6 +1966,105 @@ def combine_binary(stack: list[tuple[str, int]], operator: str, result_width: in
|
|||
stack.append((f"({left_expr} {operator} {right_expr})", result_width))
|
||||
|
||||
|
||||
def evaluate_loop_setup_op(
    op: dict[str, Any],
    stack: list[tuple[str, int]],
    local_name_map: dict[int, str],
) -> bool:
    """Apply one loop-setup op to the symbolic expression ``stack``.

    Handles plain pushes (via ``push_expr_from_op``), ``push_indirect``, the
    word/dword arithmetic ops and ``line_number`` (a no-op here).

    Returns:
        ``True`` when the op was understood (the stack may have been updated in
        place), ``False`` for any other mnemonic.
    """
    direct_push = push_expr_from_op(op, local_name_map)
    if direct_push is not None:
        stack.append(direct_push)
        return True

    kind = op["mnemonic"]
    operands = op["operands"]

    if kind == "line_number":
        return True

    if kind == "push_indirect":
        # An empty stack is tolerated: the op still counts as handled.
        if stack:
            inner_expr, _ = stack.pop()
            stack.append((f"*({inner_expr})", max(1, operands["size"])))
        return True

    # mnemonic -> (operator text, result width in bytes)
    binary_ops = {
        "add": ("+", 2),
        "add_dword": ("+", 4),
        "sub": ("-", 2),
        "sub_dword": ("-", 4),
        "mul": ("*", 2),
        "mul_dword": ("*", 4),
        "div": ("/", 2),
        "div_dword": ("/", 4),
    }
    spec = binary_ops.get(kind)
    if spec is None:
        return False
    operator_text, result_width = spec
    combine_binary(stack, operator_text, result_width)
    return True
|
||||
|
||||
|
||||
def normalize_loop_origin(expr: str) -> str:
    """Strip surrounding whitespace and one enclosing ``*( ... )`` from ``expr``.

    The dereference wrapper is removed only when the parenthesis opened right
    after the ``*`` is the one closed by the final character. Merely checking
    the first and last characters would turn ``*(a) + *(b)`` into the malformed
    ``a) + *(b``; such expressions are now returned unchanged (apart from the
    whitespace strip).

    Args:
        expr: Expression text.

    Returns:
        The inner expression of a fully wrapping ``*( ... )``, otherwise the
        stripped input.
    """
    normalized = expr.strip()
    if not (normalized.startswith("*(") and normalized.endswith(")")):
        return normalized

    last_position = len(normalized) - 1
    depth = 0
    # Start at index 1, the "(" that follows "*", and find where it closes.
    for position, char in enumerate(normalized[1:], start=1):
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                if position == last_position:
                    return normalized[2:-1]
                return normalized
    # Unbalanced parentheses: leave the expression alone.
    return normalized
|
||||
|
||||
|
||||
def try_decode_loop_selector(
    ops: list[dict[str, Any]],
    start_index: int,
    local_name_map: dict[int, str],
) -> tuple[str, int] | None:
    """Try to summarise a ``loopscr ... loop`` op run as a readable selector.

    Starting at ``start_index``, every ``loopscr`` op contributes its
    ``value_u8`` as a selector token and every other op before the ``loop`` op
    is fed to ``evaluate_loop_setup_op`` to build a symbolic expression stack.

    Two token layouts are recognised, both only for a ``loop`` op with
    ``string_bytes == 0x6`` and ``loop_type == 0x2``:

    * ``0x24, 0x3D, <field>, 0x25`` with ``<field>`` listed in
      ``LOOP_SELECTOR_FIELD_HINTS`` and at least three stack entries, rendered
      as ``<var> in nearby_items(<field>=<value>, origin=<expr>)``.
    * ``0x24, 0x42`` with at least four stack entries, rendered as
      ``<var> in selector_0x42(arg0=..., arg1=..., arg2=..., origin=...)``.

    Args:
        ops: Decoded ops of the body.
        start_index: Index of the first op to consider.
        local_name_map: Names for bp-relative locals, used for display.

    Returns:
        ``(selector_text, index_after_loop_op)``, or ``None`` when the run does
        not match a recognised layout.
    """
    selector_tokens: list[int] = []
    selector_stack: list[tuple[str, int]] = []
    index = start_index

    while index < len(ops):
        op = ops[index]
        mnemonic = op["mnemonic"]
        if mnemonic == "loop":
            break
        if mnemonic == "loopscr":
            selector_tokens.append(op["operands"]["value_u8"])
        elif not evaluate_loop_setup_op(op, selector_stack, local_name_map):
            return None
        index += 1

    if index >= len(ops) or ops[index]["mnemonic"] != "loop":
        return None

    loop_operands = ops[index]["operands"]
    if loop_operands.get("string_bytes") != 0x6 or loop_operands.get("loop_type") != 0x2:
        return None

    if selector_tokens == [0x24, 0x42]:
        if len(selector_stack) < 4:
            return None
        current_var = format_bp_name(loop_operands["current_var"], local_name_map)
        # Build the call text first: a multi-line expression inside a
        # single-quoted f-string replacement field only parses on Python 3.12+.
        selector_call = generic_loop_selector_call(
            "selector_0x42",
            [
                ("arg0", selector_stack[-4][0]),
                ("arg1", selector_stack[-3][0]),
                ("arg2", selector_stack[-2][0]),
                ("origin", normalize_loop_origin(selector_stack[-1][0])),
            ],
        )
        return f"{current_var} in {selector_call}", index + 1

    if (
        len(selector_tokens) != 4
        or selector_tokens[0] != 0x24
        or selector_tokens[1] != 0x3D
        or selector_tokens[3] != 0x25
    ):
        return None

    selector_field = LOOP_SELECTOR_FIELD_HINTS.get(selector_tokens[2])
    if selector_field is None or len(selector_stack) < 3:
        return None

    current_var = format_bp_name(loop_operands["current_var"], local_name_map)
    selector_value = selector_stack[-3][0]
    origin_expr = normalize_loop_origin(selector_stack[-1][0])
    return (
        f"{current_var} in nearby_items({selector_field}={selector_value}, origin={origin_expr})",
        index + 1,
    )
|
||||
|
||||
|
||||
def loop_selector_statement(selector_text: str) -> str:
    """Wrap ``selector_text`` in the ``/* loop_selector ... */`` marker comment."""
    return "/* loop_selector {} */".format(selector_text)
|
||||
|
||||
|
||||
def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]]]:
|
||||
label_map, blocks = build_script_blocks(ir)
|
||||
local_name_map = build_local_name_map(ir)
|
||||
|
|
@ -1799,6 +2082,16 @@ def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]
|
|||
mnemonic = op["mnemonic"]
|
||||
operands = op["operands"]
|
||||
|
||||
if mnemonic == "loopscr":
|
||||
decoded_loop = try_decode_loop_selector(ops, index, local_name_map)
|
||||
if decoded_loop is not None:
|
||||
selector_text, next_index = decoded_loop
|
||||
block_lines.append(loop_selector_statement(selector_text))
|
||||
stack.clear()
|
||||
pending_result = None
|
||||
index = next_index
|
||||
continue
|
||||
|
||||
pushed = push_expr_from_op(op, local_name_map)
|
||||
if pushed is not None:
|
||||
stack.append(pushed)
|
||||
|
|
@ -1855,10 +2148,7 @@ def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]
|
|||
if mnemonic == "call_class_event":
|
||||
arg_text = ", ".join(expr for expr, _ in stack)
|
||||
stack.clear()
|
||||
event_name = operands.get("target_event_name_hint") or f"slot_{operands['target_event_slot']:02X}"
|
||||
block_lines.append(
|
||||
f"class_{operands['target_class_id']:04X}_{sanitize_identifier(event_name)}({arg_text});"
|
||||
)
|
||||
block_lines.append(f"{format_target_event_reference(operands)}({arg_text});")
|
||||
pending_result = None
|
||||
index += 1
|
||||
continue
|
||||
|
|
@ -1866,10 +2156,7 @@ def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]
|
|||
if mnemonic == "spawn":
|
||||
arg_text = ", ".join(expr for expr, _ in stack)
|
||||
stack.clear()
|
||||
event_name = operands.get("target_event_name_hint") or f"slot_{operands['target_event_slot']:02X}"
|
||||
block_lines.append(
|
||||
f"spawn class_{operands['target_class_id']:04X}_{sanitize_identifier(event_name)}({arg_text});"
|
||||
)
|
||||
block_lines.append(f"spawn {format_target_event_reference(operands)}({arg_text});")
|
||||
pending_result = None
|
||||
index += 1
|
||||
continue
|
||||
|
|
@ -1877,9 +2164,8 @@ def decompile_pseudocode_blocks(ir: dict[str, Any]) -> list[tuple[str, list[str]
|
|||
if mnemonic == "spawn_inline":
|
||||
arg_text = ", ".join(expr for expr, _ in stack)
|
||||
stack.clear()
|
||||
event_name = operands.get("target_event_name_hint") or f"slot_{operands['target_event_slot']:02X}"
|
||||
block_lines.append(
|
||||
f"spawn_inline class_{operands['target_class_id']:04X}_{sanitize_identifier(event_name)}({arg_text}) /* inline=0x{operands['inline_offset']:04X} */;"
|
||||
f"spawn_inline {format_target_event_reference(operands)}({arg_text}) /* inline=0x{operands['inline_offset']:04X} */;"
|
||||
)
|
||||
pending_result = None
|
||||
index += 1
|
||||
|
|
@ -2154,12 +2440,25 @@ def parse_selector_condition(condition: str) -> tuple[str, str] | None:
|
|||
return match.group(1).strip(), match.group(2).strip()
|
||||
|
||||
|
||||
def parse_loop_selector_statement(statement: str) -> str | None:
    """Extract the selector text from a ``/* loop_selector ... */`` statement.

    The whole statement has to be the marker comment; anything else yields
    ``None``.
    """
    found = re.fullmatch(r"/\* loop_selector (.+) \*/", statement)
    return found.group(1) if found is not None else None
|
||||
|
||||
|
||||
def is_loop_selector_only_block(statements: list[str]) -> bool:
    """Tell whether a block consists of exactly one loop-selector marker statement."""
    if len(statements) != 1:
        return False
    return parse_loop_selector_statement(statements[0]) is not None
|
||||
|
||||
|
||||
def render_selector_chain(
|
||||
blocks: list[tuple[str, list[str]]],
|
||||
label_to_index: dict[str, int],
|
||||
start_index: int,
|
||||
end_index: int,
|
||||
return_labels: set[str],
|
||||
active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
|
||||
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
|
||||
) -> tuple[list[str], int] | None:
|
||||
if not blocks[start_index][1]:
|
||||
return None
|
||||
|
|
@ -2220,6 +2519,8 @@ def render_selector_chain(
|
|||
target_index,
|
||||
return_labels,
|
||||
{join_label},
|
||||
active_regions,
|
||||
render_cache,
|
||||
)
|
||||
if body_result is None:
|
||||
return None
|
||||
|
|
@ -2250,7 +2551,19 @@ def render_structured_region(
|
|||
end_index: int,
|
||||
return_labels: set[str],
|
||||
exit_labels: set[str] | None = None,
|
||||
active_regions: set[tuple[int, int, tuple[str, ...]]] | None = None,
|
||||
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] | None = None,
|
||||
) -> tuple[list[str], bool] | None:
|
||||
region_key = (start_index, end_index, tuple(sorted(exit_labels or ())))
|
||||
if render_cache is not None and region_key in render_cache:
|
||||
return render_cache[region_key]
|
||||
if active_regions is None:
|
||||
active_regions = set()
|
||||
elif region_key in active_regions:
|
||||
return None
|
||||
|
||||
active_regions = set(active_regions)
|
||||
active_regions.add(region_key)
|
||||
allowed_exit_labels = set(exit_labels or ())
|
||||
lines: list[str] = []
|
||||
index = start_index
|
||||
|
|
@ -2265,6 +2578,9 @@ def render_structured_region(
|
|||
if not statements:
|
||||
index += 1
|
||||
continue
|
||||
if is_loop_selector_only_block(statements):
|
||||
index += 1
|
||||
continue
|
||||
|
||||
terminal = parse_terminal_statement(statements[-1])
|
||||
if terminal is None:
|
||||
|
|
@ -2304,7 +2620,15 @@ def render_structured_region(
|
|||
index += 1
|
||||
continue
|
||||
|
||||
selector_chain = render_selector_chain(blocks, label_to_index, index, end_index, return_labels)
|
||||
selector_chain = render_selector_chain(
|
||||
blocks,
|
||||
label_to_index,
|
||||
index,
|
||||
end_index,
|
||||
return_labels,
|
||||
active_regions,
|
||||
render_cache,
|
||||
)
|
||||
if selector_chain is not None:
|
||||
selector_lines, selector_join_index = selector_chain
|
||||
lines.extend(selector_lines)
|
||||
|
|
@ -2323,10 +2647,18 @@ def render_structured_region(
|
|||
target_index,
|
||||
return_labels,
|
||||
{blocks[index][0]},
|
||||
active_regions,
|
||||
render_cache,
|
||||
)
|
||||
if loop_body is not None:
|
||||
loop_lines, _ = loop_body
|
||||
lines.append(f"while ({invert_condition_text(terminal.condition or 'condition')}) {{")
|
||||
loop_selector = None
|
||||
if index > start_index:
|
||||
loop_selector = parse_loop_selector_statement(blocks[index - 1][1][0]) if is_loop_selector_only_block(blocks[index - 1][1]) else None
|
||||
if loop_selector is not None:
|
||||
lines.append(f"for {loop_selector} {{")
|
||||
else:
|
||||
lines.append(f"while ({invert_condition_text(terminal.condition or 'condition')}) {{")
|
||||
lines.extend(indent_lines(loop_lines))
|
||||
lines.append("}")
|
||||
index = target_index
|
||||
|
|
@ -2346,6 +2678,8 @@ def render_structured_region(
|
|||
target_index,
|
||||
return_labels,
|
||||
{join_label},
|
||||
active_regions,
|
||||
render_cache,
|
||||
)
|
||||
false_result = render_structured_region(
|
||||
blocks,
|
||||
|
|
@ -2354,6 +2688,8 @@ def render_structured_region(
|
|||
join_index,
|
||||
return_labels,
|
||||
{join_label},
|
||||
active_regions,
|
||||
render_cache,
|
||||
)
|
||||
if true_result is not None and false_result is not None:
|
||||
true_lines, _ = true_result
|
||||
|
|
@ -2372,8 +2708,19 @@ def render_structured_region(
|
|||
index = join_index
|
||||
continue
|
||||
|
||||
inner_result = render_structured_region(blocks, label_to_index, index + 1, target_index, return_labels)
|
||||
inner_result = render_structured_region(
|
||||
blocks,
|
||||
label_to_index,
|
||||
index + 1,
|
||||
target_index,
|
||||
return_labels,
|
||||
None,
|
||||
active_regions,
|
||||
render_cache,
|
||||
)
|
||||
if inner_result is None:
|
||||
if render_cache is not None:
|
||||
render_cache[region_key] = None
|
||||
return None
|
||||
|
||||
inner_lines, inner_falls_through = inner_result
|
||||
|
|
@ -2387,7 +2734,10 @@ def render_structured_region(
|
|||
|
||||
index = target_index
|
||||
|
||||
return lines, True
|
||||
result = (lines, True)
|
||||
if render_cache is not None:
|
||||
render_cache[region_key] = result
|
||||
return result
|
||||
|
||||
|
||||
def render_structured_pseudocode(blocks: list[tuple[str, list[str]]]) -> list[str] | None:
|
||||
|
|
@ -2400,7 +2750,8 @@ def render_structured_pseudocode(blocks: list[tuple[str, list[str]]]) -> list[st
|
|||
for label, statements in blocks
|
||||
if len(statements) == 1 and statements[0] == "return;"
|
||||
}
|
||||
structured = render_structured_region(blocks, label_to_index, 0, len(blocks), return_labels)
|
||||
render_cache: dict[tuple[int, int, tuple[str, ...]], tuple[list[str], bool] | None] = {}
|
||||
structured = render_structured_region(blocks, label_to_index, 0, len(blocks), return_labels, None, None, render_cache)
|
||||
if structured is None:
|
||||
return None
|
||||
return structured[0]
|
||||
|
|
@ -2421,6 +2772,41 @@ def render_partially_structured_blocks(blocks: list[tuple[str, list[str]]]) -> l
|
|||
index = 0
|
||||
while index < len(blocks):
|
||||
label, statements = blocks[index]
|
||||
if is_loop_selector_only_block(statements):
|
||||
loop_selector = parse_loop_selector_statement(statements[0])
|
||||
if loop_selector is not None and index + 1 < len(blocks):
|
||||
next_label, next_statements = blocks[index + 1]
|
||||
next_terminal = parse_terminal_statement(next_statements[-1]) if next_statements else None
|
||||
if next_terminal is not None and next_terminal.kind == "if":
|
||||
target_index = label_to_index.get(next_terminal.target or "")
|
||||
if target_index is not None and target_index > index + 1:
|
||||
loop_tail_index = last_nonempty_block_index(blocks, index + 2, target_index)
|
||||
if loop_tail_index is not None:
|
||||
loop_tail_terminal = parse_terminal_statement(blocks[loop_tail_index][1][-1])
|
||||
if loop_tail_terminal is not None and loop_tail_terminal.kind == "goto" and loop_tail_terminal.target == next_label:
|
||||
loop_body = render_structured_region(
|
||||
blocks,
|
||||
label_to_index,
|
||||
index + 2,
|
||||
target_index,
|
||||
return_labels,
|
||||
{next_label},
|
||||
)
|
||||
if loop_body is not None:
|
||||
loop_lines, _ = loop_body
|
||||
lines.append(f" {label}:")
|
||||
lines.append(f" for {loop_selector} {{")
|
||||
lines.extend(f" {line}" for line in indent_lines(loop_lines))
|
||||
lines.append(" }")
|
||||
lines.append("")
|
||||
index = target_index
|
||||
continue
|
||||
|
||||
lines.append(f" {label}:")
|
||||
lines.append(f" {statements[0]}")
|
||||
lines.append("")
|
||||
index += 1
|
||||
continue
|
||||
selector_chain = render_selector_chain(blocks, label_to_index, index, len(blocks), return_labels)
|
||||
if selector_chain is not None:
|
||||
selector_lines, selector_join_index = selector_chain
|
||||
|
|
@ -2440,7 +2826,7 @@ def render_partially_structured_blocks(blocks: list[tuple[str, list[str]]]) -> l
|
|||
return lines
|
||||
|
||||
|
||||
def render_pseudocode(ir: dict[str, Any]) -> str:
|
||||
def render_pseudocode(ir: dict[str, Any], shape_catalog: ShapeCatalog | None = None) -> str:
|
||||
slot_name = sanitize_identifier(ir["event"]["event_name_hint"] or f"slot_{ir['event']['slot']:02X}")
|
||||
lines = [
|
||||
(
|
||||
|
|
@ -2466,7 +2852,7 @@ def render_pseudocode(ir: dict[str, Any]) -> str:
|
|||
lines.extend(render_partially_structured_blocks(rendered_blocks))
|
||||
|
||||
lines.append("}")
|
||||
return "\n".join(lines) + "\n"
|
||||
return apply_shape_catalog_to_pseudocode("\n".join(lines) + "\n", shape_catalog)
|
||||
|
||||
|
||||
def render_text(ir: dict[str, Any]) -> str:
|
||||
|
|
@ -2525,6 +2911,14 @@ def main() -> None:
|
|||
parser.add_argument("--slot", required=True, help="Event slot, for example 0x0A")
|
||||
parser.add_argument("--extracted-root", default=str(EXTRACTED_ROOT), help="Extracted USECODE root containing class_event_index.tsv and chunks/")
|
||||
parser.add_argument("--variant", choices=["auto", "regret", "remorse"], default="auto", help="Crusader intrinsic numbering to apply (default: auto, fallback regret)")
|
||||
parser.add_argument(
|
||||
"--shape-csv",
|
||||
help=(
|
||||
"Shape catalog CSV to apply to pseudocode output "
|
||||
"(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
|
||||
"Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
|
||||
),
|
||||
)
|
||||
parser.add_argument("--output", help="Write IR JSON to this file instead of stdout")
|
||||
parser.add_argument("--emit-text", action="store_true", help="Emit a readable text listing beside the JSON")
|
||||
parser.add_argument("--text-output", help="Write the text listing to this file")
|
||||
|
|
@ -2539,6 +2933,8 @@ def main() -> None:
|
|||
|
||||
slot = parse_int(args.slot)
|
||||
extracted_root = Path(args.extracted_root)
|
||||
shape_csv = Path(args.shape_csv) if args.shape_csv else default_shape_catalog_path(extracted_root, args.variant)
|
||||
shape_catalog = load_shape_catalog(shape_csv)
|
||||
event_row, layout_row = select_rows(args.class_name, slot, extracted_root)
|
||||
ir = parse_body_ir(event_row, layout_row, None if args.variant == "auto" else args.variant, extracted_root)
|
||||
|
||||
|
|
@ -2563,7 +2959,7 @@ def main() -> None:
|
|||
print(rendered_script)
|
||||
|
||||
if args.emit_pseudocode:
|
||||
rendered_pseudocode = render_pseudocode(ir)
|
||||
rendered_pseudocode = render_pseudocode(ir, shape_catalog=shape_catalog)
|
||||
if args.pseudocode_output:
|
||||
Path(args.pseudocode_output).write_text(rendered_pseudocode, encoding="utf-8")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ from __future__ import annotations
|
|||
import unittest
|
||||
|
||||
from tools.poc_crusader_usecode_parser import (
|
||||
format_target_event_reference,
|
||||
get_intrinsic_hints,
|
||||
intrinsic_display_name,
|
||||
render_partially_structured_blocks,
|
||||
render_structured_pseudocode,
|
||||
try_decode_loop_selector,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -91,6 +93,78 @@ class UsecodeStructuringTests(unittest.TestCase):
|
|||
self.assertIn("while (!condition) {", text)
|
||||
self.assertNotIn("goto block_0118;", text)
|
||||
|
||||
def test_loop_selector_block_renders_as_for_loop(self) -> None:
    """A loop_selector comment in the entry block becomes a `for` header, not a `while`."""
    selector_entry = ("entry", ["/* loop_selector item in nearby_items(shape=0x04D0, origin=arg_06) */"])
    loop_blocks = [
        selector_entry,
        ("block_0118", ["if condition goto block_0151;"]),
        ("block_011B", ["if (Item.getFrame(item) != 0) goto block_014D;", "suspend;"]),
        ("block_014D", ["goto block_0118;"]),
        ("block_0151", ["return;"]),
    ]

    structured = render_structured_pseudocode(loop_blocks)

    # The structured renderer must accept this shape rather than bail out with None.
    self.assertIsNotNone(structured)
    joined = "\n".join(structured or [])
    expected_header = "for item in nearby_items(shape=0x04D0, origin=arg_06) {"
    self.assertIn(expected_header, joined)
    self.assertNotIn("while (!condition) {", joined)
|
||||
|
||||
def test_loop_selector_renders_in_partial_fallback(self) -> None:
    """The partially structured renderer keeps the entry label and still emits the `for` header."""
    fallback_blocks = [
        ("entry", ["/* loop_selector item in nearby_items(shape=0x04D0, origin=arg_06) */"]),
        ("block_0118", ["if condition goto block_0151;"]),
        ("block_011B", ["if other goto block_014D;", "suspend;"]),
        ("block_014D", ["goto block_0118;"]),
        ("block_0151", ["goto block_0200;"]),
        ("block_0200", ["return;"]),
    ]

    joined = "\n".join(render_partially_structured_blocks(fallback_blocks))

    for expected_fragment in (
        "entry:",
        "for item in nearby_items(shape=0x04D0, origin=arg_06) {",
    ):
        self.assertIn(expected_fragment, joined)
|
||||
|
||||
def test_target_event_reference_prefers_alias_and_class_name(self) -> None:
    """A target with a class hint but no event hint is formatted as `FREE.waitNTimerTicks`."""
    target_fields = {
        "target_class_id": 0x0A0C,
        "target_class_name_hint": "FREE",
        "target_event_slot": 0x32,
        "target_event_name_hint": None,
    }

    formatted = format_target_event_reference(target_fields)

    self.assertEqual(formatted, "FREE.waitNTimerTicks")
|
||||
|
||||
def test_selector_0x42_decodes_to_readable_fallback(self) -> None:
    """A selector 0x42 sequence decodes to a generic `selector_0x42(...)` description."""
    instructions = [
        {"mnemonic": "loopscr", "operands": {"value_u8": 0x24}},
        {"mnemonic": "push_word_immediate", "operands": {"value_u16": 0x04C8}},
        {"mnemonic": "push_word_immediate", "operands": {"value_u16": 0x01CD}},
        {"mnemonic": "loopscr", "operands": {"value_u8": 0x42}},
        {"mnemonic": "push_byte_immediate", "operands": {"value_u8": 0x32, "value_signed": 50}},
        {"mnemonic": "push_byte_immediate", "operands": {"value_u8": 0x20, "value_signed": 32}},
        {"mnemonic": "mul", "operands": {}},
        {"mnemonic": "push_local_word", "operands": {"bp_offset": 0x0A}},
        {
            "mnemonic": "loop",
            "operands": {"current_var": 0xFE, "string_bytes": 0x6, "loop_type": 0x2},
        },
    ]
    local_names = {0xFE: "n", 0x0A: "eventTrigger"}
    expected = (
        "n in selector_0x42(arg0=0x04C8, arg1=0x01CD, arg2=(50 * 32), origin=eventTrigger)",
        9,
    )

    outcome = try_decode_loop_selector(instructions, 0, local_names)

    self.assertEqual(outcome, expected)
|
||||
|
||||
def test_selector_ladder_renders_as_else_if_chain(self) -> None:
|
||||
blocks = [
|
||||
("entry", ["if (dir != 0) goto block_0358;"]),
|
||||
|
|
|
|||
228
tools/update_usecode_shape_catalog.py
Normal file
228
tools/update_usecode_shape_catalog.py
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
|
||||
from tools.poc_crusader_usecode_parser import (
|
||||
EXTRACTED_ROOT,
|
||||
NUMERIC_SHAPE_LITERAL_PATTERN,
|
||||
SHAPE_CATALOG_FIELDNAMES,
|
||||
collect_shape_codes_from_pseudocode,
|
||||
default_shape_catalog_path,
|
||||
format_shape_code,
|
||||
load_tsv_rows,
|
||||
parse_body_ir,
|
||||
parse_int,
|
||||
render_pseudocode,
|
||||
sanitize_identifier,
|
||||
try_parse_int,
|
||||
)
|
||||
|
||||
|
||||
# Matches `Item.getShape(arg_06)` or `Item.getType(arg_06)` compared (==, !=, <=, >=, <, >)
# against a numeric literal, capturing that literal in the named group "value".
# NOTE(review): arg_06 is presumably the script's own item ("self"), as the constant's
# name suggests -- confirm against the parser's argument naming.
SELF_SHAPE_COMPARISON_PATTERN = re.compile(
    rf"\bItem\.(?:getShape|getType)\(\s*arg_06\s*\)\s*(?:==|!=|<=|>=|<|>)\s*(?P<value>{NUMERIC_SHAPE_LITERAL_PATTERN})\b"
)
|
||||
|
||||
|
||||
def describe_row(row: dict[str, str]) -> str:
    """Build a short human-readable label for a class/event index row.

    The label has the form ``entry <index> <class>::<event> (slot 0xNN)`` and is
    used in progress messages.

    Args:
        row: One row of the class/event index. Missing or empty hints fall back
            to ``unknown`` (class), ``slot_NN`` (event) and ``?`` (entry index).

    Returns:
        The formatted label.
    """
    class_name = row.get("class_name_hint") or "unknown"
    # A present-but-empty slot cell is treated like a missing one instead of
    # being handed to parse_int as an empty string.
    slot = parse_int(row.get("slot") or "0")
    event_name = row.get("event_name_hint") or f"slot_{slot:02X}"
    entry_index = row.get("entry_index", "?")
    # Format the separator directly; the former "::$" + replace() round trip
    # would also strip a literal "::$" occurring inside any of the fields.
    return f"entry {entry_index} {class_name}::{event_name} (slot 0x{slot:02X})"
|
||||
|
||||
|
||||
def load_layout_by_entry(class_layout_index: Path) -> dict[int, dict[str, str]]:
    """Index the class layout TSV rows by their integer ``entry_index``.

    Rows whose ``entry_index`` cannot be parsed are left out; when several rows
    share an index, the last one read wins.
    """
    indexed: dict[int, dict[str, str]] = {}
    for layout_row in load_tsv_rows(class_layout_index):
        key = try_parse_int(layout_row.get("entry_index", ""))
        if key is not None:
            indexed[key] = layout_row
    return indexed
|
||||
|
||||
|
||||
def load_existing_catalog(csv_path: Path) -> tuple[list[str], list[dict[str, str]], set[int]]:
    """Load the current shape catalog so that new codes can be appended to it.

    Args:
        csv_path: Location of the catalog CSV; it does not have to exist yet.

    Returns:
        A ``(fieldnames, rows, existing_codes)`` tuple:

        * ``fieldnames`` - the file's header, extended with any name from
          ``SHAPE_CATALOG_FIELDNAMES`` it lacks (a copy of that list when the
          file is absent or has no header);
        * ``rows`` - one dict per kept CSV row, restricted to ``fieldnames``.
          Rows with a parseable ``shape_code`` get it rewritten through
          ``format_shape_code`` and repeated codes after the first are dropped;
          rows whose code cannot be parsed are kept untouched;
        * ``existing_codes`` - the set of parsed shape codes found.
    """
    if not csv_path.exists():
        return SHAPE_CATALOG_FIELDNAMES[:], [], set()

    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading BOM, as
    # written by spreadsheet tools. With plain "utf-8" the BOM sticks to the
    # first header name, so the real "shape_code" column would look absent and
    # every known code would be appended again.
    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        fieldnames = [name for name in (reader.fieldnames or []) if name] or SHAPE_CATALOG_FIELDNAMES[:]
        for required_name in SHAPE_CATALOG_FIELDNAMES:
            if required_name not in fieldnames:
                fieldnames.append(required_name)

        rows: list[dict[str, str]] = []
        existing_codes: set[int] = set()
        for raw_row in reader:
            # DictReader stores None for cells missing from a short line;
            # normalise those to "" so every value really is a string.
            row = {fieldname: raw_row.get(fieldname) or "" for fieldname in fieldnames}
            shape_code = try_parse_int((row.get("shape_code") or "").strip())
            if shape_code is not None:
                if shape_code in existing_codes:
                    # Keep only the first row for a given code.
                    continue
                row["shape_code"] = format_shape_code(shape_code)
                existing_codes.add(shape_code)
            rows.append(row)
    return fieldnames, rows, existing_codes
|
||||
|
||||
|
||||
def write_catalog(csv_path: Path, fieldnames: list[str], rows: list[dict[str, str]]) -> None:
    """Write the catalog to ``csv_path`` as UTF-8 CSV with a header line.

    Missing parent directories are created first; an existing file is
    overwritten.
    """
    target_dir = csv_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with csv_path.open("w", encoding="utf-8", newline="") as out_file:
        catalog_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        catalog_writer.writeheader()
        for catalog_row in rows:
            catalog_writer.writerow(catalog_row)
|
||||
|
||||
|
||||
def scan_exported_pseudocode_shape_codes(extracted_root: Path) -> set[int]:
    """Collect shape codes from every ``*.txt`` file under ``<extracted_root>/pseudocode``.

    Progress is printed per file. When the pseudocode directory does not exist
    a notice is printed and an empty set is returned.
    """
    pseudocode_root = extracted_root / "pseudocode"
    if not pseudocode_root.exists():
        print(f"No exported pseudocode directory at {pseudocode_root}; skipping pre-scan", flush=True)
        return set()

    found_codes: set[int] = set()
    # Sorted so the progress output is in a stable order.
    listing_paths = sorted(pseudocode_root.rglob("*.txt"))
    total = len(listing_paths)
    print(f"Scanning {total} exported pseudocode files under {pseudocode_root}", flush=True)
    for ordinal, listing_path in enumerate(listing_paths, start=1):
        relative_name = listing_path.relative_to(extracted_root).as_posix()
        print(f"[pseudocode {ordinal}/{total}] {relative_name}", flush=True)
        listing_text = listing_path.read_text(encoding="utf-8")
        found_codes.update(collect_shape_codes_from_pseudocode(listing_text))
    return found_codes
|
||||
|
||||
|
||||
def auto_shape_identifier(class_name: str, shape_code: int) -> str:
    """Derive a placeholder identifier of the form ``<class>_shape_<hex code>``.

    The class name is lower-cased (``shape`` is used when it is empty) and the
    code is rendered as at least four lower-case hex digits; both the prefix and
    the final name pass through ``sanitize_identifier``.
    """
    prefix = sanitize_identifier((class_name or "shape").lower())
    candidate = f"{prefix}_shape_{shape_code:04x}"
    return sanitize_identifier(candidate)
|
||||
|
||||
|
||||
def collect_shape_suggestions_from_pseudocode(text: str, row: dict[str, str]) -> dict[int, dict[str, str]]:
    """Suggest catalog names for shape codes matched by ``SELF_SHAPE_COMPARISON_PATTERN``.

    Args:
        text: Rendered pseudocode of one event body.
        row: The event's index row; its ``class_name_hint`` names the class.

    Returns:
        A mapping from shape code to a dict with ``human_readable_id`` and
        ``description``. It is empty when the row has no class name hint.
    """
    owner_class = (row.get("class_name_hint") or "").strip()
    proposals: dict[int, dict[str, str]] = {}
    if owner_class:
        note = f"Auto-derived from {owner_class} self-shape comparison in USECODE"
        for hit in SELF_SHAPE_COMPARISON_PATTERN.finditer(text):
            code = try_parse_int(hit.group("value"))
            if code is not None:
                proposals[code] = {
                    "human_readable_id": auto_shape_identifier(owner_class, code),
                    "description": note,
                }
    return proposals
|
||||
|
||||
|
||||
def scan_shape_codes(extracted_root: Path, variant: str) -> tuple[set[int], dict[int, dict[str, str]]]:
    """Decode every event body and gather the shape codes its pseudocode mentions.

    Codes come from the previously exported pseudocode files and from freshly
    rendered pseudocode of each index row that has both ``derived_body_start``
    and ``derived_body_end``. Rows without a matching layout row are reported
    and skipped.

    Args:
        extracted_root: Directory holding ``class_event_index.tsv`` and
            ``class_layout_index.tsv``.
        variant: Intrinsic numbering name; ``"auto"`` is forwarded to the
            parser as ``None``.

    Returns:
        ``(shape_codes, suggestions)``, where ``suggestions`` only contains
        codes for which exactly one distinct identifier was proposed.
    """
    event_rows = load_tsv_rows(extracted_root / "class_event_index.tsv")
    pending_rows = [
        candidate
        for candidate in event_rows
        if candidate.get("derived_body_start") and candidate.get("derived_body_end")
    ]
    layouts = load_layout_by_entry(extracted_root / "class_layout_index.tsv")
    collected_codes = scan_exported_pseudocode_shape_codes(extracted_root)
    # shape code -> {proposed identifier -> suggestion}; keyed so that repeats collapse.
    proposals_by_code: dict[int, dict[str, dict[str, str]]] = {}
    total = len(pending_rows)
    forced_variant = None if variant == "auto" else variant
    print(f"Scanning {total} decoded USECODE bodies from {extracted_root}", flush=True)

    for position, row in enumerate(pending_rows, start=1):
        progress = f"[{position}/{total}]"
        layout_row = layouts.get(parse_int(row["entry_index"]))
        label = describe_row(row)
        if layout_row is None:
            print(f"{progress} Skipping {label} because no layout row was found", flush=True)
            continue

        print(f"{progress} Decoding {label}", flush=True)
        ir = parse_body_ir(row, layout_row, forced_variant, extracted_root)
        print(f"{progress} Rendering {label}", flush=True)
        rendered = render_pseudocode(ir)
        collected_codes.update(collect_shape_codes_from_pseudocode(rendered))
        for code, proposal in collect_shape_suggestions_from_pseudocode(rendered, row).items():
            bucket = proposals_by_code.setdefault(code, {})
            bucket[proposal["human_readable_id"]] = proposal

    # A code claimed under more than one identifier is ambiguous and gets no suggestion.
    unambiguous = {
        code: next(iter(bucket.values()))
        for code, bucket in proposals_by_code.items()
        if len(bucket) == 1
    }
    return collected_codes, unambiguous
|
||||
|
||||
|
||||
def _backfill_row(row: dict[str, str], suggestion: dict[str, str]) -> bool:
    """Fill the blank name/description cells of ``row`` from ``suggestion``; report any change."""
    changed = False
    for field in ("human_readable_id", "description"):
        if not (row.get(field) or "").strip():
            row[field] = suggestion[field]
            changed = True
    return changed


def main() -> None:
    """Command-line entry point: scan USECODE for shape codes and update the catalog CSV.

    Existing rows are kept: blank ``human_readable_id`` / ``description`` cells
    are filled from auto-derived suggestions, and one new row is appended for
    every scanned shape code the catalog does not list yet. The catalog is then
    rewritten and a summary is printed.
    """
    parser = argparse.ArgumentParser(description="Generate or update the append-only USECODE shape catalog CSV")
    parser.add_argument(
        "--extracted-root",
        default=str(EXTRACTED_ROOT),
        help="Extracted USECODE root containing class_event_index.tsv and chunks/",
    )
    parser.add_argument(
        "--output-csv",
        help=(
            "Catalog CSV path "
            "(default: Remorse uses <extracted-root>/usecode_shape_catalog_remorse.csv; "
            "Regret uses <extracted-root>/usecode_shape_catalog_regret.csv)"
        ),
    )
    parser.add_argument(
        "--variant",
        choices=["auto", "regret", "remorse"],
        default="auto",
        help="Crusader intrinsic numbering to apply during scanning (default: auto, fallback regret)",
    )
    args = parser.parse_args()

    extracted_root = Path(args.extracted_root)
    output_csv = Path(args.output_csv) if args.output_csv else default_shape_catalog_path(extracted_root, args.variant)

    print(
        f"Updating shape catalog {output_csv} from extracted_root={extracted_root} using variant={args.variant}",
        flush=True,
    )
    shape_codes, suggested_rows = scan_shape_codes(extracted_root, args.variant)
    fieldnames, rows, existing_codes = load_existing_catalog(output_csv)

    # Count a row as backfilled when either cell was filled; counting only the
    # identifier left description-only backfills out of the summary.
    backfilled = 0
    for row in rows:
        shape_code = try_parse_int((row.get("shape_code") or "").strip())
        if shape_code is None:
            continue
        suggestion = suggested_rows.get(shape_code)
        if suggestion is not None and _backfill_row(row, suggestion):
            backfilled += 1

    # Append new codes in ascending order after the existing rows.
    missing_codes = sorted(shape_codes - existing_codes)
    for shape_code in missing_codes:
        row = {fieldname: "" for fieldname in fieldnames}
        row["shape_code"] = format_shape_code(shape_code)
        suggestion = suggested_rows.get(shape_code)
        if suggestion is not None:
            row["human_readable_id"] = suggestion["human_readable_id"]
            row["description"] = suggestion["description"]
        rows.append(row)

    write_catalog(output_csv, fieldnames, rows)
    print(
        f"Catalog {output_csv} now contains {len(rows)} rows; "
        f"scanned {len(shape_codes)} distinct shape codes, added {len(missing_codes)} new rows, "
        f"and backfilled {backfilled} existing rows."
    )


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue