diff --git a/.github/skills/pyghidra-ghidra-ops/SKILL.md b/.github/skills/pyghidra-ghidra-ops/SKILL.md index f4732da..6514bde 100644 --- a/.github/skills/pyghidra-ghidra-ops/SKILL.md +++ b/.github/skills/pyghidra-ghidra-ops/SKILL.md @@ -62,11 +62,14 @@ MCP-style read/query commands are also available from the same CLI: ```powershell .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader get-function-by-address --address 000a:48ff +.\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader get_function_by_address --address 000a:48ff .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader get-function-containing --address 000a:4901 .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader decompile-function-by-address --address 000a:48ff .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader disassemble-function --address 000a:48ff .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader read-region --start 000a:48ff --end 000a:4912 .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader search-functions-by-name --query rng_ +.\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader list-methods --limit 20 +.\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader list_methods --limit 20 .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader list-strings --limit 20 .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader list-imports --limit 20 .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader list-exports --limit 20 @@ -84,6 +87,26 @@ All commands also support structured output for scripting: .\.venv-pyghidra311\Scripts\python.exe -m tools.pyghidra_crusader --format json get-function-by-address --address 000a:48ff ``` +JSON output now uses a stable envelope: + +```json +{ + "schema_version": "1.0", + "command": "get-function-by-address", + "ok": true, + "schema": { "type": "object", "properties": { "name": { "type": "string" } } }, + "data": { + "name": "rng_next_modulo", + "signature": "undefined rng_next_modulo()", + "entry": "000a:48ff", + "body_start": "000a:48ff", + "body_end": "000a:4912" + } +} +``` + +The CLI also accepts exact MCP-style underscore command aliases, so local automation can often swap MCP names directly with little or no translation. + For ad hoc investigation, prefer `run-script` over multiline `python -c` or pasted PowerShell here-strings. It avoids leaving the shared shell stuck in an unfinished string/block state: ```powershell diff --git a/tools/pyghidra_crusader/__pycache__/cli.cpython-311.pyc b/tools/pyghidra_crusader/__pycache__/cli.cpython-311.pyc index a363335..1bc6291 100644 Binary files a/tools/pyghidra_crusader/__pycache__/cli.cpython-311.pyc and b/tools/pyghidra_crusader/__pycache__/cli.cpython-311.pyc differ diff --git a/tools/pyghidra_crusader/__pycache__/common.cpython-311.pyc b/tools/pyghidra_crusader/__pycache__/common.cpython-311.pyc index 610f196..a92f121 100644 Binary files a/tools/pyghidra_crusader/__pycache__/common.cpython-311.pyc and b/tools/pyghidra_crusader/__pycache__/common.cpython-311.pyc differ diff --git a/tools/pyghidra_crusader/cli.py b/tools/pyghidra_crusader/cli.py index ed938fa..dbbeeae 100644 --- a/tools/pyghidra_crusader/cli.py +++ b/tools/pyghidra_crusader/cli.py @@ -15,6 +15,7 @@ from .common import ( decompile_function, disassemble_function, format_function_summary, + function_signature, get_function, get_function_containing, get_functions_by_exact_name, @@ -41,6 +42,181 @@ from .common import ( ) +OUTPUT_SCHEMA_VERSION = "1.0" + +FUNCTION_SCHEMA = { + "type": "object", + "required": ["name", "signature", "entry", "body_start", "body_end"], + "properties": { + "name": {"type": "string"}, + "signature": {"type": "string"}, + "entry": {"type": "string"}, + "body_start": {"type": "string"}, + "body_end": {"type": "string"}, + }, +} + +REFERENCE_SCHEMA = { + "type": "object", + "required": ["from", "to", "type", "operand_index"], + "properties": { + "from": {"type": "string"}, + "to": {"type": "string"}, + "type": {"type": "string"}, + "operand_index": {"type": "integer"}, + }, +} + +STATUS_SCHEMA = { + "type": "object", + "required": ["status", "action"], + "properties": { + "status": {"type": "string"}, + "action": {"type": "string"}, + "entry": {"type": "string"}, + "name": {"type": "string"}, + "address": {"type": "string"}, + "type": {"type": "string"}, + "text": {"type": "string"}, + "script": {"type": "string"}, + "plan": {"type": "string"}, + }, +} + +STRING_SCHEMA = { + "type": "object", + "required": ["address", "length", "text"], + "properties": { + "address": {"type": "string"}, + "length": {"type": "integer"}, + "text": {"type": "string"}, + }, +} + +SEGMENT_SCHEMA = { + "type": "object", + "required": ["name", "start", "end", "length", "initialized", "read", "write", "execute"], + "properties": { + "name": {"type": "string"}, + "start": {"type": "string"}, + "end": {"type": "string"}, + "length": {"type": "integer"}, + "initialized": {"type": "boolean"}, + "read": {"type": "boolean"}, + "write": {"type": "boolean"}, + "execute": {"type": "boolean"}, + }, +} + +DATA_ITEM_SCHEMA = { + "type": "object", + "required": ["address", "length", "mnemonic", "value"], + "properties": { + "address": {"type": "string"}, + "length": {"type": "integer"}, + "mnemonic": {"type": "string"}, + "value": {"type": ["string", "null"]}, + }, +} + +IMPORT_SCHEMA = { + "type": "object", + "required": ["library", "label", "address"], + "properties": { + "library": {"type": "string"}, + "label": {"type": ["string", "null"]}, + "address": {"type": ["string", "null"]}, + }, +} + +EXPORT_SCHEMA = { + "type": "object", + "required": ["address", "name", "kind"], + "properties": { + "address": {"type": "string"}, + "name": {"type": ["string", "null"]}, + "kind": {"type": "string"}, + }, +} + +NAMESPACE_SCHEMA = { + "type": "object", + "required": ["name", "type", "parent"], + "properties": { + "name": {"type": "string"}, + "type": {"type": "string"}, + "parent": {"type": ["string", "null"]}, + }, +} + +CLASS_SCHEMA = { + "type": "object", + "required": ["name", "parent"], + "properties": { + "name": {"type": "string"}, + "parent": {"type": ["string", "null"]}, + }, +} + +JSON_SCHEMAS = { + "project-files": {"type": "array", "items": {"type": "string"}}, + "dump-region": { + "type": "object", + "required": ["start", "end", "preview_bytes", "lines"], + "properties": { + "start": {"type": "string"}, + "end": {"type": "string"}, + "preview_bytes": {"type": "string"}, + "lines": {"type": "array", "items": {"type": "string"}}, + }, + }, + "create-function": STATUS_SCHEMA, + "delete-function": STATUS_SCHEMA, + "rename-function": STATUS_SCHEMA, + "rename-function-by-address": STATUS_SCHEMA, + "set-comment": STATUS_SCHEMA, + "set-decompiler-comment": STATUS_SCHEMA, + "set-disassembly-comment": STATUS_SCHEMA, + "get-function-by-address": FUNCTION_SCHEMA, + "get-function-containing": FUNCTION_SCHEMA, + "list-functions": {"type": "array", "items": FUNCTION_SCHEMA}, + "list-methods": {"type": "array", "items": FUNCTION_SCHEMA}, + "search-functions-by-name": {"type": "array", "items": FUNCTION_SCHEMA}, + "decompile-function": { + "type": "object", + "required": ["name", "decompiled"], + "properties": {"name": {"type": "string"}, "decompiled": {"type": "string"}}, + }, + "decompile-function-by-address": { + "type": "object", + "required": ["address", "decompiled"], + "properties": {"address": {"type": "string"}, "decompiled": {"type": "string"}}, + }, + "disassemble-function": { + "type": "object", + "required": ["address", "lines"], + "properties": {"address": {"type": "string"}, "lines": {"type": "array", "items": {"type": "string"}}}, + }, + "read-region": { + "type": "object", + "required": ["start", "end", "bytes"], + "properties": {"start": {"type": "string"}, "end": {"type": "string"}, "bytes": {"type": "string"}}, + }, + "get-xrefs-to": {"type": "array", "items": REFERENCE_SCHEMA}, + "get-xrefs-from": {"type": "array", "items": REFERENCE_SCHEMA}, + "get-function-xrefs": {"type": "array", "items": REFERENCE_SCHEMA}, + "list-strings": {"type": "array", "items": STRING_SCHEMA}, + "list-imports": {"type": "array", "items": IMPORT_SCHEMA}, + "list-exports": {"type": "array", "items": EXPORT_SCHEMA}, + "list-namespaces": {"type": "array", "items": NAMESPACE_SCHEMA}, + "list-segments": {"type": "array", "items": SEGMENT_SCHEMA}, + "list-data-items": {"type": "array", "items": DATA_ITEM_SCHEMA}, + "list-classes": {"type": "array", "items": CLASS_SCHEMA}, + "run-script": STATUS_SCHEMA, + "apply-plan": STATUS_SCHEMA, +} + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="PyGhidra helpers for the Crusader project." @@ -86,11 +262,13 @@ def build_parser() -> argparse.ArgumentParser: subparsers.add_parser( "project-files", + aliases=["project_files"], help="List root-level files in the Ghidra project.", ) dump_parser = subparsers.add_parser( "dump-region", + aliases=["dump_region"], help="Dump instructions and resolved call targets for an address range.", ) dump_parser.add_argument("--start", required=True, help="Start address.") @@ -98,6 +276,7 @@ def build_parser() -> argparse.ArgumentParser: create_parser = subparsers.add_parser( "create-function", + aliases=["create_function"], help="Create a function at an address with an optional explicit body range.", ) create_parser.add_argument("--entry", required=True, help="Function entry address.") @@ -111,12 +290,14 @@ def build_parser() -> argparse.ArgumentParser: delete_parser = subparsers.add_parser( "delete-function", + aliases=["delete_function"], help="Delete a function at an address.", ) delete_parser.add_argument("--entry", required=True, help="Function entry address.") rename_parser = subparsers.add_parser( "rename-function", + aliases=["rename_function"], help="Rename an existing function by entry address.", ) rename_parser.add_argument("--entry", required=True, help="Function entry address.") @@ -124,15 +305,17 @@ def build_parser() -> argparse.ArgumentParser: rename_by_address_parser = subparsers.add_parser( "rename-function-by-address", + aliases=["rename_function_by_address"], help="Rename an existing function by entry address (MCP-style alias).", ) rename_by_address_parser.add_argument( - "--entry", required=True, help="Function entry address." + "--entry", "--function-address", dest="entry", required=True, help="Function entry address." ) - rename_by_address_parser.add_argument("--name", required=True, help="New function name.") + rename_by_address_parser.add_argument("--name", "--new-name", dest="name", required=True, help="New function name.") comment_parser = subparsers.add_parser( "set-comment", + aliases=["set_comment"], help="Set a code-unit comment by address.", ) comment_parser.add_argument("--address", required=True, help="Comment target address.") @@ -146,26 +329,30 @@ def build_parser() -> argparse.ArgumentParser: decompiler_comment_parser = subparsers.add_parser( "set-decompiler-comment", + aliases=["set_decompiler_comment"], help="Set a decompiler-visible pre-comment by address.", ) decompiler_comment_parser.add_argument("--address", required=True, help="Comment target address.") - decompiler_comment_parser.add_argument("--text", required=True, help="Comment text.") + decompiler_comment_parser.add_argument("--text", "--comment", dest="text", required=True, help="Comment text.") disassembly_comment_parser = subparsers.add_parser( "set-disassembly-comment", + aliases=["set_disassembly_comment"], help="Set a disassembly EOL comment by address.", ) disassembly_comment_parser.add_argument("--address", required=True, help="Comment target address.") - disassembly_comment_parser.add_argument("--text", required=True, help="Comment text.") + disassembly_comment_parser.add_argument("--text", "--comment", dest="text", required=True, help="Comment text.") get_function_parser = subparsers.add_parser( "get-function-by-address", + aliases=["get_function_by_address"], help="Show function metadata for an exact entry address.", ) get_function_parser.add_argument("--address", required=True, help="Function entry address.") get_function_containing_parser = subparsers.add_parser( "get-function-containing", + aliases=["get_function_containing"], help="Show function metadata for the function containing an address.", ) get_function_containing_parser.add_argument( @@ -174,13 +361,23 @@ def build_parser() -> argparse.ArgumentParser: list_functions_parser = subparsers.add_parser( "list-functions", + aliases=["list_functions"], help="List all defined functions.", ) list_functions_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") list_functions_parser.add_argument("--limit", type=int, default=100, help="Maximum functions to print.") + list_methods_parser = subparsers.add_parser( + "list-methods", + aliases=["list_methods"], + help="List defined function names and entries with signatures/body ranges in JSON mode.", + ) + list_methods_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") + list_methods_parser.add_argument("--limit", type=int, default=100, help="Maximum methods to print.") + list_segments_parser = subparsers.add_parser( "list-segments", + aliases=["list_segments"], help="List memory segments or blocks.", ) list_segments_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") @@ -188,6 +385,7 @@ def build_parser() -> argparse.ArgumentParser: list_data_items_parser = subparsers.add_parser( "list-data-items", + aliases=["list_data_items"], help="List defined data items.", ) list_data_items_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") @@ -195,6 +393,7 @@ def build_parser() -> argparse.ArgumentParser: list_classes_parser = subparsers.add_parser( "list-classes", + aliases=["list_classes"], help="List class namespaces.", ) list_classes_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") @@ -202,14 +401,16 @@ def build_parser() -> argparse.ArgumentParser: list_strings_parser = subparsers.add_parser( "list-strings", + aliases=["list_strings"], help="List defined strings in the program.", ) list_strings_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") list_strings_parser.add_argument("--limit", type=int, default=2000, help="Maximum strings to print.") - list_strings_parser.add_argument("--filter", help="Optional substring filter.") + list_strings_parser.add_argument("--filter", "--filter-text", dest="filter", help="Optional substring filter.") list_imports_parser = subparsers.add_parser( "list-imports", + aliases=["list_imports"], help="List imported external symbols.", ) list_imports_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") @@ -217,6 +418,7 @@ def build_parser() -> argparse.ArgumentParser: list_exports_parser = subparsers.add_parser( "list-exports", + aliases=["list_exports"], help="List exported entry points and symbols.", ) list_exports_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") @@ -224,6 +426,7 @@ def build_parser() -> argparse.ArgumentParser: list_namespaces_parser = subparsers.add_parser( "list-namespaces", + aliases=["list_namespaces"], help="List non-global namespaces, classes, and libraries.", ) list_namespaces_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.") @@ -231,6 +434,7 @@ def build_parser() -> argparse.ArgumentParser: search_functions_parser = subparsers.add_parser( "search-functions-by-name", + aliases=["search_functions_by_name"], help="List functions whose names contain a substring.", ) search_functions_parser.add_argument("--query", required=True, help="Substring to match.") @@ -239,6 +443,7 @@ def build_parser() -> argparse.ArgumentParser: decompile_name_parser = subparsers.add_parser( "decompile-function", + aliases=["decompile_function"], help="Decompile an exact-named function.", ) decompile_name_parser.add_argument("--name", required=True, help="Exact function name.") @@ -246,6 +451,7 @@ def build_parser() -> argparse.ArgumentParser: decompile_address_parser = subparsers.add_parser( "decompile-function-by-address", + aliases=["decompile_function_by_address"], help="Decompile a function by entry address.", ) decompile_address_parser.add_argument("--address", required=True, help="Function entry address.") @@ -253,12 +459,14 @@ def build_parser() -> argparse.ArgumentParser: disassemble_parser = subparsers.add_parser( "disassemble-function", + aliases=["disassemble_function"], help="Disassemble a function body by entry address.", ) disassemble_parser.add_argument("--address", required=True, help="Function entry address.") read_region_parser = subparsers.add_parser( "read-region", + aliases=["read_region"], help="Dump raw bytes for an inclusive address range.", ) read_region_parser.add_argument("--start", required=True, help="Start address.") @@ -266,6 +474,7 @@ def build_parser() -> argparse.ArgumentParser: run_script_parser = subparsers.add_parser( "run-script", + aliases=["run_script"], help="Execute a Python file with project/program context to avoid interactive shell quoting issues.", ) run_script_parser.add_argument("--script", required=True, help="Path to the Python script file.") @@ -277,6 +486,7 @@ def build_parser() -> argparse.ArgumentParser: xrefs_to_parser = subparsers.add_parser( "get-xrefs-to", + aliases=["get_xrefs_to"], help="List references to an address.", ) xrefs_to_parser.add_argument("--address", required=True, help="Target address.") @@ -285,6 +495,7 @@ def build_parser() -> argparse.ArgumentParser: xrefs_from_parser = subparsers.add_parser( "get-xrefs-from", + aliases=["get_xrefs_from"], help="List references from an address.", ) xrefs_from_parser.add_argument("--address", required=True, help="Source address.") @@ -293,6 +504,7 @@ def build_parser() -> argparse.ArgumentParser: function_xrefs_parser = subparsers.add_parser( "get-function-xrefs", + aliases=["get_function_xrefs"], help="List references to a function entry by exact function name.", ) function_xrefs_parser.add_argument("--name", required=True, help="Exact function name.") @@ -301,6 +513,7 @@ def build_parser() -> argparse.ArgumentParser: plan_parser = subparsers.add_parser( "apply-plan", + aliases=["apply_plan"], help="Apply a JSON edit plan containing function and comment operations.", ) plan_parser.add_argument("--plan", required=True, help="Path to the JSON plan file.") @@ -324,9 +537,21 @@ def build_config(args: argparse.Namespace) -> ProjectConfig: ) +def _canonical_command_name(command_name: str) -> str: + return command_name.replace("_", "-") + + def _emit(args: argparse.Namespace, payload, text: str | None = None) -> int: if args.format == "json": - print(json.dumps(payload, indent=2, sort_keys=True)) + command_name = _canonical_command_name(args.command) + response = { + "schema_version": OUTPUT_SCHEMA_VERSION, + "command": command_name, + "ok": True, + "schema": JSON_SCHEMAS.get(command_name, {"type": "object"}), + "data": payload, + } + print(json.dumps(response, indent=2, sort_keys=True)) return 0 if text is not None: print(text) @@ -343,16 +568,12 @@ def _emit(args: argparse.Namespace, payload, text: str | None = None) -> int: def _function_to_dict(function) -> dict[str, str]: - summary_text = format_function_summary(function) - lines = summary_text.splitlines() - body_line = lines[3].split(": ", 1)[1] - body_start, body_end = body_line.split(" - ", 1) return { "name": function.getName(), - "signature": lines[1].split(": ", 1)[1], + "signature": function_signature(function), "entry": str(function.getEntryPoint()), - "body_start": body_start, - "body_end": body_end, + "body_start": str(function.getBody().getMinAddress()), + "body_end": str(function.getBody().getMaxAddress()), } @@ -519,15 +740,23 @@ def command_get_function_containing(config: ProjectConfig, args: argparse.Namesp def command_list_functions(config: ProjectConfig, args: argparse.Namespace) -> int: with open_program(config, read_only=True) as (_project, program): functions = search_functions_by_name(program, "", offset=args.offset, limit=args.limit) - payload = [{"name": function.getName(), "entry": str(function.getEntryPoint())} for function in functions] + payload = [_function_to_dict(function) for function in functions] text = _text_or_empty([_function_line(function) for function in functions], "no functions found") return _emit(args, payload, text) +def command_list_methods(config: ProjectConfig, args: argparse.Namespace) -> int: + with open_program(config, read_only=True) as (_project, program): + functions = search_functions_by_name(program, "", offset=args.offset, limit=args.limit) + payload = [_function_to_dict(function) for function in functions] + text = _text_or_empty([_function_line(function) for function in functions], "no methods found") + return _emit(args, payload, text) + + def command_search_functions_by_name(config: ProjectConfig, args: argparse.Namespace) -> int: with open_program(config, read_only=True) as (_project, program): functions = search_functions_by_name(program, args.query, offset=args.offset, limit=args.limit) - payload = [{"name": function.getName(), "entry": str(function.getEntryPoint())} for function in functions] + payload = [_function_to_dict(function) for function in functions] text = _text_or_empty([_function_line(function) for function in functions], "no matching functions found") return _emit(args, payload, text) @@ -774,6 +1003,7 @@ def command_apply_plan(config: ProjectConfig, args: argparse.Namespace) -> int: def main(argv: list[str] | None = None) -> int: parser = build_parser() args = parser.parse_args(argv) + args.command = _canonical_command_name(args.command) config = build_config(args) command_map = { @@ -789,6 +1019,7 @@ def main(argv: list[str] | None = None) -> int: "get-function-by-address": command_get_function_by_address, "get-function-containing": command_get_function_containing, "list-functions": command_list_functions, + "list-methods": command_list_methods, "list-segments": command_list_segments, "list-data-items": command_list_data_items, "list-classes": command_list_classes,