Enhance CLI functionality and improve common utilities

- Added new commands to the CLI for dumping regions, renaming functions by address, and setting various types of comments.
- Implemented JSON output formatting for CLI commands.
- Introduced functions for decompiling and disassembling functions, as well as retrieving cross-references.
- Enhanced common utilities with functions for reading memory regions, iterating Java items, and managing function metadata.
- Added suppress_output context manager to hide process output during Ghidra startup.
- Updated existing functions to improve error handling and output formatting.
This commit is contained in:
MaddoScientisto 2026-03-21 09:44:35 +01:00
commit a56851f994
16 changed files with 1072 additions and 36 deletions

View file

@ -12,13 +12,30 @@ from .common import (
DEFAULT_FOLDER_PATH,
ProjectConfig,
create_function,
decompile_function,
disassemble_function,
format_function_summary,
get_function,
get_function_containing,
get_functions_by_exact_name,
get_xrefs_from,
get_xrefs_to,
list_classes,
list_data_items,
list_exports,
list_root_files,
list_imports,
list_namespaces,
list_segments,
list_strings,
open_program,
open_project,
read_region_bytes,
remove_function,
rename_function,
run_script_file,
save_program,
search_functions_by_name,
set_comment,
transaction,
)
@ -58,6 +75,12 @@ def build_parser() -> argparse.ArgumentParser:
action="store_true",
help="Restore project tool state while opening the project.",
)
parser.add_argument(
"--format",
choices=["text", "json"],
default="text",
help="Output format.",
)
subparsers = parser.add_subparsers(dest="command", required=True)
@ -66,6 +89,13 @@ def build_parser() -> argparse.ArgumentParser:
help="List root-level files in the Ghidra project.",
)
dump_parser = subparsers.add_parser(
"dump-region",
help="Dump instructions and resolved call targets for an address range.",
)
dump_parser.add_argument("--start", required=True, help="Start address.")
dump_parser.add_argument("--end", required=True, help="Inclusive end address.")
create_parser = subparsers.add_parser(
"create-function",
help="Create a function at an address with an optional explicit body range.",
@ -92,6 +122,15 @@ def build_parser() -> argparse.ArgumentParser:
rename_parser.add_argument("--entry", required=True, help="Function entry address.")
rename_parser.add_argument("--name", required=True, help="New function name.")
rename_by_address_parser = subparsers.add_parser(
"rename-function-by-address",
help="Rename an existing function by entry address (MCP-style alias).",
)
rename_by_address_parser.add_argument(
"--entry", required=True, help="Function entry address."
)
rename_by_address_parser.add_argument("--name", required=True, help="New function name.")
comment_parser = subparsers.add_parser(
"set-comment",
help="Set a code-unit comment by address.",
@ -105,6 +144,161 @@ def build_parser() -> argparse.ArgumentParser:
help="Comment type.",
)
decompiler_comment_parser = subparsers.add_parser(
"set-decompiler-comment",
help="Set a decompiler-visible pre-comment by address.",
)
decompiler_comment_parser.add_argument("--address", required=True, help="Comment target address.")
decompiler_comment_parser.add_argument("--text", required=True, help="Comment text.")
disassembly_comment_parser = subparsers.add_parser(
"set-disassembly-comment",
help="Set a disassembly EOL comment by address.",
)
disassembly_comment_parser.add_argument("--address", required=True, help="Comment target address.")
disassembly_comment_parser.add_argument("--text", required=True, help="Comment text.")
get_function_parser = subparsers.add_parser(
"get-function-by-address",
help="Show function metadata for an exact entry address.",
)
get_function_parser.add_argument("--address", required=True, help="Function entry address.")
get_function_containing_parser = subparsers.add_parser(
"get-function-containing",
help="Show function metadata for the function containing an address.",
)
get_function_containing_parser.add_argument(
"--address", required=True, help="Address inside the desired function body."
)
list_functions_parser = subparsers.add_parser(
"list-functions",
help="List all defined functions.",
)
list_functions_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_functions_parser.add_argument("--limit", type=int, default=100, help="Maximum functions to print.")
list_segments_parser = subparsers.add_parser(
"list-segments",
help="List memory segments or blocks.",
)
list_segments_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_segments_parser.add_argument("--limit", type=int, default=100, help="Maximum segments to print.")
list_data_items_parser = subparsers.add_parser(
"list-data-items",
help="List defined data items.",
)
list_data_items_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_data_items_parser.add_argument("--limit", type=int, default=100, help="Maximum data items to print.")
list_classes_parser = subparsers.add_parser(
"list-classes",
help="List class namespaces.",
)
list_classes_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_classes_parser.add_argument("--limit", type=int, default=100, help="Maximum classes to print.")
list_strings_parser = subparsers.add_parser(
"list-strings",
help="List defined strings in the program.",
)
list_strings_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_strings_parser.add_argument("--limit", type=int, default=2000, help="Maximum strings to print.")
list_strings_parser.add_argument("--filter", help="Optional substring filter.")
list_imports_parser = subparsers.add_parser(
"list-imports",
help="List imported external symbols.",
)
list_imports_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_imports_parser.add_argument("--limit", type=int, default=100, help="Maximum imports to print.")
list_exports_parser = subparsers.add_parser(
"list-exports",
help="List exported entry points and symbols.",
)
list_exports_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_exports_parser.add_argument("--limit", type=int, default=100, help="Maximum exports to print.")
list_namespaces_parser = subparsers.add_parser(
"list-namespaces",
help="List non-global namespaces, classes, and libraries.",
)
list_namespaces_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
list_namespaces_parser.add_argument("--limit", type=int, default=100, help="Maximum namespaces to print.")
search_functions_parser = subparsers.add_parser(
"search-functions-by-name",
help="List functions whose names contain a substring.",
)
search_functions_parser.add_argument("--query", required=True, help="Substring to match.")
search_functions_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
search_functions_parser.add_argument("--limit", type=int, default=100, help="Maximum functions to print.")
decompile_name_parser = subparsers.add_parser(
"decompile-function",
help="Decompile an exact-named function.",
)
decompile_name_parser.add_argument("--name", required=True, help="Exact function name.")
decompile_name_parser.add_argument("--timeout", type=int, default=30, help="Decompile timeout in seconds.")
decompile_address_parser = subparsers.add_parser(
"decompile-function-by-address",
help="Decompile a function by entry address.",
)
decompile_address_parser.add_argument("--address", required=True, help="Function entry address.")
decompile_address_parser.add_argument("--timeout", type=int, default=30, help="Decompile timeout in seconds.")
disassemble_parser = subparsers.add_parser(
"disassemble-function",
help="Disassemble a function body by entry address.",
)
disassemble_parser.add_argument("--address", required=True, help="Function entry address.")
read_region_parser = subparsers.add_parser(
"read-region",
help="Dump raw bytes for an inclusive address range.",
)
read_region_parser.add_argument("--start", required=True, help="Start address.")
read_region_parser.add_argument("--end", required=True, help="Inclusive end address.")
run_script_parser = subparsers.add_parser(
"run-script",
help="Execute a Python file with project/program context to avoid interactive shell quoting issues.",
)
run_script_parser.add_argument("--script", required=True, help="Path to the Python script file.")
run_script_parser.add_argument(
"--read-only",
action="store_true",
help="Open the program read-only for script execution.",
)
xrefs_to_parser = subparsers.add_parser(
"get-xrefs-to",
help="List references to an address.",
)
xrefs_to_parser.add_argument("--address", required=True, help="Target address.")
xrefs_to_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
xrefs_to_parser.add_argument("--limit", type=int, default=100, help="Maximum references to print.")
xrefs_from_parser = subparsers.add_parser(
"get-xrefs-from",
help="List references from an address.",
)
xrefs_from_parser.add_argument("--address", required=True, help="Source address.")
xrefs_from_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
xrefs_from_parser.add_argument("--limit", type=int, default=100, help="Maximum references to print.")
function_xrefs_parser = subparsers.add_parser(
"get-function-xrefs",
help="List references to a function entry by exact function name.",
)
function_xrefs_parser.add_argument("--name", required=True, help="Exact function name.")
function_xrefs_parser.add_argument("--offset", type=int, default=0, help="Pagination offset.")
function_xrefs_parser.add_argument("--limit", type=int, default=100, help="Maximum references to print.")
plan_parser = subparsers.add_parser(
"apply-plan",
help="Apply a JSON edit plan containing function and comment operations.",
@ -128,13 +322,85 @@ def build_config(args: argparse.Namespace) -> ProjectConfig:
folder_path=args.folder_path,
restore_project=args.restore_project,
)
def _emit(args: argparse.Namespace, payload, text: str | None = None) -> int:
if args.format == "json":
print(json.dumps(payload, indent=2, sort_keys=True))
return 0
if text is not None:
print(text)
return 0
if isinstance(payload, list):
for item in payload:
print(item)
return 0
if isinstance(payload, dict):
print(json.dumps(payload, indent=2, sort_keys=True))
return 0
print(payload)
return 0
def _function_to_dict(function) -> dict[str, str]:
    """Build a JSON-friendly description of a function.

    The signature and body range are recovered from the multi-line text
    returned by ``format_function_summary``: the second line carries the
    signature and the fourth carries ``<start> - <end>`` for the body, each
    following a ``": "`` label.
    """
    summary_lines = format_function_summary(function).splitlines()
    signature = summary_lines[1].split(": ", 1)[1]
    body_text = summary_lines[3].split(": ", 1)[1]
    body_start, body_end = body_text.split(" - ", 1)
    return {
        "name": function.getName(),
        "signature": signature,
        "entry": str(function.getEntryPoint()),
        "body_start": body_start,
        "body_end": body_end,
    }
def _function_line(function) -> str:
    """Format a function as ``<name> @ <entry point>`` for text listings."""
    name = function.getName()
    entry = function.getEntryPoint()
    return f"{name} @ {entry}"
def _text_or_empty(lines: list[str], empty_message: str) -> str:
    """Join ``lines`` with newlines, or return ``empty_message`` if there are none."""
    if not lines:
        return empty_message
    return "\n".join(lines)
def command_project_files(config: ProjectConfig, _args: argparse.Namespace) -> int:
    """List the root-level files of the configured Ghidra project.

    The project is closed even when listing fails. All output goes through
    ``_emit`` so the names are printed exactly once and ``--format json``
    produces JSON only.
    """
    project = open_project(config)
    try:
        # Materialise once: the names feed both the JSON payload and the text form.
        names = list(list_root_files(project))
    finally:
        project.close()
    return _emit(_args, names, "\n".join(names))
def command_dump_region(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Print a byte preview and an instruction listing for an address range.

    The first line shows up to the first 32 bytes of the inclusive range
    ``args.start``..``args.end`` as hex. Each following line is one
    instruction, starting with the instruction located exactly at the start
    address; call instructions are annotated with their first reference
    target and, when a function is defined there, its name.

    Raises:
        ValueError: if the end address lies before the start address.
    """
    from .common import to_address

    preview_limit = 32
    with open_program(config, read_only=True) as (_project, program):
        listing = program.getListing()
        memory = program.getMemory()
        function_manager = program.getFunctionManager()
        start = to_address(program, args.start)
        end = to_address(program, args.end)
        size = int(end.subtract(start)) + 1
        if size <= 0:
            raise ValueError(f"invalid address range: {args.start}..{args.end}")

        # Read only the previewed bytes, one at a time. A Python bytearray
        # passed to Memory.getBytes is converted to a temporary Java array
        # under JPype, so the Python-side buffer would never be filled.
        preview = bytearray()
        cursor = start
        for _ in range(min(size, preview_limit)):
            preview.append(int(memory.getByte(cursor)) & 0xFF)
            cursor = cursor.next()
        print(f"REGION {args.start}..{args.end} BYTES {bytes(preview).hex()}")

        instruction = listing.getInstructionAt(start)
        while instruction is not None and instruction.getAddress().compareTo(end) <= 0:
            line = f"{instruction.getAddress()}: {instruction.toString()}"
            if instruction.getFlowType().isCall():
                references = instruction.getReferencesFrom()
                if references:
                    target = references[0].getToAddress()
                    function = function_manager.getFunctionAt(target)
                    if function is not None:
                        line += f" -> {function.getName()} @ {target}"
                    else:
                        line += f" -> {target}"
            print(line)
            instruction = instruction.getNext()
    return 0
@ -145,8 +411,11 @@ def command_create_function(config: ProjectConfig, args: argparse.Namespace) ->
if args.plate_comment:
set_comment(program, args.entry, args.plate_comment, "plate")
save_program(project, program)
print(f"created {function.getName()} at {args.entry}")
return 0
return _emit(
args,
{"status": "ok", "entry": args.entry, "name": function.getName(), "action": "create-function"},
f"created {function.getName()} at {args.entry}",
)
def command_delete_function(config: ProjectConfig, args: argparse.Namespace) -> int:
@ -156,8 +425,11 @@ def command_delete_function(config: ProjectConfig, args: argparse.Namespace) ->
if not removed:
raise RuntimeError(f"no function removed at {args.entry}")
save_program(project, program)
print(f"deleted function at {args.entry}")
return 0
return _emit(
args,
{"status": "ok", "entry": args.entry, "action": "delete-function"},
f"deleted function at {args.entry}",
)
def command_rename_function(config: ProjectConfig, args: argparse.Namespace) -> int:
@ -165,17 +437,276 @@ def command_rename_function(config: ProjectConfig, args: argparse.Namespace) ->
with transaction(program, f"Rename function {args.entry}"):
function = rename_function(program, args.entry, args.name)
save_program(project, program)
print(f"renamed {args.entry} to {function.getName()}")
return 0
return _emit(
args,
{"status": "ok", "entry": args.entry, "name": function.getName(), "action": "rename-function"},
f"renamed {args.entry} to {function.getName()}",
)
def _set_comment_with_type(config: ProjectConfig, args: argparse.Namespace, address: str, text: str, comment_type: str) -> int:
    """Set one comment of ``comment_type`` at ``address``, save, and report it.

    The program is opened writable, the comment is applied inside a
    transaction, and the program is saved before the result is emitted.
    """
    transaction_label = f"Set comment {address}"
    with open_program(config, read_only=False) as (project, program):
        with transaction(program, transaction_label):
            set_comment(program, address, text, comment_type)
        save_program(project, program)
    result = {
        "status": "ok",
        "address": address,
        "type": comment_type,
        "text": text,
        "action": "set-comment",
    }
    return _emit(args, result, f"set {comment_type} comment at {address}")
def command_set_comment(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Set a comment of the user-selected type (``args.type``) at ``args.address``.

    Delegates to ``_set_comment_with_type`` so the result is reported through
    ``_emit`` and honours ``--format``, like the other comment commands.
    """
    return _set_comment_with_type(config, args, args.address, args.text, args.type)
def command_set_decompiler_comment(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Set a ``"pre"`` comment at ``args.address`` with the text ``args.text``."""
    return _set_comment_with_type(
        config,
        args,
        address=args.address,
        text=args.text,
        comment_type="pre",
    )
def command_set_disassembly_comment(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Set an ``"eol"`` comment at ``args.address`` with the text ``args.text``."""
    return _set_comment_with_type(
        config,
        args,
        address=args.address,
        text=args.text,
        comment_type="eol",
    )
def _require_function_by_address(program, address_text: str):
    """Return the function that ``get_function`` finds for ``address_text``.

    Raises:
        RuntimeError: if no function is found.
    """
    found = get_function(program, address_text)
    if found is not None:
        return found
    raise RuntimeError(f"no function found at {address_text}")
def _require_single_function_by_name(program, name: str):
    """Return the only function whose name equals ``name`` exactly.

    Raises:
        RuntimeError: if no function has that name, or if several do.
    """
    candidates = get_functions_by_exact_name(program, name)
    if len(candidates) == 1:
        return candidates[0]
    if candidates:
        raise RuntimeError(
            f"multiple functions match exact name '{name}'; use search-functions-by-name or an address-specific command"
        )
    raise RuntimeError(f"no function found with exact name '{name}'")
def _print_function_lines(functions) -> None:
    """Print one ``<name> @ <entry point>`` line per function."""
    for item in functions:
        name = item.getName()
        entry = item.getEntryPoint()
        print(f"{name} @ {entry}")
def _print_reference_lines(references: list[dict[str, str | int]]) -> None:
for reference in references:
print(
f"{reference['from']} -> {reference['to']} [{reference['type']}] operand={reference['operand_index']}"
)
def command_get_function_by_address(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit metadata for the function found at ``args.address``.

    Raises:
        RuntimeError: if no function is found at that address.
    """
    with open_program(config, read_only=True) as (_project, program):
        target = _require_function_by_address(program, args.address)
        summary = format_function_summary(target)
        details = _function_to_dict(target)
    return _emit(args, details, summary)
def command_get_function_containing(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit metadata for the function that contains ``args.address``.

    Raises:
        RuntimeError: if no function contains that address.
    """
    with open_program(config, read_only=True) as (_project, program):
        owner = get_function_containing(program, args.address)
        if owner is None:
            raise RuntimeError(f"no containing function found at {args.address}")
        summary = format_function_summary(owner)
        details = _function_to_dict(owner)
    return _emit(args, details, summary)
def command_list_functions(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of functions, selected by ``args.offset`` and ``args.limit``.

    Uses ``search_functions_by_name`` with an empty query.
    """
    with open_program(config, read_only=True) as (_project, program):
        page = search_functions_by_name(program, "", offset=args.offset, limit=args.limit)
        records = []
        for item in page:
            records.append({"name": item.getName(), "entry": str(item.getEntryPoint())})
        rendered = [_function_line(item) for item in page]
    return _emit(args, records, _text_or_empty(rendered, "no functions found"))
def command_search_functions_by_name(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of the functions returned by ``search_functions_by_name`` for ``args.query``."""
    with open_program(config, read_only=True) as (_project, program):
        page = search_functions_by_name(program, args.query, offset=args.offset, limit=args.limit)
        records = []
        for item in page:
            records.append({"name": item.getName(), "entry": str(item.getEntryPoint())})
        rendered = [_function_line(item) for item in page]
    return _emit(args, records, _text_or_empty(rendered, "no matching functions found"))
def command_list_strings(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of strings from ``list_strings``, filtered by ``args.filter``."""
    with open_program(config, read_only=True) as (_project, program):
        strings = list_strings(program, offset=args.offset, limit=args.limit, filter_text=args.filter)
        rendered = []
        for entry in strings:
            rendered.append(f"{entry['address']}: {entry['text']}")
    return _emit(args, strings, _text_or_empty(rendered, "no strings found"))
def command_list_segments(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of memory segments with their range, length and flags."""
    with open_program(config, read_only=True) as (_project, program):
        segments = list_segments(program, offset=args.offset, limit=args.limit)
        rendered = []
        for entry in segments:
            span = f"{entry['name']} {entry['start']} - {entry['end']} len={entry['length']}"
            flags = f" r={entry['read']} w={entry['write']} x={entry['execute']} init={entry['initialized']}"
            rendered.append(span + flags)
    return _emit(args, segments, _text_or_empty(rendered, "no segments found"))
def command_list_data_items(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of data items; the value is shown only when it is not ``None``."""
    with open_program(config, read_only=True) as (_project, program):
        items = list_data_items(program, offset=args.offset, limit=args.limit)
        rendered = []
        for entry in items:
            line = f"{entry['address']} {entry['mnemonic']} len={entry['length']}"
            if entry['value'] is not None:
                line += f" value={entry['value']}"
            rendered.append(line)
    return _emit(args, items, _text_or_empty(rendered, "no data items found"))
def command_list_classes(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of classes; the parent is shown only when it is truthy."""
    with open_program(config, read_only=True) as (_project, program):
        classes = list_classes(program, offset=args.offset, limit=args.limit)
        rendered = []
        for entry in classes:
            line = f"{entry['name']}"
            if entry['parent']:
                line += f" parent={entry['parent']}"
            rendered.append(line)
    return _emit(args, classes, _text_or_empty(rendered, "no classes found"))
def command_list_imports(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of imports as ``<library>!<label> @ <address>``.

    A falsy label or address is replaced by a placeholder in the text form.
    """
    with open_program(config, read_only=True) as (_project, program):
        imports = list_imports(program, offset=args.offset, limit=args.limit)
        rendered = []
        for entry in imports:
            rendered.append(
                f"{entry['library']}!{entry['label'] or '<unnamed>'} @ {entry['address'] or '<no address>'}"
            )
    return _emit(args, imports, _text_or_empty(rendered, "no imports found"))
def command_list_exports(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of exports as ``<name> @ <address> [<kind>]``.

    A falsy name is replaced by ``<unnamed>`` in the text form.
    """
    with open_program(config, read_only=True) as (_project, program):
        exports = list_exports(program, offset=args.offset, limit=args.limit)
        rendered = []
        for entry in exports:
            rendered.append(f"{entry['name'] or '<unnamed>'} @ {entry['address']} [{entry['kind']}]")
    return _emit(args, exports, _text_or_empty(rendered, "no exports found"))
def command_list_namespaces(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of namespaces as ``<name> [<type>]`` plus the parent when truthy."""
    with open_program(config, read_only=True) as (_project, program):
        namespaces = list_namespaces(program, offset=args.offset, limit=args.limit)
        rendered = []
        for entry in namespaces:
            line = f"{entry['name']} [{entry['type']}]"
            if entry['parent']:
                line += f" parent={entry['parent']}"
            rendered.append(line)
    return _emit(args, namespaces, _text_or_empty(rendered, "no namespaces found"))
def command_decompile_function_by_address(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Decompile the function found at ``args.address`` and emit the result.

    ``args.timeout`` is forwarded to ``decompile_function``.
    """
    with open_program(config, read_only=True) as (_project, program):
        target = _require_function_by_address(program, args.address)
        source_text = decompile_function(program, target, args.timeout)
    result = {"address": args.address, "decompiled": source_text}
    return _emit(args, result, source_text)
def command_decompile_function(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Decompile the single function named exactly ``args.name`` and emit the result.

    ``args.timeout`` is forwarded to ``decompile_function``.
    """
    with open_program(config, read_only=True) as (_project, program):
        target = _require_single_function_by_name(program, args.name)
        source_text = decompile_function(program, target, args.timeout)
    result = {"name": args.name, "decompiled": source_text}
    return _emit(args, result, source_text)
def command_disassemble_function(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit the disassembly lines of the function found at ``args.address``.

    When ``disassemble_function`` returns nothing, a single diagnostic line
    is emitted instead, naming the body range and the code unit at the entry
    point.
    """
    with open_program(config, read_only=True) as (_project, program):
        target = _require_function_by_address(program, args.address)
        lines = disassemble_function(program, target)
        if not lines:
            entry_unit = program.getListing().getCodeUnitAt(target.getEntryPoint())
            diagnostic = f"no instructions found in body {target.getBody().getMinAddress()} - {target.getBody().getMaxAddress()}; entry code unit = {entry_unit}"
            lines = [diagnostic]
    result = {"address": args.address, "lines": lines}
    return _emit(args, result, "\n".join(lines))
def command_read_region(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit the bytes of ``args.start``..``args.end`` as a hex string."""
    with open_program(config, read_only=True) as (_project, program):
        raw = read_region_bytes(program, args.start, args.end)
    hex_text = raw.hex()
    result = {"start": args.start, "end": args.end, "bytes": hex_text}
    return _emit(args, result, f"REGION {args.start}..{args.end} BYTES {hex_text}")
def command_get_xrefs_to(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of the references that ``get_xrefs_to`` returns for ``args.address``."""
    with open_program(config, read_only=True) as (_project, program):
        references = get_xrefs_to(program, args.address, offset=args.offset, limit=args.limit)
        rendered = []
        for reference in references:
            rendered.append(
                f"{reference['from']} -> {reference['to']} [{reference['type']}] operand={reference['operand_index']}"
            )
    return _emit(args, references, _text_or_empty(rendered, "no xrefs found"))
def command_get_xrefs_from(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of the references that ``get_xrefs_from`` returns for ``args.address``."""
    with open_program(config, read_only=True) as (_project, program):
        references = get_xrefs_from(program, args.address, offset=args.offset, limit=args.limit)
        rendered = []
        for reference in references:
            rendered.append(
                f"{reference['from']} -> {reference['to']} [{reference['type']}] operand={reference['operand_index']}"
            )
    return _emit(args, references, _text_or_empty(rendered, "no xrefs found"))
def command_get_function_xrefs(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Emit one page of references to the entry point of the function named ``args.name``.

    The name must match exactly one function; its entry point is passed to
    ``get_xrefs_to`` as text.
    """
    with open_program(config, read_only=True) as (_project, program):
        target = _require_single_function_by_name(program, args.name)
        entry_text = str(target.getEntryPoint())
        references = get_xrefs_to(program, entry_text, offset=args.offset, limit=args.limit)
        rendered = []
        for reference in references:
            rendered.append(
                f"{reference['from']} -> {reference['to']} [{reference['type']}] operand={reference['operand_index']}"
            )
    return _emit(args, references, _text_or_empty(rendered, "no xrefs found"))
def command_run_script(config: ProjectConfig, args: argparse.Namespace) -> int:
    """Run a Python script file with the opened project and program in its globals.

    The script receives ``config``, ``project``, ``program`` and a
    ``helpers`` dict of utility functions. The program is saved afterwards
    unless ``args.read_only`` is set.

    Raises:
        RuntimeError: if ``args.script`` does not resolve to an existing file.
    """
    script_path = Path(args.script).resolve()
    if not script_path.is_file():
        raise RuntimeError(f"script file not found: {script_path}")

    helpers = {
        "create_function": create_function,
        "decompile_function": decompile_function,
        "disassemble_function": disassemble_function,
        "format_function_summary": format_function_summary,
        "get_function": get_function,
        "get_function_containing": get_function_containing,
        "get_xrefs_from": get_xrefs_from,
        "get_xrefs_to": get_xrefs_to,
        "read_region_bytes": read_region_bytes,
        "rename_function": rename_function,
        "set_comment": set_comment,
    }
    read_only = args.read_only
    with open_program(config, read_only=read_only) as (project, program):
        script_globals = {
            "config": config,
            "project": project,
            "program": program,
            "helpers": helpers,
        }
        run_script_file(script_path, script_globals)
        if not read_only:
            save_program(project, program)
    result = {"status": "ok", "script": str(script_path)}
    return _emit(args, result, f"ran script {script_path}")
def _load_plan(plan_path: str) -> dict:
@ -190,6 +721,9 @@ def _print_plan(plan: dict) -> None:
def command_apply_plan(config: ProjectConfig, args: argparse.Namespace) -> int:
plan = _load_plan(args.plan)
if args.dry_run:
if args.format == "json":
_print_plan(plan)
return 0
_print_plan(plan)
return 0
@ -234,8 +768,7 @@ def command_apply_plan(config: ProjectConfig, args: argparse.Namespace) -> int:
save_program(project, program)
print(f"applied plan {args.plan}")
return 0
return _emit(args, {"status": "ok", "plan": args.plan}, f"applied plan {args.plan}")
def main(argv: list[str] | None = None) -> int:
@ -244,11 +777,34 @@ def main(argv: list[str] | None = None) -> int:
config = build_config(args)
command_map = {
"dump-region": command_dump_region,
"project-files": command_project_files,
"create-function": command_create_function,
"delete-function": command_delete_function,
"rename-function": command_rename_function,
"rename-function-by-address": command_rename_function,
"set-comment": command_set_comment,
"set-decompiler-comment": command_set_decompiler_comment,
"set-disassembly-comment": command_set_disassembly_comment,
"get-function-by-address": command_get_function_by_address,
"get-function-containing": command_get_function_containing,
"list-functions": command_list_functions,
"list-segments": command_list_segments,
"list-data-items": command_list_data_items,
"list-classes": command_list_classes,
"list-strings": command_list_strings,
"list-imports": command_list_imports,
"list-exports": command_list_exports,
"list-namespaces": command_list_namespaces,
"search-functions-by-name": command_search_functions_by_name,
"decompile-function": command_decompile_function,
"decompile-function-by-address": command_decompile_function_by_address,
"disassemble-function": command_disassemble_function,
"read-region": command_read_region,
"get-xrefs-to": command_get_xrefs_to,
"get-xrefs-from": command_get_xrefs_from,
"get-function-xrefs": command_get_function_xrefs,
"run-script": command_run_script,
"apply-plan": command_apply_plan,
}
return command_map[args.command](config, args)

View file

@ -4,6 +4,7 @@ from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
import os
import sys
REPO_ROOT = Path(__file__).resolve().parents[2]
@ -31,10 +32,29 @@ def ensure_pyghidra_started(install_dir: Path | None = None):
resolved_dir = Path(install_dir or DEFAULT_INSTALL_DIR)
if not pyghidra.started():
pyghidra.start(install_dir=resolved_dir)
with suppress_process_output():
pyghidra.start(install_dir=resolved_dir)
return pyghidra
@contextmanager
def suppress_process_output():
    """Temporarily point file descriptors 1 and 2 at ``os.devnull``.

    Redirecting at the descriptor level also hides output written by code
    that bypasses ``sys.stdout``/``sys.stderr``. The original descriptors are
    restored when the block exits, including when it raises.
    """
    with open(os.devnull, "w", encoding="utf-8") as devnull:
        # Push out anything already buffered so it still reaches the real streams.
        sys.stdout.flush()
        sys.stderr.flush()
        saved_stdout = os.dup(1)
        try:
            saved_stderr = os.dup(2)
            try:
                os.dup2(devnull.fileno(), 1)
                os.dup2(devnull.fileno(), 2)
                yield
            finally:
                # Flush while still redirected; otherwise text buffered during
                # the block would appear on the real streams after the restore.
                for stream in (sys.stdout, sys.stderr):
                    try:
                        stream.flush()
                    except (OSError, ValueError):
                        # Best effort: a closed or broken stream must not
                        # prevent the descriptors from being restored.
                        pass
                os.dup2(saved_stdout, 1)
                os.dup2(saved_stderr, 2)
                os.close(saved_stderr)
        finally:
            os.close(saved_stdout)
def parse_address_text(address_text: str) -> int:
text = address_text.strip()
if ":" in text:
@ -48,6 +68,19 @@ def to_address(program, address_text: str):
return address_space.getAddress(parse_address_text(address_text))
def format_address(address) -> str:
    """Return the ``str()`` form of ``address``."""
    text = str(address)
    return text
def iter_java_items(items):
    """Yield every element of ``items``.

    Objects exposing both ``hasNext`` and ``next`` are drained through those
    two methods; anything else is iterated with the normal Python protocol.
    """
    java_style = hasattr(items, "hasNext") and hasattr(items, "next")
    if not java_style:
        yield from items
        return
    while items.hasNext():
        yield items.next()
def format_project_error(config: ProjectConfig, exc: Exception) -> RuntimeError:
lock_path = config.project_dir / f"{config.project_name}.lock"
details = [
@ -127,6 +160,141 @@ def get_function(program, entry_text: str):
return program.getFunctionManager().getFunctionAt(to_address(program, entry_text))
def get_function_containing(program, address_text: str):
    """Return the function manager's ``getFunctionContaining`` result for ``address_text``."""
    address = to_address(program, address_text)
    function_manager = program.getFunctionManager()
    return function_manager.getFunctionContaining(address)
def read_region_bytes(program, start_text: str, end_text: str) -> bytes:
    """Read the bytes of the inclusive range ``start_text``..``end_text``.

    Bytes are fetched one at a time with ``memory.getByte`` and masked to the
    unsigned range 0-255.

    Raises:
        ValueError: if the end address lies before the start address.
    """
    memory = program.getMemory()
    start = to_address(program, start_text)
    end = to_address(program, end_text)
    size = end.subtract(start) + 1
    # The range is inclusive, so a valid request always covers at least one
    # byte; a size of zero means end == start - 1, which is also inverted.
    if size <= 0:
        raise ValueError(f"invalid address range: {start_text}..{end_text}")

    data = bytearray()
    current = start
    for _ in range(size):
        data.append(int(memory.getByte(current)) & 0xFF)
        current = current.next()
    return bytes(data)
def iter_functions(program):
    """Return ``getFunctions(True)`` from the program's function manager."""
    function_manager = program.getFunctionManager()
    return function_manager.getFunctions(True)
def function_signature(function) -> str:
    """Return the function's prototype string, requested with both flags set to ``True``."""
    prototype = function.getPrototypeString(True, True)
    return prototype
def function_body_range(function) -> tuple[str, str]:
    """Return the formatted minimum and maximum addresses of the function body."""
    body = function.getBody()
    lowest = format_address(body.getMinAddress())
    highest = format_address(body.getMaxAddress())
    return lowest, highest
def format_function_summary(function) -> str:
    """Return a four-line summary: name and entry, signature, entry, body range.

    The lines are labelled ``Function:``, ``Signature:``, ``Entry:`` and
    ``Body:`` and are joined with newlines, with no trailing newline.
    """
    body_start, body_end = function_body_range(function)
    summary_lines = [
        f"Function: {function.getName()} at {format_address(function.getEntryPoint())}",
        f"Signature: {function_signature(function)}",
        f"Entry: {format_address(function.getEntryPoint())}",
        f"Body: {body_start} - {body_end}",
    ]
    return "\n".join(summary_lines)
def list_segments(program, offset: int = 0, limit: int = 100):
    """Return one page of memory blocks as plain dicts.

    Args:
        program: program whose memory blocks are listed.
        offset: number of leading blocks to skip; values below 1 skip nothing.
        limit: maximum number of entries to return; values below 1 yield an
            empty list.

    Returns:
        A list of dicts with the keys ``name``, ``start``, ``end``,
        ``length``, ``initialized``, ``read``, ``write`` and ``execute``.
    """
    # The limit check below runs after appending, so a non-positive limit
    # has to be rejected up front to avoid returning one entry.
    if limit <= 0:
        return []

    memory = program.getMemory()
    matches = []
    skipped = 0
    for block in memory.getBlocks():
        if skipped < offset:
            skipped += 1
            continue
        matches.append(
            {
                "name": block.getName(),
                "start": format_address(block.getStart()),
                "end": format_address(block.getEnd()),
                "length": int(block.getSize()),
                "initialized": bool(block.isInitialized()),
                "read": bool(block.isRead()),
                "write": bool(block.isWrite()),
                "execute": bool(block.isExecute()),
            }
        )
        if len(matches) >= limit:
            break
    return matches
def list_data_items(program, offset: int = 0, limit: int = 100):
    """Return one page of the listing's defined data as plain dicts.

    Args:
        program: program whose listing is read.
        offset: number of leading items to skip; values below 1 skip nothing.
        limit: maximum number of entries to return; values below 1 yield an
            empty list.

    Returns:
        A list of dicts with the keys ``address``, ``length``, ``mnemonic``
        and ``value`` (``None`` when the data item has no value, otherwise
        its ``str()`` form).
    """
    # The limit check below runs after appending, so a non-positive limit
    # has to be rejected up front to avoid returning one entry.
    if limit <= 0:
        return []

    listing = program.getListing()
    matches = []
    skipped = 0
    for data in iter_java_items(listing.getDefinedData(True)):
        if skipped < offset:
            skipped += 1
            continue
        value = data.getValue()
        matches.append(
            {
                "address": format_address(data.getAddress()),
                "length": int(data.getLength()),
                "mnemonic": data.getMnemonicString(),
                "value": None if value is None else str(value),
            }
        )
        if len(matches) >= limit:
            break
    return matches
def list_classes(program, offset: int = 0, limit: int = 100):
    """Return one page of class symbols, sorted by parent name and then name.

    All class symbols are collected and sorted before the page is cut, so
    paging is stable across calls on an unchanged program.

    Args:
        program: program whose symbol table is scanned.
        offset: number of leading entries to skip; values below 1 skip nothing.
        limit: maximum number of entries to return; values below 1 yield an
            empty list.

    Returns:
        A list of dicts with the keys ``name`` and ``parent``; ``parent`` is
        ``None`` when there is no parent namespace or it is the global one.
    """
    from ghidra.program.model.symbol import SymbolType

    if limit <= 0:
        return []

    symbol_table = program.getSymbolTable()
    matches = []
    for symbol in iter_java_items(symbol_table.getDefinedSymbols()):
        if symbol.getSymbolType() != SymbolType.CLASS:
            continue
        namespace = symbol.getObject()
        parent = namespace.getParentNamespace() if namespace is not None else None
        matches.append(
            {
                "name": symbol.getName(),
                "parent": None if parent is None or parent.isGlobal() else parent.getName(),
            }
        )

    matches.sort(key=lambda entry: (entry["parent"] or "", entry["name"]))
    # Clamp so a negative offset means "from the start", matching the other
    # listing helpers, instead of slicing from the end of the list.
    first = max(offset, 0)
    return matches[first: first + limit]
def search_functions_by_name(program, query: str, offset: int = 0, limit: int = 100):
    """Return one page of functions whose name contains ``query``, ignoring case.

    An empty query matches every function.

    Args:
        program: program whose functions are scanned.
        query: substring to look for in each function name.
        offset: number of leading matches to skip; values below 1 skip nothing.
        limit: maximum number of functions to return; values below 1 yield an
            empty list.
    """
    # The limit check below runs after appending, so a non-positive limit
    # has to be rejected up front to avoid returning one match.
    if limit <= 0:
        return []

    lowered = query.lower()
    matches = []
    skipped = 0
    for function in iter_java_items(iter_functions(program)):
        if lowered not in function.getName().lower():
            continue
        if skipped < offset:
            skipped += 1
            continue
        matches.append(function)
        if len(matches) >= limit:
            break
    return matches
def get_functions_by_exact_name(program, name: str):
    """Return a list of every function whose name equals ``name`` exactly."""
    return [
        candidate
        for candidate in iter_java_items(iter_functions(program))
        if candidate.getName() == name
    ]
def create_function(program, entry_text: str, name: str, body_start: str | None, body_end: str | None):
from ghidra.program.model.address import AddressSet
from ghidra.program.model.symbol import SourceType
@ -157,6 +325,199 @@ def rename_function(program, entry_text: str, new_name: str):
return function
def decompile_function(program, function, timeout_seconds: int = 30) -> str:
    """Decompile ``function`` and return the generated C text.

    Args:
        program: Ghidra program that owns ``function``.
        function: Function object to decompile.
        timeout_seconds: Timeout passed to the decompiler for this function.

    Returns:
        The C source produced by the decompiler.

    Raises:
        RuntimeError: If the decompiler cannot open the program, the
            decompilation does not complete, or no function text is produced.
    """
    from ghidra.app.decompiler import DecompInterface
    from ghidra.util.task import ConsoleTaskMonitor

    interface = DecompInterface()
    # Everything after construction lives inside the try block so the
    # interface is disposed even when openProgram itself fails.
    try:
        # openProgram reports failure through its boolean result rather than
        # an exception; surface it instead of failing later with a vaguer
        # decompilation error.
        if not interface.openProgram(program):
            raise RuntimeError(
                interface.getLastMessage() or "decompiler failed to open program"
            )
        result = interface.decompileFunction(function, timeout_seconds, ConsoleTaskMonitor())
        if not result.decompileCompleted():
            error_message = result.getErrorMessage() or "decompilation did not complete"
            raise RuntimeError(error_message)
        decompiled = result.getDecompiledFunction()
        if decompiled is None:
            raise RuntimeError("decompiler returned no function text")
        return decompiled.getC()
    finally:
        interface.dispose()
def disassemble_function(program, function) -> list[str]:
    """Render the instructions of ``function`` as text lines.

    Each line has the form ``<address>: <instruction>``. For call
    instructions that have outgoing references, the resolved target is
    appended as `` -> <name> @ <address>`` when a function starts at the
    target, or `` -> <address>`` otherwise. A non-empty end-of-line comment
    is appended as `` ; <comment>``.

    Args:
        program: Ghidra program that owns ``function``.
        function: Function whose body is disassembled.

    Returns:
        One formatted string per instruction in the function body.
    """
    from ghidra.program.model.listing import CodeUnit

    listing = program.getListing()
    function_manager = program.getFunctionManager()
    lines = []
    for instruction in iter_java_items(listing.getInstructions(function.getBody(), True)):
        line = f"{format_address(instruction.getAddress())}: {instruction.toString()}"
        if instruction.getFlowType().isCall():
            references = instruction.getReferencesFrom()
            if references:
                # A call instruction can also carry data/read references on
                # its operands, so prefer the reference that is itself a call
                # and only fall back to the first one when none is.
                call_reference = next(
                    (ref for ref in references if ref.getReferenceType().isCall()),
                    references[0],
                )
                target = call_reference.getToAddress()
                target_function = function_manager.getFunctionAt(target)
                if target_function is not None:
                    line += f" -> {target_function.getName()} @ {format_address(target)}"
                else:
                    line += f" -> {format_address(target)}"
        comment = instruction.getComment(CodeUnit.EOL_COMMENT)
        if comment:
            line += f" ; {comment}"
        lines.append(line)
    return lines
def _reference_dict(reference) -> dict[str, str | int]:
    """Flatten a reference object into a plain dict.

    The result carries the formatted source and destination addresses, the
    reference type as a string, and the operand index as an int.
    """
    source = format_address(reference.getFromAddress())
    destination = format_address(reference.getToAddress())
    kind = str(reference.getReferenceType())
    operand = int(reference.getOperandIndex())
    return {
        "from": source,
        "to": destination,
        "type": kind,
        "operand_index": operand,
    }
def get_xrefs_to(program, address_text: str, offset: int = 0, limit: int = 100) -> list[dict[str, str | int]]:
    """Return one page of references that point to ``address_text``.

    Args:
        program: Ghidra program whose reference manager is queried.
        address_text: Target address, parsed with ``to_address``.
        offset: Number of leading references to skip; values <= 0 skip nothing.
        limit: Maximum number of references to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with ``from``, ``to``, ``type`` and ``operand_index``
        keys, one per reference.
    """
    # Resolve the address first so an invalid address is still reported even
    # when the caller asks for an empty page.
    target_address = to_address(program, address_text)
    # The loop checks the limit only after appending; guard so limit <= 0
    # does not return a single reference.
    if limit <= 0:
        return []

    results = []
    remaining_to_skip = offset
    references = program.getReferenceManager().getReferencesTo(target_address)
    for reference in iter_java_items(references):
        if remaining_to_skip > 0:
            remaining_to_skip -= 1
            continue
        results.append(
            {
                "from": format_address(reference.getFromAddress()),
                "to": format_address(reference.getToAddress()),
                "type": str(reference.getReferenceType()),
                "operand_index": int(reference.getOperandIndex()),
            }
        )
        if len(results) >= limit:
            break
    return results
def get_xrefs_from(program, address_text: str, offset: int = 0, limit: int = 100) -> list[dict[str, str | int]]:
    """Return one page of references that originate at ``address_text``.

    Args:
        program: Ghidra program whose reference manager is queried.
        address_text: Source address, parsed with ``to_address``.
        offset: Number of leading references to skip; values <= 0 skip nothing.
        limit: Maximum number of references to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with ``from``, ``to``, ``type`` and ``operand_index``
        keys, one per reference.
    """
    # Resolve the address first so an invalid address is still reported even
    # when the caller asks for an empty page.
    source_address = to_address(program, address_text)
    # The loop checks the limit only after appending; guard so limit <= 0
    # does not return a single reference.
    if limit <= 0:
        return []

    results = []
    remaining_to_skip = offset
    references = program.getReferenceManager().getReferencesFrom(source_address)
    for reference in iter_java_items(references):
        if remaining_to_skip > 0:
            remaining_to_skip -= 1
            continue
        results.append(
            {
                "from": format_address(reference.getFromAddress()),
                "to": format_address(reference.getToAddress()),
                "type": str(reference.getReferenceType()),
                "operand_index": int(reference.getOperandIndex()),
            }
        )
        if len(results) >= limit:
            break
    return results
def list_strings(program, offset: int = 0, limit: int = 2000, filter_text: str | None = None):
    """Return one page of defined string data, optionally filtered.

    Only data items that report a string value are considered. When
    ``filter_text`` is non-empty, a string is kept only if it contains the
    filter as a case-insensitive substring. ``offset`` counts strings that
    passed the filter.

    Args:
        program: Ghidra program whose listing is scanned.
        offset: Number of matching strings to skip; values <= 0 skip nothing.
        limit: Maximum number of entries to return; values <= 0 yield ``[]``.
        filter_text: Optional substring filter; ``None`` or ``""`` disables it.

    Returns:
        A list of dicts with ``address``, ``length`` and ``text`` keys.
    """
    # The loop checks the limit only after appending; guard so limit <= 0
    # does not return a single string.
    if limit <= 0:
        return []

    needle = filter_text.lower() if filter_text else None
    entries = []
    remaining_to_skip = offset
    for data in iter_java_items(program.getListing().getDefinedData(True)):
        if not data.hasStringValue():
            continue
        text = str(data.getValue())
        if needle and needle not in text.lower():
            continue
        if remaining_to_skip > 0:
            remaining_to_skip -= 1
            continue
        entries.append(
            {
                "address": format_address(data.getAddress()),
                "length": int(data.getLength()),
                "text": text,
            }
        )
        if len(entries) >= limit:
            break
    return entries
def list_imports(program, offset: int = 0, limit: int = 100):
    """Return one page of external locations across all external libraries.

    Libraries are visited in the order reported by the external manager, and
    ``offset``/``limit`` are counted across the combined sequence of
    locations rather than per library.

    Args:
        program: Ghidra program whose external manager is queried.
        offset: Number of leading locations to skip; values <= 0 skip nothing.
        limit: Maximum number of entries to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with ``library``, ``label`` and ``address`` keys.
        ``label`` and ``address`` are ``None`` when the location has none.
    """
    # The loop checks the limit only after appending; guard so limit <= 0
    # does not return a single import.
    if limit <= 0:
        return []

    external_manager = program.getExternalManager()
    entries = []
    remaining_to_skip = offset
    for library_name in external_manager.getExternalLibraryNames():
        locations = external_manager.getExternalLocations(library_name)
        for location in iter_java_items(locations):
            if remaining_to_skip > 0:
                remaining_to_skip -= 1
                continue
            label = location.getLabel()
            address = location.getAddress()
            entries.append(
                {
                    "library": str(library_name),
                    "label": str(label) if label is not None else None,
                    "address": format_address(address) if address is not None else None,
                }
            )
            if len(entries) >= limit:
                # Return directly: a plain break would only leave the inner loop.
                return entries
    return entries
def list_exports(program, offset: int = 0, limit: int = 100):
    """Return one page of the program's external entry points.

    For each entry-point address the name and kind are taken from the
    function starting there when one exists, otherwise from the primary
    symbol at that address, otherwise they fall back to ``None`` and
    ``"unknown"``.

    Args:
        program: Ghidra program whose symbol table is queried.
        offset: Number of leading entry points to skip; values <= 0 skip nothing.
        limit: Maximum number of entries to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with ``address``, ``name`` and ``kind`` keys.
    """
    # The loop checks the limit only after appending; guard so limit <= 0
    # does not return a single export.
    if limit <= 0:
        return []

    symbol_table = program.getSymbolTable()
    function_manager = program.getFunctionManager()
    entries = []
    remaining_to_skip = offset
    for address in iter_java_items(symbol_table.getExternalEntryPointIterator()):
        if remaining_to_skip > 0:
            remaining_to_skip -= 1
            continue
        function = function_manager.getFunctionAt(address)
        primary_symbol = symbol_table.getPrimarySymbol(address)
        if function is not None:
            name = function.getName()
            kind = "function"
        elif primary_symbol is not None:
            name = primary_symbol.getName()
            kind = str(primary_symbol.getSymbolType())
        else:
            name = None
            kind = "unknown"
        entries.append(
            {
                "address": format_address(address),
                "name": name,
                "kind": kind,
            }
        )
        if len(entries) >= limit:
            break
    return entries
def list_namespaces(program, offset: int = 0, limit: int = 100):
    """Return one page of namespace-like symbols.

    Defined symbols of type NAMESPACE, CLASS or LIBRARY are reported in
    symbol-table iteration order. ``offset`` counts only symbols of those
    types.

    Args:
        program: Ghidra program whose symbol table is scanned.
        offset: Number of matching symbols to skip; values <= 0 skip nothing.
        limit: Maximum number of entries to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with ``name``, ``type`` and ``parent`` keys.
        ``parent`` is ``None`` when there is no parent namespace or the
        parent is the global namespace.
    """
    from ghidra.program.model.symbol import SymbolType

    # The loop checks the limit only after appending; guard so limit <= 0
    # does not return a single namespace.
    if limit <= 0:
        return []

    # Built once instead of on every loop iteration.
    wanted_types = (SymbolType.NAMESPACE, SymbolType.CLASS, SymbolType.LIBRARY)
    entries = []
    remaining_to_skip = offset
    for symbol in iter_java_items(program.getSymbolTable().getDefinedSymbols()):
        symbol_type = symbol.getSymbolType()
        if symbol_type not in wanted_types:
            continue
        if remaining_to_skip > 0:
            # Skipped entries never need their parent resolved.
            remaining_to_skip -= 1
            continue
        namespace = symbol.getObject()
        parent = namespace.getParentNamespace() if namespace is not None else None
        parent_name = None if parent is None or parent.isGlobal() else parent.getName()
        entries.append(
            {
                "name": symbol.getName(),
                "type": str(symbol_type),
                "parent": parent_name,
            }
        )
        if len(entries) >= limit:
            break
    return entries
def run_script_file(script_path: Path, globals_dict: dict):
    """Execute the Python file at ``script_path`` and return its namespace.

    The script runs in a shallow copy of ``globals_dict`` so the caller's
    dict is left untouched. ``__name__`` and ``__file__`` are filled in with
    ``"__main__"`` and the script path only when the caller did not supply
    them. The file is read as UTF-8.

    Returns:
        The namespace dict after the script has finished executing.
    """
    namespace = dict(globals_dict)
    script_name = str(script_path)
    for key, fallback in (("__name__", "__main__"), ("__file__", script_name)):
        if key not in namespace:
            namespace[key] = fallback

    source = script_path.read_text(encoding="utf-8")
    compiled = compile(source, script_name, "exec")
    # One dict serves as both globals and locals, so top-level definitions in
    # the script stay visible to the functions it defines.
    exec(compiled, namespace)
    return namespace
def set_comment(program, address_text: str, comment: str, comment_type: str):
from ghidra.program.model.listing import CodeUnit
@ -171,9 +532,14 @@ def set_comment(program, address_text: str, comment: str, comment_type: str):
raise ValueError(f"unsupported comment type: {comment_type}")
listing = program.getListing()
code_unit = listing.getCodeUnitAt(to_address(program, address_text))
target_address = to_address(program, address_text)
code_unit = listing.getCodeUnitAt(target_address)
if code_unit is None:
raise ValueError(f"no code unit found at {address_text}")
function = program.getFunctionManager().getFunctionAt(target_address)
if function is not None:
function.setComment(comment)
return
raise ValueError(f"no code unit or function found at {address_text}")
code_unit.setComment(comment_types[comment_type], comment)