Enhance CLI functionality and improve common utilities
- Added new commands to the CLI for dumping regions, renaming functions by address, and setting various types of comments. - Implemented JSON output formatting for CLI commands. - Introduced functions for decompiling and disassembling functions, as well as retrieving cross-references. - Enhanced common utilities with functions for reading memory regions, iterating Java items, and managing function metadata. - Added suppress_output context manager to hide process output during Ghidra startup. - Updated existing functions to improve error handling and output formatting.
This commit is contained in:
parent
24d4416003
commit
a56851f994
16 changed files with 1072 additions and 36 deletions
|
|
@ -4,6 +4,7 @@ from contextlib import contextmanager
|
|||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
|
|
@ -31,10 +32,29 @@ def ensure_pyghidra_started(install_dir: Path | None = None):
|
|||
|
||||
resolved_dir = Path(install_dir or DEFAULT_INSTALL_DIR)
|
||||
if not pyghidra.started():
|
||||
pyghidra.start(install_dir=resolved_dir)
|
||||
with suppress_process_output():
|
||||
pyghidra.start(install_dir=resolved_dir)
|
||||
return pyghidra
|
||||
|
||||
|
||||
@contextmanager
|
||||
def suppress_process_output():
|
||||
with open(os.devnull, "w", encoding="utf-8") as devnull:
|
||||
original_stdout = os.dup(1)
|
||||
original_stderr = os.dup(2)
|
||||
try:
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
os.dup2(devnull.fileno(), 1)
|
||||
os.dup2(devnull.fileno(), 2)
|
||||
yield
|
||||
finally:
|
||||
os.dup2(original_stdout, 1)
|
||||
os.dup2(original_stderr, 2)
|
||||
os.close(original_stdout)
|
||||
os.close(original_stderr)
|
||||
|
||||
|
||||
def parse_address_text(address_text: str) -> int:
|
||||
text = address_text.strip()
|
||||
if ":" in text:
|
||||
|
|
@ -48,6 +68,19 @@ def to_address(program, address_text: str):
|
|||
return address_space.getAddress(parse_address_text(address_text))
|
||||
|
||||
|
||||
def format_address(address) -> str:
|
||||
return str(address)
|
||||
|
||||
|
||||
def iter_java_items(items):
|
||||
if hasattr(items, "hasNext") and hasattr(items, "next"):
|
||||
while items.hasNext():
|
||||
yield items.next()
|
||||
return
|
||||
for item in items:
|
||||
yield item
|
||||
|
||||
|
||||
def format_project_error(config: ProjectConfig, exc: Exception) -> RuntimeError:
|
||||
lock_path = config.project_dir / f"{config.project_name}.lock"
|
||||
details = [
|
||||
|
|
@ -127,6 +160,141 @@ def get_function(program, entry_text: str):
|
|||
return program.getFunctionManager().getFunctionAt(to_address(program, entry_text))
|
||||
|
||||
|
||||
def get_function_containing(program, address_text: str):
|
||||
return program.getFunctionManager().getFunctionContaining(to_address(program, address_text))
|
||||
|
||||
|
||||
def read_region_bytes(program, start_text: str, end_text: str) -> bytes:
|
||||
memory = program.getMemory()
|
||||
start = to_address(program, start_text)
|
||||
end = to_address(program, end_text)
|
||||
size = end.subtract(start) + 1
|
||||
if size < 0:
|
||||
raise ValueError(f"invalid address range: {start_text}..{end_text}")
|
||||
|
||||
data = bytearray()
|
||||
current = start
|
||||
for _ in range(size):
|
||||
data.append(int(memory.getByte(current)) & 0xFF)
|
||||
current = current.next()
|
||||
return bytes(data)
|
||||
|
||||
|
||||
def iter_functions(program):
|
||||
return program.getFunctionManager().getFunctions(True)
|
||||
|
||||
|
||||
def function_signature(function) -> str:
|
||||
return function.getPrototypeString(True, True)
|
||||
|
||||
|
||||
def function_body_range(function) -> tuple[str, str]:
|
||||
body = function.getBody()
|
||||
return format_address(body.getMinAddress()), format_address(body.getMaxAddress())
|
||||
|
||||
|
||||
def format_function_summary(function) -> str:
|
||||
body_start, body_end = function_body_range(function)
|
||||
return (
|
||||
f"Function: {function.getName()} at {format_address(function.getEntryPoint())}\n"
|
||||
f"Signature: {function_signature(function)}\n"
|
||||
f"Entry: {format_address(function.getEntryPoint())}\n"
|
||||
f"Body: {body_start} - {body_end}"
|
||||
)
|
||||
|
||||
|
||||
def list_segments(program, offset: int = 0, limit: int = 100):
|
||||
memory = program.getMemory()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for block in memory.getBlocks():
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
matches.append(
|
||||
{
|
||||
"name": block.getName(),
|
||||
"start": format_address(block.getStart()),
|
||||
"end": format_address(block.getEnd()),
|
||||
"length": int(block.getSize()),
|
||||
"initialized": bool(block.isInitialized()),
|
||||
"read": bool(block.isRead()),
|
||||
"write": bool(block.isWrite()),
|
||||
"execute": bool(block.isExecute()),
|
||||
}
|
||||
)
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def list_data_items(program, offset: int = 0, limit: int = 100):
|
||||
listing = program.getListing()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for data in iter_java_items(listing.getDefinedData(True)):
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
value = data.getValue()
|
||||
matches.append(
|
||||
{
|
||||
"address": format_address(data.getAddress()),
|
||||
"length": int(data.getLength()),
|
||||
"mnemonic": data.getMnemonicString(),
|
||||
"value": None if value is None else str(value),
|
||||
}
|
||||
)
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def list_classes(program, offset: int = 0, limit: int = 100):
|
||||
from ghidra.program.model.symbol import SymbolType
|
||||
|
||||
symbol_table = program.getSymbolTable()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for symbol in iter_java_items(symbol_table.getDefinedSymbols()):
|
||||
if symbol.getSymbolType() != SymbolType.CLASS:
|
||||
continue
|
||||
namespace = symbol.getObject()
|
||||
parent = namespace.getParentNamespace() if namespace is not None else None
|
||||
matches.append(
|
||||
{
|
||||
"name": symbol.getName(),
|
||||
"parent": None if parent is None or parent.isGlobal() else parent.getName(),
|
||||
}
|
||||
)
|
||||
matches.sort(key=lambda entry: (entry["parent"] or "", entry["name"]))
|
||||
return matches[offset: offset + limit]
|
||||
|
||||
|
||||
def search_functions_by_name(program, query: str, offset: int = 0, limit: int = 100):
|
||||
lowered = query.lower()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for function in iter_java_items(iter_functions(program)):
|
||||
if lowered not in function.getName().lower():
|
||||
continue
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
matches.append(function)
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def get_functions_by_exact_name(program, name: str):
|
||||
matches = []
|
||||
for function in iter_java_items(iter_functions(program)):
|
||||
if function.getName() == name:
|
||||
matches.append(function)
|
||||
return matches
|
||||
|
||||
|
||||
def create_function(program, entry_text: str, name: str, body_start: str | None, body_end: str | None):
|
||||
from ghidra.program.model.address import AddressSet
|
||||
from ghidra.program.model.symbol import SourceType
|
||||
|
|
@ -157,6 +325,199 @@ def rename_function(program, entry_text: str, new_name: str):
|
|||
return function
|
||||
|
||||
|
||||
def decompile_function(program, function, timeout_seconds: int = 30) -> str:
|
||||
from ghidra.app.decompiler import DecompInterface
|
||||
from ghidra.util.task import ConsoleTaskMonitor
|
||||
|
||||
interface = DecompInterface()
|
||||
interface.openProgram(program)
|
||||
try:
|
||||
result = interface.decompileFunction(function, timeout_seconds, ConsoleTaskMonitor())
|
||||
if not result.decompileCompleted():
|
||||
error_message = result.getErrorMessage() or "decompilation did not complete"
|
||||
raise RuntimeError(error_message)
|
||||
decompiled = result.getDecompiledFunction()
|
||||
if decompiled is None:
|
||||
raise RuntimeError("decompiler returned no function text")
|
||||
return decompiled.getC()
|
||||
finally:
|
||||
interface.dispose()
|
||||
|
||||
|
||||
def disassemble_function(program, function) -> list[str]:
|
||||
from ghidra.program.model.listing import CodeUnit
|
||||
|
||||
listing = program.getListing()
|
||||
lines = []
|
||||
for instruction in iter_java_items(listing.getInstructions(function.getBody(), True)):
|
||||
line = f"{format_address(instruction.getAddress())}: {instruction.toString()}"
|
||||
if instruction.getFlowType().isCall():
|
||||
references = instruction.getReferencesFrom()
|
||||
if references:
|
||||
target = references[0].getToAddress()
|
||||
target_function = program.getFunctionManager().getFunctionAt(target)
|
||||
if target_function is not None:
|
||||
line += f" -> {target_function.getName()} @ {format_address(target)}"
|
||||
else:
|
||||
line += f" -> {format_address(target)}"
|
||||
comment = instruction.getComment(CodeUnit.EOL_COMMENT)
|
||||
if comment:
|
||||
line += f" ; {comment}"
|
||||
lines.append(line)
|
||||
return lines
|
||||
|
||||
|
||||
def _reference_dict(reference) -> dict[str, str | int]:
|
||||
return {
|
||||
"from": format_address(reference.getFromAddress()),
|
||||
"to": format_address(reference.getToAddress()),
|
||||
"type": str(reference.getReferenceType()),
|
||||
"operand_index": int(reference.getOperandIndex()),
|
||||
}
|
||||
|
||||
|
||||
def get_xrefs_to(program, address_text: str, offset: int = 0, limit: int = 100) -> list[dict[str, str | int]]:
|
||||
reference_manager = program.getReferenceManager()
|
||||
target_address = to_address(program, address_text)
|
||||
results = []
|
||||
skipped = 0
|
||||
for reference in iter_java_items(reference_manager.getReferencesTo(target_address)):
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
results.append(_reference_dict(reference))
|
||||
if len(results) >= limit:
|
||||
break
|
||||
return results
|
||||
|
||||
|
||||
def get_xrefs_from(program, address_text: str, offset: int = 0, limit: int = 100) -> list[dict[str, str | int]]:
|
||||
reference_manager = program.getReferenceManager()
|
||||
source_address = to_address(program, address_text)
|
||||
results = []
|
||||
skipped = 0
|
||||
for reference in iter_java_items(reference_manager.getReferencesFrom(source_address)):
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
results.append(_reference_dict(reference))
|
||||
if len(results) >= limit:
|
||||
break
|
||||
return results
|
||||
|
||||
|
||||
def list_strings(program, offset: int = 0, limit: int = 2000, filter_text: str | None = None):
|
||||
listing = program.getListing()
|
||||
matches = []
|
||||
skipped = 0
|
||||
lowered_filter = filter_text.lower() if filter_text else None
|
||||
for data in iter_java_items(listing.getDefinedData(True)):
|
||||
if not data.hasStringValue():
|
||||
continue
|
||||
text = str(data.getValue())
|
||||
if lowered_filter and lowered_filter not in text.lower():
|
||||
continue
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
matches.append(
|
||||
{
|
||||
"address": format_address(data.getAddress()),
|
||||
"length": int(data.getLength()),
|
||||
"text": text,
|
||||
}
|
||||
)
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def list_imports(program, offset: int = 0, limit: int = 100):
|
||||
external_manager = program.getExternalManager()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for library_name in external_manager.getExternalLibraryNames():
|
||||
for location in iter_java_items(external_manager.getExternalLocations(library_name)):
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
label = location.getLabel()
|
||||
address = location.getAddress()
|
||||
matches.append(
|
||||
{
|
||||
"library": str(library_name),
|
||||
"label": str(label) if label is not None else None,
|
||||
"address": format_address(address) if address is not None else None,
|
||||
}
|
||||
)
|
||||
if len(matches) >= limit:
|
||||
return matches
|
||||
return matches
|
||||
|
||||
|
||||
def list_exports(program, offset: int = 0, limit: int = 100):
|
||||
symbol_table = program.getSymbolTable()
|
||||
function_manager = program.getFunctionManager()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for address in iter_java_items(symbol_table.getExternalEntryPointIterator()):
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
function = function_manager.getFunctionAt(address)
|
||||
primary_symbol = symbol_table.getPrimarySymbol(address)
|
||||
matches.append(
|
||||
{
|
||||
"address": format_address(address),
|
||||
"name": function.getName() if function is not None else (primary_symbol.getName() if primary_symbol is not None else None),
|
||||
"kind": "function" if function is not None else (str(primary_symbol.getSymbolType()) if primary_symbol is not None else "unknown"),
|
||||
}
|
||||
)
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def list_namespaces(program, offset: int = 0, limit: int = 100):
|
||||
from ghidra.program.model.symbol import SymbolType
|
||||
|
||||
symbol_table = program.getSymbolTable()
|
||||
matches = []
|
||||
skipped = 0
|
||||
for symbol in iter_java_items(symbol_table.getDefinedSymbols()):
|
||||
symbol_type = symbol.getSymbolType()
|
||||
if symbol_type not in (SymbolType.NAMESPACE, SymbolType.CLASS, SymbolType.LIBRARY):
|
||||
continue
|
||||
namespace = symbol.getObject()
|
||||
parent = namespace.getParentNamespace() if namespace is not None else None
|
||||
if parent is not None and parent.isGlobal():
|
||||
parent_name = None
|
||||
else:
|
||||
parent_name = parent.getName() if parent is not None else None
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
matches.append(
|
||||
{
|
||||
"name": symbol.getName(),
|
||||
"type": str(symbol_type),
|
||||
"parent": parent_name,
|
||||
}
|
||||
)
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def run_script_file(script_path: Path, globals_dict: dict):
|
||||
script_globals = dict(globals_dict)
|
||||
script_globals.setdefault("__name__", "__main__")
|
||||
script_globals.setdefault("__file__", str(script_path))
|
||||
code = compile(script_path.read_text(encoding="utf-8"), str(script_path), "exec")
|
||||
exec(code, script_globals, script_globals)
|
||||
return script_globals
|
||||
|
||||
|
||||
def set_comment(program, address_text: str, comment: str, comment_type: str):
|
||||
from ghidra.program.model.listing import CodeUnit
|
||||
|
||||
|
|
@ -171,9 +532,14 @@ def set_comment(program, address_text: str, comment: str, comment_type: str):
|
|||
raise ValueError(f"unsupported comment type: {comment_type}")
|
||||
|
||||
listing = program.getListing()
|
||||
code_unit = listing.getCodeUnitAt(to_address(program, address_text))
|
||||
target_address = to_address(program, address_text)
|
||||
code_unit = listing.getCodeUnitAt(target_address)
|
||||
if code_unit is None:
|
||||
raise ValueError(f"no code unit found at {address_text}")
|
||||
function = program.getFunctionManager().getFunctionAt(target_address)
|
||||
if function is not None:
|
||||
function.setComment(comment)
|
||||
return
|
||||
raise ValueError(f"no code unit or function found at {address_text}")
|
||||
code_unit.setComment(comment_types[comment_type], comment)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue