diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 7443b44..135c749 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -17,4 +17,5 @@ applyTo: '**' - Validate reverse-engineering and tooling changes with the narrowest relevant check. - Keep read-only analysis separate from any explicit writable workflow. - For authored map-placement or link investigation in Crusader-Map-Viewer, prefer decompressed `.cache/scene-cache//map-*//scene.json` over `site/data`; the cache scenes preserve direct item objects and world coordinates. +- Never write documentation into `k:\ghidra\crusader_map_viewer` unless the user explicitly asks for documentation changes in that repo. - If the shell becomes stuck on multiline input or otherwise unhealthy, you could try to press esc in the shell to see if it gets unstuck, otherwise immediately use `vscode_askQuestions` to ask the user to fix the terminal state, then continue the task once the user confirms it is fixed. 
\ No newline at end of file diff --git a/Crusader.rep/idata/01/~00000015.db/change.data.gbf b/Crusader.rep/idata/01/~00000015.db/change.data.gbf index 72ec249..0a90d03 100644 Binary files a/Crusader.rep/idata/01/~00000015.db/change.data.gbf and b/Crusader.rep/idata/01/~00000015.db/change.data.gbf differ diff --git a/Crusader.rep/idata/01/~00000015.db/change.map.gbf b/Crusader.rep/idata/01/~00000015.db/change.map.gbf index b8d0fbf..aebed21 100644 Binary files a/Crusader.rep/idata/01/~00000015.db/change.map.gbf and b/Crusader.rep/idata/01/~00000015.db/change.map.gbf differ diff --git a/Crusader.rep/idata/01/~00000015.db/db.68.gbf b/Crusader.rep/idata/01/~00000015.db/db.77.gbf similarity index 98% rename from Crusader.rep/idata/01/~00000015.db/db.68.gbf rename to Crusader.rep/idata/01/~00000015.db/db.77.gbf index ec12335..035b7cc 100644 Binary files a/Crusader.rep/idata/01/~00000015.db/db.68.gbf and b/Crusader.rep/idata/01/~00000015.db/db.77.gbf differ diff --git a/Crusader.rep/idata/01/~00000015.db/db.67.gbf b/Crusader.rep/idata/01/~00000015.db/db.78.gbf similarity index 98% rename from Crusader.rep/idata/01/~00000015.db/db.67.gbf rename to Crusader.rep/idata/01/~00000015.db/db.78.gbf index 2670c98..e072259 100644 Binary files a/Crusader.rep/idata/01/~00000015.db/db.67.gbf and b/Crusader.rep/idata/01/~00000015.db/db.78.gbf differ diff --git a/Crusader.rep/idata/01/~0000001b.db/db.7.gbf b/Crusader.rep/idata/01/~0000001b.db/db.11.gbf similarity index 98% rename from Crusader.rep/idata/01/~0000001b.db/db.7.gbf rename to Crusader.rep/idata/01/~0000001b.db/db.11.gbf index 6d71f1b..c5c3cc6 100644 Binary files a/Crusader.rep/idata/01/~0000001b.db/db.7.gbf and b/Crusader.rep/idata/01/~0000001b.db/db.11.gbf differ diff --git a/Crusader.rep/idata/01/~0000001b.db/db.8.gbf b/Crusader.rep/idata/01/~0000001b.db/db.12.gbf similarity index 97% rename from Crusader.rep/idata/01/~0000001b.db/db.8.gbf rename to Crusader.rep/idata/01/~0000001b.db/db.12.gbf index e77dba1..313934c 100644 
Binary files a/Crusader.rep/idata/01/~0000001b.db/db.8.gbf and b/Crusader.rep/idata/01/~0000001b.db/db.12.gbf differ diff --git a/Crusader.rep/projectState b/Crusader.rep/projectState index 1c9566e..877ece6 100644 --- a/Crusader.rep/projectState +++ b/Crusader.rep/projectState @@ -3,365 +3,15 @@ + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/Crusader.rep/user/00/~00000008.db/db.44.gbf b/Crusader.rep/user/00/~00000008.db/db.48.gbf similarity index 99% rename from Crusader.rep/user/00/~00000008.db/db.44.gbf rename to Crusader.rep/user/00/~00000008.db/db.48.gbf index 777b8c2..a1bfedb 100644 Binary files a/Crusader.rep/user/00/~00000008.db/db.44.gbf and b/Crusader.rep/user/00/~00000008.db/db.48.gbf differ diff --git a/Crusader.rep/user/00/~00000008.db/db.45.gbf b/Crusader.rep/user/00/~00000008.db/db.49.gbf similarity index 98% rename from Crusader.rep/user/00/~00000008.db/db.45.gbf rename to Crusader.rep/user/00/~00000008.db/db.49.gbf index ee47c03..b1ad5b8 100644 Binary files a/Crusader.rep/user/00/~00000008.db/db.45.gbf and b/Crusader.rep/user/00/~00000008.db/db.49.gbf differ diff --git a/Crusader.rep/user/00/~0000000d.db/db.3.gbf b/Crusader.rep/user/00/~0000000d.db/db.4.gbf similarity index 99% rename from Crusader.rep/user/00/~0000000d.db/db.3.gbf rename to Crusader.rep/user/00/~0000000d.db/db.4.gbf 
index 1a381bf..e0aa631 100644 Binary files a/Crusader.rep/user/00/~0000000d.db/db.3.gbf and b/Crusader.rep/user/00/~0000000d.db/db.4.gbf differ diff --git a/Crusader.rep/user/00/~0000000d.db/db.2.gbf b/Crusader.rep/user/00/~0000000d.db/db.5.gbf similarity index 79% rename from Crusader.rep/user/00/~0000000d.db/db.2.gbf rename to Crusader.rep/user/00/~0000000d.db/db.5.gbf index 17f484d..5e2d55c 100644 Binary files a/Crusader.rep/user/00/~0000000d.db/db.2.gbf and b/Crusader.rep/user/00/~0000000d.db/db.5.gbf differ diff --git a/_tmp_inspect_psx_banks.js b/_tmp_inspect_psx_banks.js new file mode 100644 index 0000000..2314eef --- /dev/null +++ b/_tmp_inspect_psx_banks.js @@ -0,0 +1,197 @@ +const fs = require("fs"); + +function readU32LE(buffer, offset) { + return buffer.readUInt32LE(offset); +} + +function readU16LE(buffer, offset) { + return buffer.readUInt16LE(offset); +} + +function parseLsetWdl(data) { + const headerSize = readU32LE(data, 0); + if (headerSize !== 0x34 || headerSize > data.length) { + throw new Error(`unexpected header size ${headerSize}`); + } + + const headerWords = []; + for (let offset = 0; offset < headerSize; offset += 4) { + headerWords.push(readU32LE(data, offset)); + } + + const audioSize = headerWords[1]; + const sectionSizes = []; + for (let offset = 0x08; offset < 0x38; offset += 4) { + sectionSizes.push(readU32LE(data, offset)); + } + + const sections = []; + let cursor = headerSize + audioSize; + for (let index = 0; index < sectionSizes.length; index += 1) { + const size = sectionSizes[index]; + if (size <= 0 || cursor + size > data.length) { + break; + } + sections.push({ + index, + name: `post_audio_section_${String(index).padStart(2, "0")}`, + offset: cursor, + size + }); + cursor += size; + } + + return { headerWords, sections }; +} + +function readSectionBytes(data, section) { + return data.subarray(section.offset, section.offset + section.size); +} + +function parseTypedSection8(data, section) { + const bytes = 
readSectionBytes(data, section); + if (bytes.length < 8) { + return null; + } + const recordCount = readU32LE(bytes, 0); + const payloadBytes = readU32LE(bytes, 4); + const headerOffset = 8 + payloadBytes; + if (recordCount <= 0 || recordCount > 0x400) { + return null; + } + if (payloadBytes < 0 || headerOffset + recordCount * 8 > bytes.length) { + return null; + } + + let payloadCursor = 8; + const records = []; + for (let index = 0; index < recordCount; index += 1) { + const descriptorOffset = headerOffset + index * 8; + const blockSize = readU32LE(bytes, descriptorOffset); + const typeId = readU32LE(bytes, descriptorOffset + 4); + if (blockSize < 0 || payloadCursor + blockSize > headerOffset) { + return null; + } + const payload = bytes.subarray(payloadCursor, payloadCursor + blockSize); + const payloadDwords = []; + for (let offset = 0; offset + 4 <= payload.length; offset += 4) { + payloadDwords.push(readU32LE(payload, offset)); + } + records.push({ index, typeId, blockSize, payloadDwords }); + payloadCursor += blockSize; + } + + return { section, recordCount, payloadBytes, records }; +} + +function parseTypedSection16(data, section) { + const bytes = readSectionBytes(data, section); + if (bytes.length < 8) { + return null; + } + const recordCount = readU32LE(bytes, 0); + const payloadBytes = readU32LE(bytes, 4); + const headerOffset = 8 + payloadBytes; + if (recordCount <= 0 || recordCount > 0x400) { + return null; + } + if (payloadBytes < 0 || headerOffset + recordCount * 16 > bytes.length) { + return null; + } + + let payloadCursor = 8; + const records = []; + for (let index = 0; index < recordCount; index += 1) { + const descriptorOffset = headerOffset + index * 16; + const d4Size = readU32LE(bytes, descriptorOffset); + const ccSize = readU32LE(bytes, descriptorOffset + 4); + const d0Size = readU32LE(bytes, descriptorOffset + 8); + const typeId = readU16LE(bytes, descriptorOffset + 12); + const variantTypeId = readU16LE(bytes, descriptorOffset + 14); + 
const ccPayload = bytes.subarray(payloadCursor, payloadCursor + ccSize); + const d0Payload = bytes.subarray(payloadCursor + ccSize, payloadCursor + ccSize + d0Size); + const d4Payload = bytes.subarray(payloadCursor + ccSize + d0Size, payloadCursor + ccSize + d0Size + d4Size); + if (payloadCursor + ccSize + d0Size + d4Size > headerOffset) { + return null; + } + records.push({ + index, + typeId, + variantTypeId, + ccSize, + d0Size, + d4Size, + ccDwords: readDwords(ccPayload), + d0Dwords: readDwords(d0Payload), + d4Dwords: readDwords(d4Payload) + }); + payloadCursor += ccSize + d0Size + d4Size; + } + + return { section, recordCount, payloadBytes, records }; +} + +function readDwords(payload) { + const values = []; + for (let offset = 0; offset + 4 <= payload.length; offset += 4) { + values.push(readU32LE(payload, offset)); + } + return values; +} + +function main() { + const filePath = process.argv[2]; + const wantedTypes = new Set(process.argv.slice(3).map((value) => Number.parseInt(value, 16))); + const data = fs.readFileSync(filePath); + const parsed = parseLsetWdl(data); + + const section8 = parsed.sections + .map((section) => parseTypedSection8(data, section)) + .filter(Boolean) + .sort((left, right) => right.recordCount - left.recordCount || right.payloadBytes - left.payloadBytes)[0]; + const section16 = parsed.sections + .map((section) => parseTypedSection16(data, section)) + .filter(Boolean) + .sort((left, right) => right.recordCount - left.recordCount || right.payloadBytes - left.payloadBytes)[0]; + + const summary = { + filePath, + section8: section8 ? 
{ + section: section8.section.name, + offset: `0x${section8.section.offset.toString(16)}`, + size: `0x${section8.section.size.toString(16)}`, + recordCount: section8.recordCount, + wanted: section8.records + .filter((record) => wantedTypes.has(record.typeId)) + .map((record) => ({ + index: record.index, + typeId: `0x${record.typeId.toString(16)}`, + blockSize: `0x${record.blockSize.toString(16)}`, + payloadDwords: record.payloadDwords.map((value) => `0x${value.toString(16)}`) + })) + } : null, + section16: section16 ? { + section: section16.section.name, + offset: `0x${section16.section.offset.toString(16)}`, + size: `0x${section16.section.size.toString(16)}`, + recordCount: section16.recordCount, + wanted: section16.records + .filter((record) => wantedTypes.has(record.typeId) || wantedTypes.has(record.variantTypeId)) + .map((record) => ({ + index: record.index, + typeId: `0x${record.typeId.toString(16)}`, + variantTypeId: `0x${record.variantTypeId.toString(16)}`, + ccSize: `0x${record.ccSize.toString(16)}`, + d0Size: `0x${record.d0Size.toString(16)}`, + d4Size: `0x${record.d4Size.toString(16)}`, + ccDwords: record.ccDwords.map((value) => `0x${value.toString(16)}`), + d0Dwords: record.d0Dwords.map((value) => `0x${value.toString(16)}`), + d4Dwords: record.d4Dwords.map((value) => `0x${value.toString(16)}`) + })) + } : null + }; + + console.log(JSON.stringify(summary, null, 2)); +} + +main(); \ No newline at end of file diff --git a/_tmp_probe_psx_section16.js b/_tmp_probe_psx_section16.js new file mode 100644 index 0000000..103494a --- /dev/null +++ b/_tmp_probe_psx_section16.js @@ -0,0 +1,132 @@ +const fs = require("fs"); + +function readU32LE(buffer, offset) { + return buffer.readUInt32LE(offset); +} + +function readU16LE(buffer, offset) { + return buffer.readUInt16LE(offset); +} + +function parseLsetWdl(data) { + const headerSize = readU32LE(data, 0); + if (headerSize !== 0x34 || headerSize > data.length) { + throw new Error(`unexpected header size 
${headerSize}`); + } + + const headerWords = []; + for (let offset = 0; offset < headerSize; offset += 4) { + headerWords.push(readU32LE(data, offset)); + } + + const audioSize = headerWords[1]; + const sectionSizes = []; + for (let offset = 0x08; offset < 0x38; offset += 4) { + sectionSizes.push(readU32LE(data, offset)); + } + + const sections = []; + let cursor = headerSize + audioSize; + for (let index = 0; index < sectionSizes.length; index += 1) { + const size = sectionSizes[index]; + if (size <= 0 || cursor + size > data.length) { + break; + } + sections.push({ + index, + name: `post_audio_section_${String(index).padStart(2, "0")}`, + offset: cursor, + size + }); + cursor += size; + } + + return { sections }; +} + +function parseTypedSection16(data, section, startOffset) { + const bytes = data.subarray(section.offset + startOffset, section.offset + section.size); + if (bytes.length < 8) { + return null; + } + const recordCount = readU32LE(bytes, 0); + const payloadBytes = readU32LE(bytes, 4); + const headerOffset = 8 + payloadBytes; + if (recordCount <= 0 || recordCount > 0x400) { + return null; + } + if (payloadBytes < 0 || headerOffset + recordCount * 16 > bytes.length) { + return null; + } + + let payloadCursor = 8; + const records = []; + for (let index = 0; index < recordCount; index += 1) { + const descriptorOffset = headerOffset + index * 16; + const d4Size = readU32LE(bytes, descriptorOffset); + const ccSize = readU32LE(bytes, descriptorOffset + 4); + const d0Size = readU32LE(bytes, descriptorOffset + 8); + const typeId = readU16LE(bytes, descriptorOffset + 12); + const variantTypeId = readU16LE(bytes, descriptorOffset + 14); + const endOffset = payloadCursor + ccSize + d0Size + d4Size; + if (endOffset > headerOffset) { + return null; + } + records.push({ index, typeId, variantTypeId, ccSize, d0Size, d4Size, payloadCursor }); + payloadCursor = endOffset; + } + + return { + sectionName: section.name, + startOffset, + recordCount, + payloadBytes, + 
headerOffset, + records + }; +} + +function summarizeCandidate(candidate, wantedTypes) { + return { + sectionName: candidate.sectionName, + startOffset: `0x${candidate.startOffset.toString(16)}`, + recordCount: candidate.recordCount, + payloadBytes: `0x${candidate.payloadBytes.toString(16)}`, + wanted: candidate.records + .filter((record) => wantedTypes.has(record.typeId) || wantedTypes.has(record.variantTypeId)) + .map((record) => ({ + index: record.index, + typeId: `0x${record.typeId.toString(16)}`, + variantTypeId: `0x${record.variantTypeId.toString(16)}`, + ccSize: `0x${record.ccSize.toString(16)}`, + d0Size: `0x${record.d0Size.toString(16)}`, + d4Size: `0x${record.d4Size.toString(16)}`, + payloadCursor: `0x${record.payloadCursor.toString(16)}` + })) + }; +} + +function main() { + const filePath = process.argv[2]; + const wantedTypes = new Set(process.argv.slice(3).map((value) => Number.parseInt(value, 16))); + const data = fs.readFileSync(filePath); + const parsed = parseLsetWdl(data); + const candidates = []; + + for (const section of parsed.sections) { + for (let startOffset = 0; startOffset < Math.min(section.size, 0x800); startOffset += 4) { + const candidate = parseTypedSection16(data, section, startOffset); + if (!candidate) { + continue; + } + const summary = summarizeCandidate(candidate, wantedTypes); + if (summary.wanted.length > 0) { + candidates.push(summary); + } + } + } + + console.log(JSON.stringify(candidates, null, 2)); +} + +main(); \ No newline at end of file diff --git a/_tmp_probe_sections.js b/_tmp_probe_sections.js new file mode 100644 index 0000000..0358925 --- /dev/null +++ b/_tmp_probe_sections.js @@ -0,0 +1,111 @@ +const fs = require("fs"); + +const ALLOWED_U5 = new Set([0x20, 0x22, 0x30]); + +function readU32LE(buffer, offset) { + return buffer.readUInt32LE(offset); +} + +function readU16LE(buffer, offset) { + return buffer.readUInt16LE(offset); +} + +function isStructuredCandidate(record) { + if (record[0] >= 0x200) { + return 
false; + } + if (record[1] === 0 && record[2] === 0) { + return false; + } + if (record[1] >= 0x4000 || record[2] >= 0x4000) { + return false; + } + if (record[3] > 0x20 || record[4] > 0x04) { + return false; + } + return ALLOWED_U5.has(record[5]); +} + +function probeFile(filePath) { + const data = fs.readFileSync(filePath); + const headerSize = readU32LE(data, 0); + const audioSize = readU32LE(data, 4); + const sectionSizes = []; + for (let offset = 8; offset < 0x38; offset += 4) { + sectionSizes.push(readU32LE(data, offset)); + } + + let cursor = headerSize + audioSize; + const sections = []; + for (let index = 0; index < sectionSizes.length; index += 1) { + const size = sectionSizes[index]; + const start = cursor; + const end = start + size; + const bytes = data.subarray(start, end); + cursor = end; + + let rowCount = null; + let rootHits = 0; + let bulkHits = 0; + + if (bytes.length >= 4) { + rowCount = readU32LE(bytes, 0); + for (let rowIndex = 0; rowIndex < Math.min(rowCount, 5000); rowIndex += 1) { + const base = 4 + rowIndex * 24; + if (base + 24 > bytes.length) { + break; + } + const words = []; + for (let wordIndex = 0; wordIndex < 12; wordIndex += 1) { + words.push(readU16LE(bytes, base + wordIndex * 2)); + } + const left = [words[4], words[5], words[0], words[1], words[2], words[3]]; + const right = [words[10], words[11], words[6], words[7], words[8], words[9]]; + if (isStructuredCandidate(left)) { + rootHits += 1; + } + if (isStructuredCandidate(right)) { + rootHits += 1; + } + } + } + + const usableSize = bytes.length - (bytes.length % 24); + for (let offset = 0; offset < usableSize; offset += 24) { + for (const sideOffset of [0, 12]) { + const base = offset + sideOffset; + const record = [ + readU16LE(bytes, base), + readU16LE(bytes, base + 2), + readU16LE(bytes, base + 4), + readU16LE(bytes, base + 6), + readU16LE(bytes, base + 8), + readU16LE(bytes, base + 10) + ]; + if (isStructuredCandidate(record)) { + bulkHits += 1; + } + } + } + + 
sections.push({ + index, + start, + size, + rowCount, + rootHits, + bulkHits + }); + } + + return { + filePath, + headerSize, + audioSize, + sections + }; +} + +for (const filePath of process.argv.slice(2)) { + console.log(JSON.stringify(probeFile(filePath), null, 2)); +} diff --git a/crusader_decompilation_notes.md b/crusader_decompilation_notes.md index cb874b3..837baf1 100644 --- a/crusader_decompilation_notes.md +++ b/crusader_decompilation_notes.md @@ -4,6 +4,8 @@ This file is an index. Detailed notes have been split into the `docs/` folder by Active live analysis target is now `CRUSADER.EXE`. Existing `CRUSADER-RAW.EXE` notes remain in scope as cross-reference evidence and should be cited alongside live NE addresses when they support a rename, variable role, or behavior claim. +Recent verified PSX map-viewer batch: [docs/psx/psx.md](docs/psx/psx.md) and [docs/psx/map-viewer-plan.md](docs/psx/map-viewer-plan.md) now record the latest executable-backed correction to the PSX renderer model. Current best read is that the cache builder still exports executable-named section-0 visible families (`section0_dispatch_roots`, `section0_constructor_placements`), runtime/state layers for `DAT_800758d8`, `DAT_800758d0`, `DAT_800758cc`, `DAT_800758d4`, and one offline `FUN_8003b00c` decode candidate for `DAT_8006b5d8 -> DAT_8006769c`, and packs shared PSX art into `1925` shared atlases instead of the earlier one-atlas-per-shape spread. But the same batch also shows the current fallback art path is wrong at the root: early `section0_dispatch_roots` types such as `0x0042` and `0x0049` currently bind to portrait/talk-animation bundles, so the real remaining blocker is the multi-stage per-type template/state/variant selection path, not just a missing one-step bundle lookup. + Recent verified combat-data batch: [docs/combat-dat.md](docs/combat-dat.md) now documents the shipped `COMBAT.DAT` archive end to end. 
Current best read is that all local Remorse/Regret variants ship the same `14`-record combat-tactic archive, each record contains a `16`-byte name plus four block offsets and bytecode, and the tactic VM is now grounded both in the live `CRUSADER.EXE` helpers (`Attack_SetupForTacticNo`, `Attack_SetupForBlockNo`, `NPC_Get/SetNPCTacticNo`) and in ScummVM's readable Crusader attack-process implementation. The new note also promotes the per-tactic human-readable catalog, including the midpoint-pressure, marker-shuttle, step-out-shoot, and stationary-chaos families. Recent verified NE movement/collision batch: [docs/raw-0008-000c.md](docs/raw-0008-000c.md) now extends the live `AreaSearch_CollideMove` lane one helper layer deeper. Current best read is that the collision-storage queue is no longer only anchored at `StorageDataProcess_Create` / `Run` and the legal-move wrappers: the live database now also carries the step-aware seg029 sweep helpers `AreaSearch_SweepShapeBetweenPoints`, `AreaSearch_SweepItemToPointWithStepUp`, and `AreaSearch_SweepShapeBetweenPointsWithStepUp`, the seg031 release-side queue cleanup pair `StorageDataProcess_Release` and `storage_process_ref_list_terminate_item_matches`, and adjacent seg090 helper `ItemCache_PushAndPopToDirectionalOffset`. The practical remaining gap in this lane is now earlier caller policy rather than local helper identity. diff --git a/docs/entity-vm-runtime-owner-resource-layout.md b/docs/entity-vm-runtime-owner-resource-layout.md index b38f681..9a1c502 100644 --- a/docs/entity-vm-runtime-owner-resource-layout.md +++ b/docs/entity-vm-runtime-owner-resource-layout.md @@ -231,19 +231,106 @@ Verified first batch landed in the live `CRUSADER.EXE` session on 2026-04-05. - Exercised the new storage-aware prototype route against the two known 16-bit repair cases (`1000:42e2` and `1420:1499`) through the active MCP session. 
The checked-in source has the new route wiring, but the live GUI plugin still answered with legacy behavior: `/set_function_prototype_storage` returned the old `set_function_prototype` failure body, and `/set_storage_aware_prototype` returned `404 No context found for request`. That confirms the remaining issue is live deployment parity, not endpoint design. - Rechecked the direct callers of `CreateFromSlotIndex`: `Usecode_ItemCallEvent` plus two `Interpreter_NextUsecodeOp` call sites. The `Usecode_ItemCallEvent` path explicitly calls `CreateFromSlotIndex((EntityVmContext *)0x0,0,...)` as an allocate-and-return factory, and the current caller-side uses immediately consume only base `Process`-style fields such as `procid` and termination flags. The two interpreter call sites likewise just store the returned far pointer in `DX:AX` scratch pairs before later base-process handling. - That caller evidence is enough to keep the current conservative return type for now: `CreateFromSlotIndex` is clearly manufacturing an `EntityVmContext`, but promoting the return to `EntityVmContext *` before the inheritance/base-process datatype story is explicit would probably make current caller decompilation less clear rather than more clear. +- Verified seventh live batch landed on 2026-04-06 after the refreshed MCP build came up. +- Re-exercised `set_function_prototype_storage(...)` in-session on the two known 16-bit repair cases. The route now reaches the real storage-aware implementation and can preserve the explicit `AX:DX` return storage in-session, but two live issues remain: stack offsets at `10` and above currently need `0x` prefixes to avoid landing at `0x10`/`0x12`/`0x14`/`0x16`, and `calling_convention='__cdecl16far'` still normalizes the repaired functions to plain `__cdecl`. 
+- Updated `/Remorse/EntityVmSlotEntry` one step deeper from the `InitSlotOwnerBuffers` and `EnsureSlotChunkLoaded` evidence: + - `+0x00 match_key_farptr` + - `+0x0a owner_chunk_count` + - `+0x12 owner_data_base` + - retained the earlier `+0x1e..+0x24` owner-buffer and chunk-state pointer pairs +- Updated local variable typing so `AcquireSlotForEntity` now carries `EntityVmSlotEntry *` locals for the current slot cursor/free-slot candidate lane, and `InitSlotOwnerBuffers` now carries an `EntityVmSlotEntry *` local for the owner-metadata scratch object. +- The decompiler payoff is immediate: `InitSlotOwnerBuffers` now shows `owner_chunk_count`, `owner_buffer_*`, and `chunk_state_*` directly, and `EnsureSlotChunkLoaded` now shows `owner_data_base` where the slot metadata seeds the later owner-data window. +- Tried the stronger storage-aware `Create(this: /Remorse/EntityVmRuntime * @ stack:0x4:4, ...)` model through the new endpoint, but it still fails with `Storage size does not match data type size: 2`. That makes the remaining blocker more precise again: the live MCP route is now good enough to express the desired 4-byte storage, but the current `EntityVmRuntime *` datatype in this 16-bit NE session still resolves to a 2-byte pointer type. +- Verified eighth live batch landed on 2026-04-06. +- Reloaded the live plugin and re-verified `set_function_prototype_storage(...)` on the two known 16-bit proof cases. The route now works in-session and preserves explicit `AX:DX` return storage cleanly, but `calling_convention='__cdecl16far'` still normalizes both `1000:42e2` and `1420:1499` to plain `__cdecl`. +- Renamed `1420:1d72` to `entity_vm_runtime_get_slot_chunk_ptr_at_offset` after confirming from `CreateFromSlotIndex`, `Load`, and `FUN_1418_035f` that it is just a wrapper over `EnsureSlotChunkLoaded` plus a caller-supplied offset. 
+- Renamed `1420:1d8d` to `entity_vm_runtime_release_slot_chunk_ref` after confirming from the `Interpreter_NextUsecodeOp` caller that it decrements one live chunk-state refcount and asserts if the chunk was not retained. +- Renamed `1420:1e17` to `entity_vm_runtime_try_unload_slot_chunk` after confirming from `entity_vm_runtime_apply_to_matching_owner_rows` that it only unloads a chunk when the chunk-state count has reached zero, restoring the owner-buffer entry and freeing runtime budget during cleanup/eviction. +- Added short decompiler comments to those three helpers so the slot-entry ownership story stays visible in the live database. +- Verified ninth live batch landed on 2026-04-06. +- Created provisional datatype `/Remorse/EntityVmLoadedChunkRecord` with the current stable cleanup/iterator anchors: + - `+0x06 next_offset` + - `+0x08 next_segment` + - `+0x0e saved_chunk_offset` + - `+0x10 saved_chunk_segment` + - `+0x12 slot_index` + - `+0x14 chunk_index` +- Updated `1420:1e17 entity_vm_runtime_try_unload_slot_chunk` so the second parameter is now `EntityVmLoadedChunkRecord * loaded_chunk_record`, and then tightened the return to `byte __cdecl16far` with explicit `AL` storage after caller disassembly at `1420:1f50` and `1420:1fc1` showed both call sites consume only `AL`. +- Updated the iterator local `uStack_6` in `1420:1f24 entity_vm_runtime_apply_to_matching_owner_rows` to `EntityVmLoadedChunkRecord *`, so the owner-row cleanup walk now renders `next_*`, `slot_index`, and `chunk_index` directly instead of anonymous stack-pair traffic. +- Confirmed the interpreter-side release helper caller at `1418:3330` pushes the live chunk record's `slot_index` / `chunk_index` pair from `ES:[BX+0x32]` / `ES:[BX+0x34]` together with the runtime far pointer before calling `entity_vm_runtime_release_slot_chunk_ref`, which makes the loaded-chunk record a real shared runtime record rather than a one-off cleanup scratch blob. 
+- Verified tenth live batch landed on 2026-04-06. +- Renamed local helper `1418:003c` to `interpreter_pop_saved_farptr` after confirming from its only caller in `Interpreter_NextUsecodeOp` that it decrements a saved-farptr stack count at `+0x80` and returns the far pointer stored at the new top entry. +- Added short decompiler comments at `1418:003c` and `1418:3330` so the interpreter-side release/restore lane stays visible without overcommitting the restored far pointer to a stronger semantic than the current evidence supports. +- Verified eleventh live batch landed on 2026-04-06. +- Created class owner `Remorse::EntityVmSlotEntry` in the live database and moved `1420:2040` under it as `CreateOrClear`. +- Tightened `Remorse::EntityVmSlotEntry::CreateOrClear` so the single parameter is now named `this` and the explicit far return storage is restored to `AX` for the `EntityVmSlotEntry *` result. +- Moved the previously global runtime cleanup helpers under `Remorse::EntityVmRuntime` as real methods: + - `1420:1d72` -> `GetSlotChunkPtrAtOffset` + - `1420:1d8d` -> `ReleaseSlotChunkRef` + - `1420:1cca` -> `DebugDumpSlotMemory` + - `1420:1e17` -> `TryUnloadSlotChunk` + - `1420:1f24` -> `ApplyToMatchingOwnerRows` +- Tightened the `ReleaseSlotChunkRef` parameter names to `runtime_farptr`, `slot_index`, and `chunk_index`, and renamed the `DebugDumpSlotMemory` far-pointer argument to `runtime_farptr` so the runtime-owned chunk/refcount lane reads more like method code than detached helper code. +- Verified twelfth live batch landed on 2026-04-06. +- Tightened `Remorse::EntityVmRuntime::GetSlotChunkPtrAtOffset` to `dword __stdcall16far GetSlotChunkPtrAtOffset(dword runtime_farptr, int slot_index, int chunk_index, dword intra_chunk_offset)` after re-checking the `CreateFromSlotIndex` and `Load` callers. The current best read is: load/ensure one slot chunk through the runtime, then add a caller-supplied intra-chunk offset pair to the returned far pointer. 
+- Tightened `Remorse::EntityVmRuntime::ApplyToMatchingOwnerRows` to `byte __cdecl16far ApplyToMatchingOwnerRows(dword runtime_farptr, int slot_index_filter, int chunk_index_filter)` after re-checking the `AcquireSlotForEntity` and `EnsureSlotChunkLoaded` callers. The current best read is: iterate the runtime-owned loaded-chunk list either broadly (`-1/-1`) or for one current slot/chunk pair. +- Restored explicit return storage after the storage-aware retype pass so `GetSlotChunkPtrAtOffset` still returns its far pointer in `DX:AX` and `ApplyToMatchingOwnerRows` still returns its boolean result in `AL`. +- Verified thirteenth live batch landed on 2026-04-06. +- Lifted the grouped runtime methods from split-word `runtime_farptr` parameters to explicit 4-byte `EntityVmRuntime * this` storage using `/Remorse/EntityVmRuntime *32` in-session. The live signatures now read as real methods for: + - `Create` + - `InitSlots` + - `ReleaseSlots` + - `DebugDumpSlotMemory` + - `ReleaseSlotChunkRef` + - `GetSlotChunkPtrAtOffset` + - `TryUnloadSlotChunk` + - `ApplyToMatchingOwnerRows` + - `EnsureSlotChunkLoaded` +- `Remorse::EntityVmRuntime::Create` is the biggest change in that batch: it no longer needs the old split-word placeholder form and now holds `dword __cdecl16far Create(EntityVmRuntime * this, word owner_type, word owner_id)` with the original `AX:DX` return preserved. +- `EnsureSlotChunkLoaded` now also carries the clearer `EntityVmRuntime * this, short slot_index, short chunk_index` signature with the original far-pointer return preserved in `DX:AX`. +- `AcquireSlotForEntity` and `InitSlotOwnerBuffers` are now fully over that hurdle too: `AcquireSlotForEntity` returns `EntityVmSlotEntry *32` in `DX:AX`, and `InitSlotOwnerBuffers` now carries `EntityVmSlotEntry *32 slot_entry` as its third parameter. +- Verified fourteenth live batch landed on 2026-04-06. 
+- Finished the remaining straightforward VM pointer cleanup outside the hottest runtime helper cluster: + - `1430:0000 Remorse::EntityVmOwnerResource::Create` -> `byte __cdecl16far Create(EntityVmOwnerResource * this, dword owner_resource_spec)` + - `1430:00fd Remorse::EntityVmOwnerResource::Destroy` -> `Destroy(EntityVmOwnerResource * this, uint destroy_flags)` + - `1420:1601 Remorse::EntityVmRuntime::Destroy` -> `byte __cdecl16far Destroy(EntityVmRuntime * this, word destroy_flags)` + - `1420:10b6/10da/1162/118f/1278 Remorse::EntityVmContext::{FreeBuffer, SyncGlobalValueAndDispatch, Destroy, Save, Load}` now all carry explicit `EntityVmContext *32 this` +- That leaves `CreateFromSlotIndex` as the one clearly still-complex VM signature in this family cluster: the body still shows a far `this`, but the remaining argument pack needs a dedicated caller-side recovery pass rather than another pointer-only rewrite. +- Verified fifteenth live batch landed on 2026-04-06. +- Recovered the mixed caller pack on `1420:0eec Remorse::EntityVmContext::CreateFromSlotIndex` far enough to replace the old anonymous split arguments with caller-backed names: + - `dword owner_source_farptr` + - `dword pitemno_farptr` + - `word mode_flags` + - `word slot_index` + - `word value_add_offset` + - `word intra_chunk_offset` + - `dword ucparam_farptr` + - `uint ucparamsize` +- Restored explicit far return storage on `CreateFromSlotIndex` to `AX:DX` after the storage-aware apply briefly dropped it. +- The same live pass also made the remaining endpoint weakness more concrete again: once the caller-backed custom-storage pack is applied, the endpoint still textualizes the function as plain `dword __cdecl` instead of preserving the earlier higher-level `UsecodeProcess *` / `__stdcall16far` surface, even though the decompiler now keeps the correct argument boundaries and the return really is back in `AX:DX`. 
+- Current best caller-backed read for `CreateFromSlotIndex` is now narrower and more useful than the old placeholder form: + - `owner_source_farptr` is a real far-pointer input that is persisted to context `+0x11b/+0x11d` + - `ucparam_farptr` is a real far-pointer input copied into the backward-growing buffer at `+0x102` + - `slot_index`, `value_add_offset`, and `intra_chunk_offset` are distinct scalar inputs rather than one collapsed anonymous pack + - the conservative semantic story is still `factory/setup bridge that returns a far process/context pointer`, not `final inheritance-clean constructor signature` Current live datatype state: - `/Remorse/EntityVmOwnerResource` is the cleanest landed class in this lane so far. - `/Remorse/EntityVmRuntime` currently only freezes the stable tail fields and helper pointer, not the full slot-entry schema. -- `/Remorse/EntityVmSlotEntry` now exists as a bounded helper datatype, but only the stable tail buffer-pair fields are named so far. +- `/Remorse/EntityVmSlotEntry` now exists both as a bounded helper datatype and as a live `Remorse` class owner. Its current authored surface is intentionally small: one constructor/clear method plus the stable `match_key_farptr`, `owner_chunk_count`, `owner_data_base`, and owner-buffer / chunk-state pointer anchors. +- `/Remorse/EntityVmLoadedChunkRecord` now exists as the shared cleanup/iteration record for the chunk-release and conditional-unload lane, with the currently stable next-link, saved-owner-buffer, slot-index, and chunk-index fields named. - `/Remorse/EntityVmContext` now exists and matches the current owned lifecycle cluster, but it still only records the safest field anchors rather than the full embedded mini-VM layout. - `apply_class_layout` succeeded for `Remorse::EntityVmOwnerResource` but failed for `Remorse::EntityVmRuntime` when the binder tried to apply a `this` type, even though plain ownership moves worked. 
- The old `apply_class_layout` dry-run null failure for `Remorse::EntityVmContext` no longer reproduces on the current live server, but the actual write-side `this` typing path is still effectively old-build behavior: the real apply and direct `set_function_this_type` calls still fail on the existing `UsecodeProcess *` lifecycle signatures with `Storage size does not match data type size: 2`. - The `EntityVmContext` lifecycle signatures are now locally repaired through PyGhidra: `CreateFromSlotIndex` plus `FreeBuffer` / `SyncGlobalValueAndDispatch` / `Destroy` / `Save` / `Load` all carry `EntityVmContext * this` as their first parameter. -- `CreateFromSlotIndex` should still keep its conservative `UsecodeProcess *` return type for the moment. The allocate-and-return behavior is clear, but the known callers currently consume it through base-process fields, and the repo does not yet have an inheritance-aware `EntityVmContext : UsecodeProcess` datatype model that would make a promoted return cleaner across the call sites. +- `CreateFromSlotIndex` should still keep a conservative semantic return in the notes for the moment. The active live endpoint now textualizes it as `dword __cdecl` after the caller-packed custom-storage cleanup, but the allocate-and-return behavior is clear, the real return storage is back in `AX:DX`, and the known callers still consume the result through base-process fields rather than through an inheritance-aware `EntityVmContext : UsecodeProcess` datatype model. - The runtime lane is now split more accurately: `InitSlots` and `ReleaseSlots` can carry a direct `EntityVmRuntime * this`, while `Create` still needs the split-word custom-storage form to avoid hidden return-storage breakage. -- The first slot-entry prototype batch is tighter now that `EnsureSlotChunkLoaded` carries a real `EntityVmSlotEntry *` local on the acquired-slot path, but the wider slot-entry model is still improved rather than finished. 
+- The runtime lane is grouped more accurately too: the chunk-access, chunk-ref release, debug-dump, conditional-unload, and owner-row iterator helpers now sit under `Remorse::EntityVmRuntime` instead of remaining global free functions. +- The runtime lane is also typed more accurately now: the chunk accessor is no longer a five-word anonymous wrapper, and the owner-row iterator no longer pretends its runtime pointer is two independent split-word parameters. +- The authored VM lane is now much closer to a real class surface than a namespace grouping: `EntityVmRuntime`, `EntityVmOwnerResource`, `EntityVmContext`, `EntityVmSlotEntry`, and the helper `EntityVmLoadedChunkRecord` all now participate in a mostly far-pointer-correct live type model, with `CreateFromSlotIndex` as the main remaining signature outlier. +- The slot-entry model is tighter again: beyond the earlier `owner_buffer_*` and `chunk_state_*` tails, the datatype now also exposes `owner_chunk_count` and `owner_data_base`, which makes the allocator/count path in `InitSlotOwnerBuffers` and the owner-data window math in `EnsureSlotChunkLoaded` read as object state rather than anonymous offset pairs. +- The adjacent helper map is tighter too: the slot-entry consumer side now has one pointer-plus-offset accessor, one chunk-ref release helper, one conditional-unload helper, and one named loaded-chunk iterator record instead of a mix of anonymous `1420:` placeholders and anonymous stack-pair scratch state. 
Current scope of that batch stayed intentionally conservative: @@ -258,9 +345,14 @@ Best immediate next moves after this landed: - inspect `EnsureSlotChunkLoaded` and adjacent `1420:` helpers again now that `AcquireSlotForEntity` returns `EntityVmSlotEntry *`, and push the slot-entry type one step deeper only where the resulting local/object read is genuinely clearer - decide whether `CreateFromSlotIndex` can safely promote its return type from `UsecodeProcess *` to `EntityVmContext *`, or whether it should stay a factory-style bridge that only types `this` - if the context/base-process inheritance story becomes explicit in datatypes, revisit `CreateFromSlotIndex` return typing then; until that point, keep the current `UsecodeProcess *` return even though the body itself clearly builds an `EntityVmContext` -- recover a storage-aware `this`-typing path for `Create` specifically; `InitSlots` and `ReleaseSlots` no longer need to stay in the unresolved set +- decide whether `match_key_farptr` at `+0x00` should stay as a neutral far-pointer field or can now be promoted to a stronger entity/owner key name from caller-side evidence +- recover a storage-aware `this`-typing path for `Create` specifically; the live route now works well enough to test explicit 4-byte storage, but the remaining blocker is the 2-byte `EntityVmRuntime *` datatype itself rather than endpoint reachability +- inspect the broader `Interpreter_NextUsecodeOp` lane around `1418:3330` now that the release call and `interpreter_pop_saved_farptr` are anchored, and decide whether the loaded-chunk record can absorb any more of the surrounding save/restore stack traffic without overfitting transient interpreter locals - redeploy or otherwise verify the live storage-fallback `set_function_this_type` / `apply_class_layout` build, then retry the `EntityVmContext` lifecycle typing pass in-session before dropping back to local PyGhidra - identify one or two additional strongly owned runtime or owner-resource 
helpers if the live session exposes them cleanly +- decide whether `ApplyToMatchingOwnerRows` should keep its current generic split-word parameters under `Remorse::EntityVmRuntime` or whether the first argument pair is now well enough understood to collapse into a typed runtime `this` +- decide whether the newly clarified `runtime_farptr` argument on `GetSlotChunkPtrAtOffset` and `ApplyToMatchingOwnerRows` is enough to justify a safe typed-`this` experiment on those methods, or whether the current `EntityVmRuntime *` pointer-size issue still makes the explicit `dword runtime_farptr` form the least misleading representation +- use the now-recovered `CreateFromSlotIndex` caller pack as the baseline for any next cleanup, and only chase a prettier return type once the base-process inheritance story is explicit enough to make that promotion a real readability win - keep the masked-create hub and offset-specialized wrapper ladder outside the class until caller-side role recovery is tighter ## Source-Emission Guidance diff --git a/docs/psx/map-viewer-plan.md b/docs/psx/map-viewer-plan.md new file mode 100644 index 0000000..f12ae93 --- /dev/null +++ b/docs/psx/map-viewer-plan.md @@ -0,0 +1,164 @@ +# PSX Map Viewer And JL-9 Investigation Plan + +## Scope + +- Active target: retail PlayStation `SLUS_002.68` already loaded in Ghidra. +- Keep all PSX documentation in `docs/psx/`. +- Primary objective: get PSX maps loading into the existing map viewer coherently. +- Secondary objective: make PSX graphics export with the correct palette automatically instead of by partial heuristics. +- Tertiary objective: determine whether `JL-9` is a real weapon in the PSX build, how it is unlocked or granted, and which sprite/bundle represents it. + +## Current State + +- `docs/psx/psx.md` already closes the boot executable, the broad `LSET*.WDL` layout, and the likely split between map-like regions and graphics-like regions. 
+- The earlier `region00-first` viewer export is now known to be based on a bad assumption: the `~45..59` records it exposes per map are only the small top-level WDL descriptor stream, not the full level content. +- The stronger current model is a multi-section bundle layout: a top-level `0x18`-byte dispatch-record table, typed subordinate resource tables rooted at `DAT_800758cc/d0/d4/d8`, and at least one separate compressed level-state blob that is inflated into `DAT_8006769c` by `FUN_8003b00c(..., 0x3e00, 0x3e00)`. +- The strongest current graphics source remains `post_audio_region_04`. +- A first PSX debug scene has already been exported experimentally, but the active workflow is now the renderer-local `.cache` pipeline rather than `site` output. +- The active live probe now builds provisional real-art atlases in `map_renderer/src/build-psx-cache.js` from `map_renderer/STATIC_PSX` into `.cache/psx`, `.cache/reference-data/psx-remorse`, and `.cache/scene-cache/psx-remorse/...`. +- The current verified processed build exposes `62` PSX maps in the live renderer catalog under the runtime-record scene format (`4032` atlas-backed shapes, `1925` packed shared atlases after the latest atlas pass). +- The exporter root cause is now clearer: the old five-region post-audio carve was still masking the real visible payload. Loader-sized `post_audio_section_00` contains both the small `0x18` root descriptor rows and the dense 24-byte bulk placement rows, so the cache builder now recovers both visible families from that first real section instead of from the guessed `region00/region01` split. +- A verified full rebuild now carries `region00 + region01` across all `62` maps. `LSET1/L0.WDL` now emits `1189` items, `LSET1/L1.WDL` emits `754`, and every rebuilt map now reports `uniqueZCount > 1` instead of the earlier mostly-flat `z = 0` export. 
+- The next subordinate layers are now structurally split too: `DAT_800758d8` is the per-type art/template bank, `DAT_800758d0` feeds the simple constructor's local component payload, and `DAT_800758cc/d4` feed the compound constructor's state/variant tables. The executable model is solid, but the generic raw-file export for `DAT_800758cc/d0/d4` is not currently landing in the live scene cache, so that serialization path stays open work. +- The late LSET template bank is now less speculative too. The currently working map-local `DAT_800758d8` candidate is not the old "small typed section" guess; on retail `LSET1/L9.WDL` it decodes cleanly only when the parser treats the late large section as a bank with an embedded `+0x38` start, which is now enough to recover real bundle-backed mappings for a first subset of map types. +- The main visible bulk layer is no longer flat. The accepted `region01` placements now use the constructor-backed `+0x06` byte as provisional `z`, and `LSET1/L0.WDL` currently exports `11` distinct structured elevation levels instead of one forced `z = 0` plane. +- One renderer-side mismatch is now closed: PSX sprites use authored `item.screen` rectangles, and the bounding/highlight overlay path now uses those same authored rectangles instead of recomputing a DOS-style wireframe from provisional `world` coordinates. +- The executable now closes the last projection stage: authored object coordinates land in object fields `+0x3c/+0x40/+0x44` as `16.16` fixed-point values, and `FUN_80040d44` / `FUN_80040f78` project them with `screen_x = y - x` and `screen_y = 2*z - (x + y)/2` before writing the final screen rectangle at `+0x20..+0x2e`. +- Palette handling is partially grounded by runtime VRAM evidence, but the per-placement override rule is still missing. +- The scene/cache naming now uses executable-backed family names (`section0_dispatch_roots`, `section0_constructor_placements`) with the old `region00/region01` labels kept only as legacy aliases. 
+- The offline `FUN_8003b00c` path now exists in the renderer-local exporter and serializes one candidate on-disk compressed source plus the decoded `0x3e00` state buffer into the cache for each map. +- The type-to-art pass is still open. The exporter now scans parsed per-type template-bank payloads for bundle references, and it no longer promotes the disproven scan-order bundle fallback into visible map art. Unverified types stay on placeholders until the executable state/type path yields a real art binding. +- That loader-shaped bank selection is now already paying off in the live cache: map `9` moved from `0` resolved bundle-mapped items to `111` after the template pass switched to the embedded late-section parse, even though unresolved root-dispatch families such as `0x0042` and `0x0049` still need the downstream state/variant path before they can stop using placeholders. +- The old fallback art binding is now positively disproven for map rendering, not just "still unverified": in the live cache, early `section0_dispatch_roots` types `0x0042` and `0x0049` repeatedly bind to portrait/talk-animation bundles (for example map `0` offsets `0x000B2970` and `0x000D84F4`), which confirms the section-0 dispatch rows are generic runtime-object descriptors whose visible art still depends on downstream per-type state/variant selection. +- The executable-side type path is now clearer and named in the live PSX Ghidra database. `psx_object_create_simple_record` and `psx_object_create_compound_record` both index the same per-type banks rooted at `DAT_800758d8/d0/cc/d4`; `psx_object_select_state_script` selects an active state script from `DAT_800758cc`, `psx_object_advance_state_script` at `0x80025d68` interprets sentinel-driven script records, `psx_object_lookup_variant_entry` resolves a companion entry from `DAT_800758d4`, and `psx_reset_type_runtime_banks_from` at `0x80025ce8` is the nearby bank-reset helper that had been misnamed earlier. 
So the missing map-render rule is not one flat `type -> bundle` table but a multi-stage runtime selection path. +- The visible render pass is less opaque now too. `FUN_80041378` draws in three stages: the sorted visible-object list through `FUN_80041458`, a second special-visible list through `FUN_80041144`, and then HUD/overlay/icon primitives through `FUN_800416cc`. That means the remaining map-viewer gap is still mainly in world-object and special-object families, not in the HUD pass. +- The stage-2 path is now strong enough to affect renderer planning directly. `FUN_80040f78` is the queue-builder for the `FUN_80041144` pass: it projects an object just like the main `FUN_80040d44` path but appends it to `DAT_80078b70` / `DAT_80067472` instead of the main `DAT_8006ad5c` visible list. So a renderer that only models the stage-1 visible list will still miss a real world-facing object lane. +- Palette override provenance is tighter too: object field `+0xa0` is the original authored source-record pointer written by both constructors, so the current override path in `FUN_80041458` is reading authored record bytes directly rather than a hidden runtime side table. +- One narrow renderer-side consequence is now verified in output, not just in notes: the cache builder now applies the executable-backed `0x0050` selector map (`0..3 -> frame 0..3`) as a temporary fallback, and retail map `9` now exports `type=80 state_selector=1 chosen_frame=1` instead of forcing frame `0`. +- `JL-9` already appears in recovered PSX weapon-name tables, but gameplay availability and sprite identity are not yet closed. + +## Success Criteria + +### Map-viewer success + +- At least one PSX map loads in the existing viewer with stable world placement, defensible draw order, and recognizable room/layout structure. +- The PSX path reuses the existing viewer pipeline instead of creating a separate one-off viewer. 
+- Exported scene data preserves enough raw metadata to keep later decomp passes reversible. + +### Palette success + +- Bundle export chooses the same palette family the runtime uses for that placement class. +- At least one tile-heavy scene and one object-heavy scene render with mostly correct colors without manual palette swapping. +- Palette selection logic is encoded in exporter metadata or viewer-side decode rules, not only in prose notes. + +### JL-9 success + +- `JL-9` is classified as one of: fully usable weapon, cut/incomplete leftover, menu-only string, or debug-only grant. +- The unlock or acquisition path is identified from executable logic, data tables, or authored content. +- The weapon's sprite or best candidate art bundle is identified and documented. + +## Workstreams + +## 1. Close the PSX map record format + +Purpose: replace the invalid `small top-level record stream == whole level` assumption with a renderer-fed scene that includes the real bulk map substrate. + +Tasks: + +1. Revisit the executable loader chain around the `LSET*.WDL` stream consumer and name the section families loaded into `DAT_800678f4`, `DAT_80067720`, `DAT_800758cc/d0/d4/d8`, `DAT_800675f8`, and `DAT_8006769c`. +2. Prove which loaded section is the small top-level object/dispatch list and which section holds the actual bulk map substrate. +3. Recover the format and semantics of the compressed blob that `FUN_8003b00c` inflates into the `0x3e00` level buffer. +4. Tie one concrete subordinate record family to the constructor inputs that feed object `+0x3c/+0x40/+0x44` as `16.16` fixed-point coordinates. +5. Recover the bundle/frame binding rule for map placements well enough to stop relying on broad candidate pairing. +6. Recover the draw-order or layer rule used when multiple map records overlap. +7. Validate the corrected multi-section schema on at least `L0.WDL` and `L1.WDL` so the decode is not overfit to one level. 
+ +Expected output: + +- a stable PSX placement schema recorded in `docs/psx/` +- one exporter that emits scene JSON in the same broad shape as the existing viewer pipeline +- one known-good reference map whose structure is visually recognizable + +## 2. Close palette selection instead of guessing it + +Purpose: make exported graphics match the runtime palette path automatically. + +Tasks: + +1. Continue from the already identified texture draw helpers and the caller path that reads palette override metadata from the object field currently described as `+0xa0` in the notes. +2. Determine whether the placement record itself, a second-stage runtime header, or a side table supplies the override palette index. +3. Reconcile the live VRAM `row 0xF0 / x=0` success case against the on-disk palette blob so the export path can reproduce the runtime source instead of only matching dumps. +4. Identify whether different bundle modes or resource classes use different CLUT selection rules. +5. Add exporter-side palette metadata that preserves both bundle default palette and resolved placement palette. +6. Validate against at least three anchor assets: one wall/floor-heavy tile set, one object sprite with obvious color identity, and one UI or portrait-like asset. + +Expected output: + +- a documented palette-selection rule in `docs/psx/` +- exported PSX atlases or frame PNGs that no longer require manual palette picking for the common solved families +- a short unresolved list only for genuinely exceptional palette cases + +## 3. Integrate the PSX decode into the existing map viewer + +Purpose: stop treating PSX as a disconnected experiment and make it a first-class renderer source. + +Tasks: + +1. Define one PSX scene format version that keeps raw decode fields visible while still fitting the current viewer's atlas-plus-scene model. +2. Export one minimal but real PSX map scene from the solved map schema and load it through the existing viewer path. +3. 
Compare the rendered result against in-game screenshots, captured VRAM/framebuffer evidence, or clearly identifiable room geometry. +4. Tighten the exporter until one map reads coherently before trying to bulk-export the entire disc. +5. Only after a coherent single-map success, generalize to more `LSET` maps and add any PSX-specific catalog or loader toggles the viewer needs. + +Expected output: + +- one coherent PSX map visible in the existing viewer +- one stable exporter path that can be iterated on without forking the viewer architecture + +## 4. Investigate JL-9 as data, logic, and art + +Purpose: close the question of whether `JL-9` is real and what it corresponds to visually. + +Tasks: + +1. Locate the PSX weapon-name table and the code/data structure that indexes into it. +2. Identify the item or weapon definition row for `JL-9`, including ammo type, flags, and any inventory/equipability markers. +3. Trace all code and data references to that row: mission rewards, cheats, debug grants, pickups, shop/loadout flow, or scripted usecode equivalents if present. +4. Check whether `JL-9` appears in the pre-alpha build under the same index and whether its surrounding data differs from retail. +5. Identify the sprite by following the weapon/item definition to the bundle/frame or icon resource it uses. +6. Classify the result clearly: shipped and obtainable, shipped but gated/unused, or string/data leftover only. + +Expected output: + +- a short `docs/psx/` note or section that states whether `JL-9` is real +- the acquisition or unlock path if one exists +- the best supported sprite or bundle match + +## Recommended Execution Order + +1. Finish map-record closure enough to bind placements to the right art. +2. Replace the current `.cache` runtime-record probe premise with the corrected multi-section WDL model, then recover the runtime type/resource lookup that can replace the still-provisional `u0 -> bundle index` rule with real art binding. +3. 
Get one map loading coherently in the existing viewer. +4. After the viewer path is grounded, use the now-stronger bundle identification flow to close `JL-9` sprite identity and availability. + +## Immediate Next Batch + +1. In Ghidra, tighten the section-family naming around `DAT_800678f4`, `DAT_80067720`, and the candidate `DAT_8006b5d8` source so the current `section0_*` labels can be promoted from exporter-safe names to exact loader names. +2. Record which helpers read `DAT_80067720` versus which helpers read the decompressed `DAT_8006769c` buffer now that the offline decode path is present in the cache. +3. Compare the rebuilt all-map exports against recognizable rooms and decide whether the remaining missing structure now lives mainly in the decoded `DAT_8006769c` buffer or in still-unrendered subordinate tables. +4. Tighten the raw file mappings for the newly exported runtime-bank layers (`DAT_800758d8`, `DAT_800758d0`, `DAT_800758cc`, `DAT_800758d4`) so their current section selection is proven rather than heuristic. +5. Recover an actual bundle/frame reference from the per-type template payloads or their consumers so the exporter can replace the now-disproven scan-order bundle fallback with a verified type-to-art rule. +Current delta: the template bank selection is now stronger and already recovers real art for a first subset, but the still-missing families need the stage-1/stage-2 object draw path plus `DAT_800758cc/d4` state interpretation, not more HUD/overlay decoding. +Current delta: stage 2 is no longer hypothetical. The next renderer-improvement candidate is to expose/export the queued-object lane that feeds `FUN_80041144`, because the executable now clearly maintains it separately from the main visible list. +6. 
Split section-0 placements into at least three executable-backed render classes: world-facing geometry/object placements, animated runtime-only objects, and clearly non-map-facing UI/talk assets such as the portrait bundles currently surfacing through fallback art matching. +7. Decode the `psx_object_advance_state_script` sentinel opcodes (`ffff`, `fffe`, `fffd`, `fffc`, `fffb`) well enough to tell when a placement loops, jumps into a subsidiary script, or fires a side-effect helper, because that state-machine branch is now the main discriminator between map-facing art and non-map runtime assets. +Current delta: `fffe` is now closed as an audio/effect dispatch through `FUN_8004061c`, so the next sentinel work should focus on the remaining control-flow opcodes. +8. In parallel with the map pass, trace the palette-override read path from the known draw helper caller and document which source field feeds the resolved CLUT. +9. Locate the `JL-9` weapon entry in the PSX executable tables and log its table index, surrounding weapon names, and all code/data xrefs. +10. Create a short follow-up note in `docs/psx/` after the batch rather than burying the result only in Ghidra comments. + +## Documentation Rule For This Track + +- Keep long-form findings in `docs/psx/psx.md` or another dedicated file under `docs/psx/`. +- Keep this file as the active plan and update it when a major blocker closes or the execution order changes. +- When `JL-9` closes cleanly, give it its own short note under `docs/psx/` instead of leaving it as one bullet in a larger map note. 
\ No newline at end of file diff --git a/docs/psx/psx.md b/docs/psx/psx.md index ec7ac49..bbeaef5 100644 --- a/docs/psx/psx.md +++ b/docs/psx/psx.md @@ -380,6 +380,9 @@ Current color blocker: - both main texture draw helpers (`FUN_80044bdc` and `FUN_80044e9c`) fall back to the bundle default palette index only when no override is present - the important caller path at `FUN_80041458` ORs in a high-byte palette override from object/tile metadata pointed to by object field `+0xa0` +- that `+0xa0` pointer is now tighter too: both object constructors store the original authored source-record pointer there, so the override is not coming from a hidden runtime side table. For current solved families the draw helper reads the override straight from the authored record bytes: + - type `0x003e..0x00ab`: high byte of source word at record `+0x06` + - type `>= 0x00ac`: high byte of source word at record `+0x0c` - that means standalone bundle previews can still be wrong even when the bundle parser and raw CLUT table are both correct - the extractor now emits wider `u16x12` raw CSV views for `post_audio_region_01` and `post_audio_region_02` because the relevant override state appears to live beyond the first 6 words of those candidate placement records - the current top-ranked portrait bundle (`bundle_00064478`, default palette index `106`) is a useful color-validation anchor because the grayscale frame is obviously correct while all raw-palette candidates remain visibly wrong @@ -497,27 +500,30 @@ Current evidence-backed next step: Current renderer-compatibility result: -- a first PSX-compatible static real-art probe scene is now exported for the public map renderer -- exporter script: - - `tools/psx_export_map_debug_scene.py` -- current generated public-report outputs: - - `k:\ghidra\Crusader_Decomp_Public\map_renderer\site\data\maps\psx-remorse\map-0\scene.json` - - multiple copied frame atlases such as 
`k:\ghidra\Crusader_Decomp_Public\map_renderer\site\data\maps\psx-remorse\map-0\bundle_0003917C_frame_000.png` - - `k:\ghidra\Crusader_Decomp_Public\map_renderer\site\data\catalog.json` - - `k:\ghidra\Crusader_Decomp_Public\map_renderer\site\data\catalogs\psx-remorse.csv` -- current scene characteristics: - - source: filtered `LSET1/L0.WDL` `post_audio_region_01` paired-record candidates - - rendered items: `1050` - - unique bundle-backed shape definitions: `49` - - copied atlas/frame PNGs: `62` - - bounds: `3896 x 8431` - - scene format version: `psx-region01-bundle-probe-v1` - - current probe stats: `u0` span `62..111`, fallback frame count `187` +- the old Python/site real-art probe remains useful as discarded negative evidence, but it is no longer the active viewer workflow +- the active integration path now lives inside `k:\ghidra\crusader_map_viewer\map_renderer` and builds live data into `.cache` from `STATIC_PSX` +- active renderer-local scripts: + - `src/build-psx-cache.js` + - `src/lib/psx-cache.js` +- build entrypoint: + - `npm run build-psx-cache` +- current generated live-cache outputs: + - `k:\ghidra\crusader_map_viewer\map_renderer\.cache\psx\catalog.json` + - `k:\ghidra\crusader_map_viewer\map_renderer\.cache\reference-data\psx-remorse\reference-data.json` + - per-map scene files under `k:\ghidra\crusader_map_viewer\map_renderer\.cache\scene-cache\psx-remorse\map-*\\scene.json` + - `k:\ghidra\crusader_map_viewer\map_renderer\Catalogs\psx_shape_catalog_remorse.csv` +- current processed-cache characteristics from the verified build: + - source: `k:\ghidra\crusader_map_viewer\map_renderer\STATIC_PSX` + - scene format version: `psx-region01-provisional-art-probe-v2` + - processed maps: `23` + - shared shape definitions: `313` + - shared atlases: `313` + - largest currently useful placement-heavy maps: `LSET1/L0` (`1050` items), `LSET4/L33` (`942` items), `LSET5/L48` (`851` items), `LSET6/L51` (`463` items), `LSET7/L63` (`315` items) Current art-binding 
hypothesis used by this probe: - region-01 `u0` is treated as a provisional direct bundle index into the extracted `sprite_bundles/` set -- region-01 `u4` is treated as a provisional frame index within that bundle, clamped to the highest available frame when out of range +- region-01 `u4` was originally treated as a provisional frame index within that bundle, but that interpretation is now considered wrong; the constructor chain instead points to `u4` as a state/script selector candidate - this is evidence-backed enough to render real PSX art in the existing map renderer, but not strong enough yet to call the binding solved - the strongest negative check so far is that the region-01 `u5` values (`0x20`, `0x22`, `0x30`) do not match the bundle default palette indexes, so the palette-selection/control path is still missing @@ -540,13 +546,19 @@ New loader/data evidence from this pass: - little-endian words: `0x004A, 0x1603, 0x0EE7, 0x0000, 0x0001, 0x0020` - that record family is a better next target than the invalidated direct bundle probe because it already exposes a small type-like word (`0x004A`) plus coordinate-like words without forcing an arbitrary raw-bundle index -What this first public renderer pass means: +What this renderer pass means now: -- the existing renderer app can now load a PSX scene bundle from the static report without any PC `FIXED.DAT` dependency -- this is currently a real-art probe of filtered placement candidates, not a final decoded PSX map -- the renderer now displays extracted bundle art from `post_audio_region_04` instead of synthetic colored stand-ins -- the current output is still useful because it shows that filtered region-01 records can drive recognizable, repeatedly used PSX art through the existing renderer pipeline -- one bad extracted origin (`1x6` sprite with `xoff=65535`) initially blew out the fit bounds; the exporter now sanitizes implausible origins before writing scene metadata +- the live renderer can expose PSX as an 
optional game only after the processed cache exists; it is no longer tied to ad hoc `site` exports +- the current active output is now a provisional real-art probe rather than a placeholder-only type/lane scene +- the processed-cache path is now compatible with the existing shared reference-data pipeline and PC-style catalog grouping, which keeps PSX integration inside the normal viewer architecture instead of forking it +- the old real-art probe is still valuable as negative evidence because it proved that direct raw bundle ordering produces obviously wrong scene content + +New renderer-grounded improvement from this pass: + +- `src/lib/psx-cache.js` now scans `post_audio_region_04` directly from `STATIC_PSX`, parses bundle headers in JavaScript, colorizes the extracted frames with the currently available default/heuristic palette path, and writes per-map bundle atlases into `.cache/reference-data/psx-remorse` +- the live cache no longer uses only synthetic placeholder shapes for map `0`; the current `LSET1/L0.WDL` scene references `49` real atlases and `62` real sprite frames under the still-provisional direct `u0 -> bundle index` hypothesis +- extracted bundle origins are now sanitized on import so bad `0xFFFF` offsets do not blow out the scene bounds; `LSET1/L0.WDL` is back to a sane `3896 x 8431` footprint instead of the broken `67k`-pixel-wide intermediate result +- PSX shape definitions now use a `1x1x1` footprint and the scene items synthesize viewer-compatible `world.x/world.y/world.z` from the final screen anchors; this keeps bounding-box and preview overlays aligned with the PSX art probe instead of projecting nonsense from the raw `u1/u2/u3` words Current app compatibility notes: @@ -562,6 +574,242 @@ Immediate implications for the next decode pass: - the palette override path is still the main blocker to correct final color selection even when the bundle/frame choice is plausible - once the bundle key and palette control path are recovered, the same 
scene-export path can graduate from `real-art probe` to actual PSX map rendering +## PSX Provisional Real-Art Probe + +The live renderer now prefers a smaller loader-backed record family when it can normalize that family into structured placement rows, while still preserving the older dense region-01 probe as a fallback/debugging strategy. + +What changed in this pass: + +- the temporary Python probe established the scene structure, but the active implementation is now renderer-local JavaScript rather than a standalone exporter +- `src/lib/psx-cache.js` now reads `STATIC_PSX`, parses `LSET*.WDL`, prefers normalized `post_audio_region_00` count-prefixed records when they pass the existing structured-candidate filter, falls back to `post_audio_region_01` otherwise, scans `post_audio_region_04` for sprite bundles, and emits per-map atlases built from the extracted PSX frame data +- `src/build-psx-cache.js` writes the resulting processed data into the live cache tree: + - `k:\ghidra\crusader_map_viewer\map_renderer\.cache\psx\catalog.json` + - `k:\ghidra\crusader_map_viewer\map_renderer\.cache\reference-data\psx-remorse\reference-data.json` + - per-map scenes under `k:\ghidra\crusader_map_viewer\map_renderer\.cache\scene-cache\psx-remorse\...` + - `k:\ghidra\crusader_map_viewer\map_renderer\Catalogs\psx_shape_catalog_remorse.csv` +- the viewer now detects `psx-remorse` from the processed manifest instead of from a fake PC-style source-file heuristic +- scene items now keep the candidate PSX `x/y` words directly in `world`, use the executable-backed projection basis `screen_x = y - x`, `screen_y = 2*z - (x + y)/2` with provisional `z = 0`, and keep `1x1x1` shape footprints so overlay boxes remain usable without pretending the old PC-style world export is solved + +Current verified processed-cache result: + +- scene format version: `psx-runtime-record-probe-v1` +- processed maps: `61` +- atlas-backed shapes: `1112` +- atlases: `1112` +- `LSET1/L0.WDL` preferred source 
family: `post_audio_region_00` +- `LSET1/L0.WDL` rendered items from the preferred family: `59` +- `LSET1/L0.WDL` still has `1050` dense fallback `post_audio_region_01` records preserved in scene metadata for comparison +- `LSET1/L0.WDL` resolved real-art atlases for the preferred family: `18` +- `LSET1/L0.WDL` resolved sprite frames for the preferred family: `26` +- `LSET1/L0.WDL` unique `u0` types in the preferred family: `18` +- lane split: + - `0x0020`: `26` + - `0x0022`: `21` + - `0x0030`: `12` +- `LSET1/L0.WDL` current scene bounds after the runtime-record pass: `1313 x 438` +- `LSET1/L0.WDL` currently resolves all `59` preferred-family records to real extracted bundles with `0` placeholder fallbacks, but still clamps `15` frame requests down to the highest available extracted frame index +- one visible viewer mismatch is now separated from the remaining map-format problem: PSX sprites already draw from authored `item.screen`, but the old highlight/bounding overlay path was still recomputing DOS-style wireframes from provisional `item.world`; `scene-presentation.js` now falls back to authored screen rectangles for PSX items instead of drawing those incorrect projected boxes + +Why this matters: + +- this is the first live viewer path that prefers a loader-compatible, count-prefixed record family instead of treating the huge dense region-01 stream as the only scene source +- it keeps the strongest current working assumption narrower and more explicit: + - normalized `post_audio_region_00` rows are now the preferred placement family when they satisfy the same structural checks as the older region-01 records + - `post_audio_region_01` remains a dense fallback evidence source instead of being silently discarded + - the art lookup is still unresolved and must be recovered from the real runtime resource tables rather than inferred from raw bundle ordering +- it also moves the viewer one step closer to the executable model by applying the recovered PSX projection 
basis directly in the cache builder instead of plotting raw `u1/u2` values on a pseudo-screen plane + +Immediate next consequence: + +- the next map-format batch should treat the processed `.cache` runtime-record probe as the baseline renderer target and focus on proving exactly how the normalized `post_audio_region_00` words line up with the constructor-fed `x/y/z` fields +- the old dense region-01 path should stay available as evidence, but it should no longer be the default scene family unless the loader-backed family fails to normalize on a given map +- that means the remaining visual corruption should now be treated primarily as a placement/schema problem again, not as a box-overlay problem; the next pass needs to recover the authoritative height lane and the exact constructor-fed field mapping instead of spending more time on DOS-style overlay math + +## PSX Map-System Correction + +The current live viewer export was built on the wrong premise. The `~45..59` records currently exported per PSX map are not enough to represent a whole Crusader level, and executable tracing now shows why. + +What the loader actually does: + +- `wdl_resource_bundle_load_by_index` reads the selected `LSET*.WDL` into multiple section pointers, not one flat placement stream. +- The first runtime section is a top-level table at `DAT_800678f4` whose record stride is `0x18` bytes. +- The loader iterates that first section with: + - `for each 0x18-byte top-level record` + - `type = record[+0x08]` + - `dispatch through PTR_PTR_80063118[type]` +- Those dispatch handlers do not behave like a terrain-tile walker. They construct one runtime object or a tiny object cluster at a time through `FUN_800249f4`, `FUN_80024eec`, `FUN_8003c314`, `FUN_8003c714`, and `FUN_8003cc08`. 
+ +Why the current export is incoherent: + +- the current `region00`-first exporter is effectively treating that small top-level descriptor family as if it were the whole level +- those records are only the root nodes of the level bundle's object/resource system +- they are too few because the bulk level content lives elsewhere in the loaded bundle state + +New executable-backed evidence for the missing bulk content: + +- `level_resource_stream_load` and `FUN_8003917c` populate the typed runtime resource tables rooted at `DAT_800758cc/d0/d4/d8` +- `DAT_80067720` is a small top-level `0x18` record list used by object/event-style helpers such as `FUN_80031044` and `FUN_8002b1a8`; it is not a whole-map terrain stream +- during bundle load, `FUN_8003b00c(DAT_8006769c, &DAT_8006b5d8, 0x3e00, 0x3e00)` inflates a separate compressed blob into a dedicated level buffer +- that decompressed buffer is carried through save/load helpers (`FUN_8003a0f4`, `FUN_80049890`) independently of the tiny top-level descriptor list, which is exactly what a real map substrate would do +- the two `DAT_80067720` helpers are now clearer about role too: + - `FUN_80031044` scans the `0x18`-stride rows for `0xAAAA`-tagged entries and low-6-bit selector matches, then caches a pointer to the matched row payload + - `FUN_8002b1a8` mutates matching rows by type/id and flag bits in place + - both behaviors fit a small event/marker/control list and do not look like whole-map geometry submission +- the decompressed lane is more clearly persistent substrate/state than before: + - `FUN_8003a0f4` hands `DAT_8006769c` plus `DAT_80067528` to the save helper path + - `FUN_80049890` repacks the `DAT_8006b5d8` / `0x3e00` state lane into the `0x4000` memory-card save block + - this strengthens the read that `DAT_8006769c` is the saved/restored map-state substrate while `DAT_80067720` stays the tiny top-level control list + +Current safest read: + +- the `~59` exported records are top-level WDL nodes, not the 
entire PSX map +- the real PSX level is split across: + - a small top-level descriptor stream + - typed subordinate resource tables + - at least one separate decompressed level-state blob +- the viewer looks nonsensical because it is rendering only one small layer of that system and mistaking it for the full map + +Immediate consequence for the exporter: + +- stop treating `post_audio_region_00` as the default whole-map scene source +- keep `post_audio_region_00` and `post_audio_region_01` as evidence sources, but pivot the next decode pass toward the multi-section WDL model recovered from the executable +- the next map-export target must include the decompressed bundle state and/or the subordinate placement/tile resources behind the top-level `0x18` records, not just the root records themselves + +Exporter status after the next renderer pass: + +- the earlier five-region post-audio carve was still wrong for visible-map recovery. The corrected loader-sized section probe shows that the first post-audio section already contains both the count-prefixed top-level descriptor rows and the dense 24-byte bulk placement rows that the flat maps were missing. +- `map_renderer/src/lib/psx-cache.js` now recovers visible families from loader-sized `post_audio_section_00` instead of treating the old guessed `post_audio_region_01` carve as the default bulk source. +- the exported scene metadata now records those visible families under executable-backed names instead of the old provisional labels: + - `section0_dispatch_roots` for the top-level dispatch/root records + - `section0_constructor_placements` for the dense constructor-fed placement records +- a verified full rebuild now exports all `62` PSX maps with large scene volumes and non-flat `z` stats. `LSET1/L0.WDL` now emits `1189` items, `LSET1/L1.WDL` jumps from `53` items to `754`, and the rebuilt catalog reports `62/62` maps with `section0_dispatch_roots + section0_constructor_placements` coverage and `uniqueZCount > 1`. 
+- the renderer-side reference payload no longer emits one atlas per resolved PSX shape. The new packed-atlas pass reduces the shared PSX reference cache from the old `4032` one-shape atlases to `1925` shared packed atlases across the same `4032` shape definitions, and a spot-check on `LSET1/L0.WDL` now exports the map scene itself with `atlasCount = 1` instead of a long per-bundle atlas list. +- the cache export still carries the parsed `DAT_800758d8` candidate section and an offline `FUN_8003b00c` decode candidate for the compressed source feeding `DAT_8006b5d8 -> DAT_8006769c`, but the generic raw-file `DAT_800758cc/d0/d4` serialization is not currently landing in the live scene cache and should be treated as an open exporter gap rather than a closed layer. +- this still does not mean the PSX map decode is fully solved: the viewer now has enough volume to represent whole-level candidates across the disc, but the remaining blocker is semantic decoding of the subordinate runtime banks and the separate decompressed `0x3e00` buffer, not record-count starvation. +- the type-to-art path is only partially improved. The cache builder now scans the parsed per-type art-template payloads for bundle references, and the renderer no longer treats the disproven scan-order `u0 -> bundle` mapping as trustworthy visible art. Unverified types now stay on placeholder art instead of surfacing known-bad portrait/talk bundles as map geometry. +- the scan-order fallback is now known to be wrong at the root, not merely incomplete. In the live `.cache` output, `section0_dispatch_roots` types `0x0042` and `0x0049` repeatedly bind to portrait/talk-animation bundles such as map `0` type `0042` -> offset `0x000B2970` and map `0` type `0049` -> offset `0x000D84F4`, with the same failure pattern continuing through early maps. 
Those portrait bundles are useful negative evidence: they show the top-level dispatch rows are generic object/state descriptors, not a direct map-graphics stream that can be paired to bundle order. + +Next decoded runtime layers from the constructor pass: + +- `DAT_800758d8` is the per-type art/template bank, not the missing whole-map substrate. `wdl_resource_bundle_load_by_index` populates it from an `8`-byte descriptor table, and both `FUN_800249f4` and `FUN_80024eec` consume it before calling `FUN_80044434` through the loader-side helper path. +- `DAT_800758d0` is a per-type companion/component bank for the simpler constructor family. `FUN_800249f4` copies the resolved pointer from that bank into the local object payload at `obj->8->[0,4]`, so this looks like a per-type component/template block rather than a top-level placement stream. +- `DAT_800758cc` is a per-type offset-table bank for the compound constructor family. `FUN_80024eec` stores it at `obj+0x88`, and `FUN_800260e8` later indexes it with the placement byte at `record+0x08` to resolve a state/offset subrecord into `obj+0x8c/0x90`. +- `DAT_800758d4` is another per-type companion bank for the compound constructor family. `FUN_80024eec` stores it at `obj+0x84`, and `FUN_8002841c` queries it later using the object's `+0x94` selector, so it behaves like a variant table or companion lookup rather than raw map geometry. 
+- The key functions in that chain are now renamed in the live PSX Ghidra database: + - `FUN_800249f4` -> `psx_object_create_simple_record` + - `FUN_80024eec` -> `psx_object_create_compound_record` + - `FUN_80025ce8` -> `psx_reset_type_runtime_banks_from` + - `FUN_80025d68` -> `psx_object_advance_state_script` + - `FUN_800260e8` -> `psx_object_select_state_script` + - `FUN_8002841c` -> `psx_object_lookup_variant_entry` + - `FUN_8003917c` -> `psx_load_type_state_banks` + - `FUN_80044434` -> `psx_create_image_resource_from_descriptor` + - `FUN_80045ffc` -> `psx_cache_type_art_descriptor` +- the constructor/runtime chain is now clearer too: + - `psx_reset_type_runtime_banks_from` is a bank reset helper used during init/recycle paths; it clears `DAT_800758c4/c8/cc/d0/d4/d8` from the requested type index upward and is not the state interpreter itself. + - `psx_object_create_simple_record` and `psx_object_create_compound_record` are two placement constructors for different section-0 row layouts, but both index the same per-type runtime banks by type id before any final render-facing selection is made. + - `psx_create_image_resource_from_descriptor` turns the `DAT_800758d8` per-type descriptor into a renderable resource/header object; this is why `DAT_800758d8` should be read as an art/template descriptor bank, not as a whole-map tile layer. + - `psx_object_select_state_script` selects a state or animation subrecord from `DAT_800758cc` using a placement byte (`record+0x08` in the compound family), storing the resolved script/state pointer at `obj+0x8c/0x90` and the selector at `obj+0x9e`. + - `psx_object_advance_state_script` then interprets the active state script with sentinel/control values such as `0xffff`, `0xfffe`, `0xfffd`, `0xfffc`, and `0xfffb`, so the visible frame path is explicitly state-driven rather than just "type id -> one bundle". 
+ - The current renderer-side consequence is important: section-0 word `u4` is no longer treated as a verified sprite-frame index. It is now carried forward as a state-selector candidate in exported scene metadata until the `DAT_800758cc/d4` path is decoded far enough to pick the right animation frame from executable evidence. + - Current strongest sentinel read: + - `0xfffe` dispatches `FUN_8004061c`, which is an audio/effect helper rather than a visible-frame selector. + - `0xfffd` is an in-script jump/re-anchor control that rewrites `obj+0x90` relative to the current script base. + - `0xfffc` switches `obj+0x8c/0x90` to another subsidiary script selected through the `DAT_800758cc` offset table. + - `0xfffb` also switches into a subsidiary script, but first scans forward to an in-script `0xfffd` marker before choosing the destination entry. + - Current best read of those sentinels: + - `0xffff` marks a terminal or restart control that re-anchors the script at `obj+0x8c` and raises object-state flags. + - `0xfffe` dispatches a side-effect helper (`FUN_8004061c`) using the following word as a parameter before advancing. + - `0xfffd`, `0xfffc`, and `0xfffb` switch into subsidiary scripts through the `DAT_800758cc` offset table rooted at `obj+0x88`. + - `psx_object_lookup_variant_entry` finally uses `obj+0x94` to look up a companion entry in `DAT_800758d4`, which means even after construction the art-facing choice is still mediated by per-type variant/state tables. 
+- This means the next PSX layers are now at least structurally separated: + - visible root descriptors (`section0_dispatch_roots`, legacy alias `region00`) + - visible bulk placement candidates (`section0_constructor_placements`, legacy alias `region01`) + - per-type art/template descriptors (`DAT_800758d8`) + - per-type simple-object component blocks (`DAT_800758d0`) + - per-type compound state-offset tables (`DAT_800758cc`) + - per-type compound variant tables (`DAT_800758d4`) + - the still-separate decompressed `0x3e00` level-state buffer (`DAT_8006769c`) +- The current renderer pass now records those banks explicitly as exported scene/state layers, while still only rendering the first two as visible scene items. +- Immediate map-viewer consequence: the current fallback art probe should be treated only as a diagnostic overlay for candidate bundle families. A workable renderer will need to recover the per-type `DAT_800758d8` descriptor mapping and the downstream `DAT_800758cc/d4` state+variant selection path before it can decide whether a section-0 placement should show world geometry, an animated object, or something non-map-facing like a portrait/talk asset. +- The next loader-side correction is now verified in the live cache too: the effective late `LSET*.WDL` `DAT_800758d8` candidate is not the earlier small-section heuristic, but a large late section whose working descriptor stream begins at an embedded `+0x38` offset. On retail map `9` that correction alone lifts `bundleMappedItemCount` from `0` to `111`, which is enough to restore real bundle-backed art for a first subset of types without reintroducing the disproven scan-order fallback. + The still-unresolved root-dispatch families remain instructive rather than contradictory. 
`0x0042` and `0x0049` still stay on placeholders after the bank-selection fix, but the same pass now decodes their `DAT_800758cc` state rows more cleanly: type `0x0042` carries three selector-targeted scripts (`0`, `1`, `2`) that all terminate through `0xffff`, while type `0x0049` carries a single selector-`0` script. So the remaining blocker for those roots is no longer "find any late template bank at all"; it is the deeper `DAT_800758cc/d4` state-to-visible-art bridge. + A first renderer-safe bridge landed even with that exporter gap still open: the verified `0x0050` state-script mapping (`selector 0..3 -> frame 0..3`) is now applied as a narrow fallback in the cache builder, and the rebuilt live map-9 scene now shows `type=80 state_selector=1 chosen_frame=1` instead of the old forced `chosen_frame=0`. Unresolved fallback placeholders are also now clamped to `opacity=0.45` in live scene output so the still-missing families stop visually overpowering the recovered real art. This remains intentionally scoped: the fallback frame map only covers the one family with direct executable-backed frame evidence, and the opacity clamp is diagnostic relief rather than a decoding claim. + The current draw split is clearer too. `FUN_80041378` is a three-stage render pass (stage 1 is the main world-object pass over the sorted `DAT_8006ad5c` visible-list slice returned by `FUN_8002d59c`): + - stage 2: a second special-visible list drawn by `FUN_80041144` + - stage 3: HUD/overlay/icon primitives from `FUN_800416cc` +- That split matters for the map-viewer target: stages 1 and 2 remain relevant to missing world-facing content, while stage 3 is mostly front-end or overlay material and should not be mistaken for the missing half of the map. +- Stage 2 is now materially better understood and is no longer just a read-side observation: + - `FUN_80040f78` is the queue-builder for that pass. It projects an object with the same fixed-point world-to-screen math as `FUN_80040d44`, writes the final screen rectangle to `+0x20..+0x2e`, then appends the object to `DAT_80078b70` and increments `DAT_80067472`. 
+ - `FUN_80041144` consumes that queue directly, iterating `DAT_80078b70[0 .. DAT_80067472)` and submitting sprite primitives through the same texture draw helpers as the main object pass. + - `FUN_80044fec` resets the queue each frame by clearing `DAT_80067472` after the top-level draw pass. + - So the stage-2 list is not UI/HUD noise and not a duplicate of the main clipped visible list. It is a distinct world-facing queued-object lane, which is now a concrete candidate explanation for part of the still-missing map content in the viewer. +- The immediate caller-side consequence matters too: + - `FUN_80040d44` remains the main clipped visible-list toggle, calling the stage-1 add/remove helpers when an object enters or leaves the screen. + - The recovered post-state-advance updater family now splits into five visible call sites: `0x80012b44`, `0x80013524`, `0x80013564`, `0x80013650`, and `0x80013778` all call `psx_object_advance_state_script`. + - Three of those sites then feed the main stage-1 projector path through `FUN_80040d44` (`0x80012b60`, `0x8001357c`, `0x800136d4`), while two feed the stage-2 queue-builder path through `FUN_80040f78` (`0x8001352c`, `0x80013780`). + - That exact `3` versus `2` split matters because it tightens the earlier claim: stage-2 membership is tied to a narrower runtime object/state branch after state advance, not to the decompressed substrate buffer alone and not to all state-advanced objects indiscriminately. +- One state-script sentinel is now functionally closed too: `0xfffe` dispatches `FUN_8004061c`, which is an audio/effect helper rather than a visible-frame selector. That shrinks the unknown sentinel set for the remaining `DAT_800758cc` script work. +- The main visible-list helpers are now also separated cleanly enough to stop treating them as a blocker: + - `FUN_8002d240` adds an object to the stage-1 `DAT_8006ad5c` visible-list array. + - `FUN_8002d35c` removes an object from that same array. 
+ - `FUN_8002d59c` returns the sorted slice that `FUN_80041378` iterates for the stage-1 world-object pass. + - `FUN_8002d6f8` and `FUN_8002d778` act as refresh/rebucket/sort helpers over that main list. +- This is an important scope reduction for renderer work: the remaining missing world content is now less likely to be caused by misunderstanding the main stage-1 visibility array itself, and more likely to live in the separate stage-2 queued-object pass plus the still-unresolved `DAT_800758cc/d4` state-to-art path. + +Recovered next visible layer from the bulk placement family: + +- The structured `section0_constructor_placements` rows are no longer height-agnostic. The `FUN_80024eec` constructor reads its authored elevation from byte `+0x06` of the input record, which corresponds to the low byte of the current exported `u3` word for the accepted bulk-placement records. +- That byte is not just random payload on the accepted rows. Under the corrected section-0 scan, the same ladder generalizes across the whole rebuilt catalog instead of only the earlier `L0` subset. `LSET1/L0.WDL` still collapses to `11` distinct height values (`0, 2, 4, 10, 12, 14, 18, 20, 22, 24, 26`), and `LSET1/L1.WDL` now exposes `9` distinct levels with a `z` range of `0..32`. +- The PSX cache builder now uses that recovered `z` byte for `section0_constructor_placements` projection instead of forcing the whole bulk layer onto `z = 0`, while the top-level `section0_dispatch_roots` descriptor stream stays at `z = 0` until its own constructor-backed height source is proven. +- This is now the first PSX export pass in the viewer pipeline that produces visibly multi-layer whole-map candidates across the rebuilt retail catalog from executable-backed height data rather than from a single flattened candidate layer. + +## PSX Coordinate Model From Executable + +The current coordinate problem is no longer a renderer-only guess. 
The executable now closes the last projection step well enough to treat PSX placement as its own map-space model instead of as a PC-style direct world export. + +Key function evidence: + +- `FUN_800249f4` and `FUN_80024eec` are constructor paths that load authored coordinates into object fields `+0x3c`, `+0x40`, and `+0x44` as `16.16` fixed-point values. +- For the first family, the source record shape is now strong enough to describe directly: + - `u16` word at record `+0x08` -> object `+0x3c` as `value << 16` + - `u16` word at record `+0x0a` -> object `+0x40` as `value << 16` + - `u8` byte at record `+0x0c` -> object `+0x44` as `value << 16` +- `FUN_80040d44` and `FUN_80040f78` are the projection helpers that turn those fixed-point object coordinates into the per-object screen rectangle stored at `+0x20..+0x2e`. +- `FUN_80041458` and `FUN_80041144` then consume that already-built rectangle directly during draw submission; they do not derive screen position on the fly. + +Recovered projection model: + +- `+0x3e` and `+0x42` are not separate authored fields. They are the high `16`-bit halves of the fixed-point `x` and `y` values stored at `+0x3c` and `+0x40`. +- The runtime builds an intermediate screen anchor in fixed-point at `+0x78/+0x7c` from those world coordinates: + - `screen_anchor_x = y - x` + - `screen_anchor_y = 2 * z - (x + y) / 2` +- `FUN_80040d44` computes that anchor with the exact writes: + - `obj+0x78 = ((y_hi - x_hi) << 16)` + - `obj+0x7c = (obj_z * 2) - ((x_hi + y_hi) << 15)` +- The projection helper then subtracts the current camera anchor from `DAT_800678d4 + 0x3c/+0x40`, subtracts sprite-frame origin/size metadata from `FUN_8004513c`, `FUN_800451d0`, `FUN_80045014`, and `FUN_800450a8`, and writes the final visible rectangle into `+0x20..+0x2e`. 
+ +What this means for the viewer: + +- the PSX map does not want the PC viewer's current synthetic `world.x/world.y/world.z` guess based directly on raw candidate words +- the most defensible renderer-side export target is now the runtime's own projected anchor or the equivalent fixed-point world tuple that reproduces the same `screen_anchor_x/screen_anchor_y` formulas +- any importer that treats the raw authored coordinates as if they were already PC-style isometric world coordinates will bunch objects together or smear them across the map because PSX uses a different projection basis +- the current cache builder no longer synthesizes PC-style world coordinates from final screen anchors; it now keeps the candidate PSX `x/y` words directly in exported scene items and applies the runtime projection basis separately during anchor generation + +Open parts that still matter: + +- this closes the final world-to-screen math, but it does not yet prove which raw `post_audio_region_01` or `post_audio_region_00` record family feeds each constructor path +- it also does not close the type/resource lookup that selects the correct bundle/frame through `DAT_800758cc/d0/d4/d8` +- palette override remains a separate unresolved control path layered on top of the now-understood projection math + +Immediate consequence for the next pass: + +- the next executable-guided decode step should map candidate authored record words directly onto constructor inputs, not onto PC-style scene coordinates +- once the correct record family is tied to `FUN_800249f4` or `FUN_80024eec`, the renderer can export either: + - the raw fixed-point PSX world tuple, plus a viewer-side reproduction of the runtime projection, or + - the runtime-equivalent projected anchor/rectangle directly for debug rendering +- the cache builder now uses the recovered projection basis and prefers the loader-backed record family, but the exact record-to-constructor link and the authoritative height lane still need proof 
before this can be called a solved map export + ## PSX Script / Usecode Equivalent Current status: diff --git a/docs/remorse-class-lift-index.md b/docs/remorse-class-lift-index.md index 4a80aba..d918c73 100644 --- a/docs/remorse-class-lift-index.md +++ b/docs/remorse-class-lift-index.md @@ -43,6 +43,7 @@ That set gives the high-level target, the current candidate families, the rebuil - [docs/entity-class-family-split.md](docs/entity-class-family-split.md): conservative split of the large `Entity` lane into base, projectile, debris, corpse/actor, and adjacent non-entity families. - [docs/entity-vm-runtime-owner-resource-layout.md](docs/entity-vm-runtime-owner-resource-layout.md): current runtime/helper/context ownership model for the VM lane. - [docs/presentation-callback-broker-layout.md](docs/presentation-callback-broker-layout.md): current object/lifecycle/vtable evidence for the `0x4588` presentation-state callback broker family. +- [docs/usecode-debugger-break-state-layout.md](docs/usecode-debugger-break-state-layout.md): current object/lifecycle/layout evidence for the dormant seg1408 debugger-state family. ### 4. Execution Checklists @@ -87,6 +88,34 @@ The future MCP endpoint sequence should follow the spec note rather than ad hoc 3. Add one more dedicated note for the callback/object lane around `0x4588` only if later caller evidence supports a stronger subsystem name than `PresentationCallbackBroker`. 4. Turn the first-class-authoring checklist into a completed execution log once the first real MCP batch lands. +## Current Live Authoring Snapshot + +The live `CRUSADER.EXE` class-authoring lane is no longer just a plan. + +Current authored `Remorse` classes in the active database are: + +- `EntityVmOwnerResource` +- `EntityVmRuntime` +- `EntityVmContext` +- `EntityVmSlotEntry` + +The VM lane is still the furthest along in actual Ghidra authoring. 
Recent live batches added the bounded `EntityVmSlotEntry` class owner plus more owned `EntityVmRuntime` methods (`GetSlotChunkPtrAtOffset`, `ReleaseSlotChunkRef`, `TryUnloadSlotChunk`, `DebugDumpSlotMemory`, `ApplyToMatchingOwnerRows`) rather than stopping at free-function naming. + +The latest signature-recovery pass also tightened two of those runtime methods materially: + +- `GetSlotChunkPtrAtOffset(runtime_farptr, slot_index, chunk_index, intra_chunk_offset)` now reads as a real slot-chunk accessor instead of a five-word anonymous wrapper. +- `ApplyToMatchingOwnerRows(runtime_farptr, slot_index_filter, chunk_index_filter)` now reads as a real iterator/filter helper instead of a split-word scratch signature. + +The next live batch pushed that further still: most of the `EntityVmRuntime` method cluster now carries an explicit 4-byte `EntityVmRuntime * this` in-session, including `Create`. The main remaining type gap inside that class is no longer the runtime object itself, but the exact far slot-entry pointer positions on `AcquireSlotForEntity` and `InitSlotOwnerBuffers`. + +That VM-side gap is now closed too: `AcquireSlotForEntity` returns `EntityVmSlotEntry *32` in `DX:AX`, `InitSlotOwnerBuffers` now accepts `EntityVmSlotEntry *32`, `EntityVmOwnerResource::{Create,Destroy}` now carry explicit 4-byte `this`, and the simple `EntityVmContext` lifecycle methods now do the same. + +The next family switch has also landed in the live database: `Remorse::UsecodeDebuggerBreakState` now exists as a real class owner with a provisional `0x2f2` datatype and a stronger method batch (`Create`, `MaybeBreakOnCurrentLine`, `BreakpointInsertSorted`, `BreakpointRemove`, `HasBreakpoint`, `CallstackPushFrame`, `CallstackPushEntry`, `CallstackPopEntry`, `EnableSingleStep`, `ClearStepState`, `CurrentEntryGetUnitName`). + +That debugger family is no longer just a top-level shell. 
The internal record shapes are now recovered and applied live well enough to treat the two tables as real fixed-size arrays in-session: breakpoint entries are `0x0b` bytes with `unit_name_inline[9] + line_number`, and callstack entries are `0x15` bytes with `unit_name_inline[9]` plus the currently safest trailing fields `source_stream_target_farptr`, `current_frame_payload_farptr`, and still-neutral `aux_farptr`. + +The VM lane also advanced one more selective step without overpromoting inheritance: `Remorse::EntityVmContext::CreateFromSlotIndex` now has a caller-backed mixed parameter pack (`owner_source_farptr`, `pitemno_farptr`, `mode_flags`, `slot_index`, `value_add_offset`, `intra_chunk_offset`, `ucparam_farptr`, `ucparamsize`) and an explicit far return restored in `AX:DX`, even though the current live endpoint still textualizes that repaired signature conservatively as plain `dword __cdecl`. + ## Bottom Line The current prep work is now large enough that it should be treated as one coordinated lane rather than scattered notes. diff --git a/docs/usecode-debugger-break-state-layout.md b/docs/usecode-debugger-break-state-layout.md new file mode 100644 index 0000000..dfed4b7 --- /dev/null +++ b/docs/usecode-debugger-break-state-layout.md @@ -0,0 +1,221 @@ +# Usecode Debugger Break-State Layout + +## Purpose + +This note captures the current class-lift-relevant evidence for the dormant seg1408 debugger-state object. + +The retail binary still appears to leave this family orphaned at runtime, but the object model itself is strong enough to justify explicit class authoring in Ghidra. 
+ +Current working family name: + +- `UsecodeDebuggerBreakState` + +## Current Best Class-Level Read + +`UsecodeDebuggerBreakState` is a retained debugger object that owns: + +- a small breakpoint table +- current interpreted-line state +- single-step / break-armed flags +- a stack of callstack entries +- a small vtable-backed break/notify surface used by the interpreter callback lane + +The compiled interpreter still calls into this object when the global debugger-state pointer is non-null, even though the retail binary no longer seems to instantiate it during normal play. + +## Strongest Evidence Anchors + +### Constructor + +#### `1408:0000` `Create` + +Current verified behavior: + +- allocates `0x2f2` bytes when `this == null` +- writes retail vtable offset `0x65ab` at object `+0x00` +- fills the breakpoint-entry region starting at `+0x04` with `0xffff` +- clears `+0x02`, `+0x75`, and `+0x7a` +- returns the object far pointer in `DX:AX` + +This is a real constructor-style path, not just a helper wrapper. + +### Break gate + +#### `1408:0053` `MaybeBreakOnCurrentLine` + +Current verified behavior: + +- stores the incoming interpreted line minus one at `+0x72` +- resolves the current unit-name pointer through `CurrentEntryGetUnitName` +- checks `(unit_name, line_number)` breakpoints through `HasBreakpoint` +- dispatches through the object vtable when a break condition is met + +This is the strongest proof that the hidden debugger lane is object-based and that the interpreter-side callback still expects a live debugger object.
+ +### Breakpoint table helpers + +#### `1408:00dd` `BreakpointInsertSorted` + +Current verified behavior: + +- enforces a maximum of ten breakpoint entries +- scans the `0x0b`-byte breakpoint-entry table rooted at `+0x04` +- compares unit-name strings via the common string helper +- inserts a new `(unit_name, line_number)` pair into the sorted table + +#### `1408:01a5` `BreakpointRemove` + +Current verified behavior: + +- scans the same `0x0b`-byte breakpoint-entry table for an exact `(unit_name, line_number)` match +- compares the stored inline name bytes first, then the stored line word at entry `+0x09` +- compacts the remaining entries downward when a match is found +- decrements `breakpoint_count` + +#### `1408:029e` `HasBreakpoint` + +Current verified behavior: + +- scans the same breakpoint-entry table +- compares the requested line number against entry `+0x09` +- compares the requested unit-name pair against the stored name bytes +- returns a boolean-style `uint` in `AX` + +### Callstack helpers + +#### `1408:02f5` `CallstackPushFrame` + +Current verified behavior: + +- computes the current callstack-entry base as `this + 0x7c + callstack_depth * 0x15` +- copies an inline unit-name buffer into entry `+0x00` +- enforces a maximum visible unit-name length of eight characters plus terminator +- stores three trailing far-pointer/state dwords at `+0x09`, `+0x0d`, and `+0x11` +- increments `callstack_depth` + +#### `1408:03b0` `CallstackPushEntry` + +Current verified behavior: + +- uses `+0x7a` as the current callstack depth +- acts as a thinner wrapper over `CallstackPushFrame` when only the inline unit-name payload matters +- increments the depth and asserts when it reaches `0x1e` + +#### `1408:03f7` `CallstackPopEntry` + +Current verified behavior: + +- decrements `+0x7a` +- asserts if the depth underflows below zero + +### Step-state helpers + +#### `1408:0419` `EnableSingleStep` + +- clears `+0x76/+0x78` +- sets `+0x75 = 1` + +#### `1408:0432` 
`ClearStepState` + +- clears `+0x74` +- clears `+0x75` + +### Current-entry name accessor + +#### `1408:0444` `CurrentEntryGetUnitName` + +Current verified behavior: + +- returns null when `callstack_depth <= 0` +- otherwise returns a far pointer to the current callstack entry's inline unit-name buffer + +## Recovered Entry Schemas + +The live debugger-state model is now strong enough to split the old table blobs into concrete fixed-size entry records. + +### `UsecodeDebuggerBreakpointEntry` (`0x0b` bytes) + +| Offset | Current name | Confidence | Current meaning | +|---|---|---|---| +| `+0x00..+0x08` | `unit_name_inline[9]` | High | Inline unit-name buffer, consistent with eight visible characters plus terminator. | +| `+0x09` | `line_number` | High | Breakpoint line number compared by `BreakpointInsertSorted`, `BreakpointRemove`, and `HasBreakpoint`. | + +### `UsecodeDebuggerCallstackEntry` (`0x15` bytes) + +| Offset | Current name | Confidence | Current meaning | +|---|---|---|---| +| `+0x00..+0x08` | `unit_name_inline[9]` | High | Inline unit-name buffer for the active frame. | +| `+0x09` | `source_stream_target_farptr` | Medium | Far pointer derived from the interpreter source-stream lane plus one fetched word in the only verified caller. | +| `+0x0d` | `current_frame_payload_farptr` | Medium | Far pointer to the current frame payload at `frame_base + 0x04` in the only verified caller. | +| `+0x11` | `aux_farptr` | Low | Trailing auxiliary far pointer slot; still zero in the only verified caller. | + +## Current Working Layout + +The live datatype `/Remorse/UsecodeDebuggerBreakState` now exists in-session with the currently safest anchors: + +| Offset | Current name | Confidence | Current meaning | +|---|---|---|---| +| `+0x00` | `vtable_offset` | High | Retail debugger-state vtable offset `0x65ab`. | +| `+0x02` | `breakpoint_count` | High | Count of `0x0b`-byte breakpoint entries. 
| +| `+0x04..+0x71` | `breakpoint_entries[10]` | High | Ten inline `UsecodeDebuggerBreakpointEntry` records. | +| `+0x72` | `current_line` | High | Current interpreted line minus one. | +| `+0x74` | `break_armed` | Medium | Break/armed flag cleared by `ClearStepState`. | +| `+0x75` | `single_step_enabled` | High | Single-step flag set by `EnableSingleStep`. | +| `+0x76/+0x78` | `step_state_lo/hi` | Medium | Step-state pair cleared by `EnableSingleStep`. | +| `+0x7a` | `callstack_depth` | High | Current callstack depth. | +| `+0x7c..+0x2f1` | `callstack_entries[30]` | High | Thirty inline `UsecodeDebuggerCallstackEntry` records. | + +## Live Ghidra Authoring Status + +Verified first live class batch landed on 2026-04-06. + +- Created class owner `Remorse::UsecodeDebuggerBreakState`. +- Created `/Remorse/UsecodeDebuggerBreakpointEntry` (`0x0b`) and `/Remorse/UsecodeDebuggerCallstackEntry` (`0x15`) in the live data-type manager. +- Rewrote `/Remorse/UsecodeDebuggerBreakState` in-session so the old blob regions are now explicit `UsecodeDebuggerBreakpointEntry[10]` and `UsecodeDebuggerCallstackEntry[30]` arrays at the recovered offsets. 
+- Moved the main seg1408 helpers under the class owner: + - `1408:0000` -> `Create` + - `1408:0053` -> `MaybeBreakOnCurrentLine` + - `1408:00dd` -> `BreakpointInsertSorted` + - `1408:01a5` -> `BreakpointRemove` + - `1408:029e` -> `HasBreakpoint` + - `1408:02f5` -> `CallstackPushFrame` + - `1408:03b0` -> `CallstackPushEntry` + - `1408:03f7` -> `CallstackPopEntry` + - `1408:0419` -> `EnableSingleStep` + - `1408:0432` -> `ClearStepState` + - `1408:0444` -> `CurrentEntryGetUnitName` +- Tightened the live method signatures to explicit object-style forms, including: + - `UsecodeDebuggerBreakState * __cdecl16far Create(UsecodeDebuggerBreakState * this, dword init_spec)` + - `void __cdecl16far MaybeBreakOnCurrentLine(UsecodeDebuggerBreakState * this, word current_line)` + - `byte __cdecl16far BreakpointInsertSorted(UsecodeDebuggerBreakState * this, dword unit_name_farptr, word line_number)` + - `void __cdecl16far BreakpointRemove(UsecodeDebuggerBreakState * this, dword unit_name_farptr, word line_number)` + - `uint __cdecl16far HasBreakpoint(UsecodeDebuggerBreakState * this, dword unit_name_farptr, word line_number)` + - `void __cdecl16far CallstackPushFrame(UsecodeDebuggerBreakState * this, dword unit_name_farptr, dword source_stream_target_farptr, dword current_frame_payload_farptr, dword aux_farptr)` + - `byte __cdecl16far CallstackPushEntry(UsecodeDebuggerBreakState * this, dword unit_name_farptr)` + - `void __cdecl16far CallstackPopEntry(UsecodeDebuggerBreakState * this)` + - `void __cdecl16far EnableSingleStep(UsecodeDebuggerBreakState * this)` + - `void __cdecl16far ClearStepState(UsecodeDebuggerBreakState * this)` + - `dword __cdecl16far CurrentEntryGetUnitName(UsecodeDebuggerBreakState * this)` +- Added decompiler comments on the breakpoint and callstack helpers so the recovered inline-record layout is visible in-session even before every field is formally typed. 
+- Added decompiler comments on the only verified `Interpreter_NextUsecodeOp` caller of `CallstackPushFrame`, which confirms the current live read of the three trailing callstack dwords: + - `source_stream_target_farptr` is source-stream-derived from the interpreter `+0xd6/+0xd8` lane plus one fetched word + - `current_frame_payload_farptr` is current-frame-derived from the `frame_base + 0x04` lane + - `aux_farptr` is still zero in the only verified caller + +## Current Cautions + +- The retail instantiation path still appears absent; no normal caller currently reaches `Create` in the unpatched retail binary. +- The record boundaries inside both tables are now landed in the live datatype, and two of the three trailing callstack dwords now have caller-backed structural names. The exact gameplay role behind those two far pointers is still only partly recovered. +- `init_spec` on `Create` and `unit_name_farptr` on the breakpoint/callstack helpers are intentionally neutral names; the live signatures are object-correct, but the payload semantics should stay conservative. + +## Best Next Moves + +1. Identify the real gameplay semantics of `source_stream_target_farptr` and `current_frame_payload_farptr` from the interpreter-side caller lanes before promoting subsystem-specific names. +2. Identify the vtable callback slots used by `MaybeBreakOnCurrentLine` and decide whether one or two additional methods belong on the class owner. +3. Cross-check the seg1408 class note against the interpreter callback site at `1418:04b5` so the dormant-orphan lifecycle remains explicit in the live notes. +4. Decide whether `aux_farptr` should remain neutral or can be promoted after one more caller or consumer pass. + +## Bottom Line + +`UsecodeDebuggerBreakState` is now past the “interesting orphan subsystem” stage and into real class territory. 
+ +The live database now has a bounded debugger-state class with a constructor, breakpoint gate, breakpoint table helpers, callstack helpers, explicit recovered `0x0b` / `0x15` entry schemas, and step-state helpers, even though the retail game still appears to leave that object dormant. \ No newline at end of file diff --git a/ghidra_mcp_wishlist.md b/ghidra_mcp_wishlist.md index fe2d9bc..0ad7cfe 100644 --- a/ghidra_mcp_wishlist.md +++ b/ghidra_mcp_wishlist.md @@ -1,342 +1,83 @@ # Ghidra MCP Wishlist -This file records concrete gaps in the current Ghidra MCP workflow. -Update it whenever a task requires PyGhidra or another local-only fallback because MCP lacks the needed operation. +This file records concrete MCP gaps hit during Crusader workflow passes. -For each new entry, keep the format short: -- Missing capability -- Current fallback -- Why it matters in this repo -- Proposed MCP endpoint or behavior +Rules for keeping it useful: +- Put only unresolved work in `Remaining TODOs`. +- Move implemented or source-fixed items to `Done / Implemented In Local Fork`. +- Keep each remaining item short: missing capability, fallback, why it matters, proposed behavior, latest status. -## Current Wishlist +## Remaining TODOs -### POST Body Contract Gap Hit During Runtime Prototype Repair (2026-04-05) +### Class-Lift Typing Live Parity -- Missing capability: POST endpoints only accept form-urlencoded key/value parameters; direct JSON bodies fail as if required parameters were omitted. -- Current fallback: use bridge helpers or manual form-encoded POSTs when testing endpoints such as `set_function_prototype(...)` directly. -- Why it matters: MCP clients, ad hoc terminal tests, and future automation naturally try JSON first for structured payloads, especially on newer class-lift and prototype endpoints.
-- Proposed MCP behavior: accept both `application/x-www-form-urlencoded` and `application/json` on POST endpoints, or return a structured unsupported-content-type error that explicitly says the route only accepts form parameters. -- Status update (2026-04-05): local plugin `parsePostParams(...)` still only splits `key=value&...` bodies and ignores JSON payloads entirely, which is why direct JSON POSTs looked like missing-parameter failures during the `EntityVmRuntime::Create` repair. -- Status update (2026-04-05, local fork): plugin `parsePostParams(...)` now accepts both form-urlencoded bodies and JSON object bodies across POST routes. Unsupported POST bodies now fail early with an explicit `unsupported-content-type` parser error instead of silently degrading into missing-parameter behavior. +- Missing capability: end-to-end live-session parity for storage-aware `this` typing on 16-bit NE methods whose current storage does not match the default pointer storage the binder would choose. +- Current fallback: use local PyGhidra with `DYNAMIC_STORAGE_ALL_PARAMS`, or move methods with `set_function_class(...)` and defer final `this` typing/manual prototype cleanup. +- Why it matters: `EntityVmContext` lifecycle methods and `EntityVmRuntime::Create` still need the live MCP path to behave like the verified local PyGhidra repair flow. +- Proposed MCP behavior: `set_function_this_type(...)` and `apply_class_layout(...)` should reliably fall back to dynamic storage in-session for these 16-bit cases, while preserving structured per-method warnings instead of aborting the batch. +- Latest status (2026-04-06): local PyGhidra confirmed that `1420:0eec`, `1420:10b6`, `1420:10da`, `1420:1162`, `1420:118f`, and `1420:1278` accept `EntityVmContext * this` cleanly via `DYNAMIC_STORAGE_ALL_PARAMS`. 
The live storage-aware path now also accepts explicit `/Remorse/EntityVmRuntime *32`, `/Remorse/EntityVmOwnerResource *32`, `/Remorse/EntityVmContext *32`, and `/Remorse/EntityVmSlotEntry *32` signatures in-session once the exact `*32` datatype has first been resolved into the program data-type manager. The remaining live gap is now mostly about deeper mixed-width parameter packs like `1420:0eec CreateFromSlotIndex`, not the previously blocked 4-byte object-pointer cases themselves. -### Live PyGhidra Write Gap Hit During Runtime Repair Pass (2026-04-05) +### Storage-Aware Prototype Live Verification -- Missing capability: constrained live PyGhidra write execution through MCP when Ghidra was started with Python enabled. -- Current fallback: keep read-only inspection in live MCP via `run_readonly_script(...)`, but close the GUI and drop back to local project-open PyGhidra for write-side repairs such as custom-storage prototype fixes and datatype edits. -- Why it matters: the runtime class-lift batch had to leave the live session and reopen the project locally just to repair one 16-bit function signature and one allocator-helper callee, even though the live Ghidra instance could already host Python scripts. -- Proposed MCP behavior: add a narrowly scoped live write-script or transaction endpoint family that runs against the active writable program with explicit safety limits, dry-run support where possible, and machine-friendly transaction results. -- Status update (2026-04-05): the local fork can already probe and run live read-only Python when Ghidra starts with PyGhidra enabled, so the remaining gap is write-side exposure and safety policy rather than Python availability itself. -- Status update (2026-04-05, local fork): local plugin and bridge now expose `run_write_script(script_path|script_text, dry_run?)` plus the alias route `run_transaction_script`. 
The implementation reuses explicit write-target selectors, validates inline or file-backed scripts against a write-policy denylist, wraps execution in a single MCP-managed transaction, reports machine-friendly status/output, and surfaces `write_script_*` capability fields from `get_runtime_capabilities()`. The remaining gap is finer-grained safety policy and live workflow verification, not basic write-side exposure. -- Status update (2026-04-06, VM class-lift pass): direct bridge `run_write_script(...)` still returned `404 No context found for request` against the active `CRUSADER.EXE` GUI session even with explicit target selectors, so the `EntityVmContext` datatype plus the slot-entry/runtime prototype batch still had to fall back to closed-project local PyGhidra. The remaining gap is now active-session context binding for the write-script route, not route availability alone. -- Status update (2026-04-06, local fork hardening): plugin explicit-target binding now normalizes Windows `project_dir` casing/separators, infers missing `project_dir` / `project_name` from the active program when possible, and fills the matching `folder_path` from the active domain file before trying to reopen a target. Bridge `run_write_script(...)` now retries the `run_transaction_script` alias on `404` or `No context found for request`, reducing mixed-build false negatives while live-session verification continues. -- Status update (2026-04-06, live context-typing retry): the trivial dry-run probe for `run_write_script(...)` still returned `404 No context found for request` against the active `CRUSADER.EXE` session both with implicit active-program targeting and with explicit `project_dir` / `project_name` / `folder_path` / `program_name` selectors. The route is still not usable as an in-session fallback for the `EntityVmContext` typing pass. +- Missing capability: confirmed live-session parity for the newest storage-aware prototype fixes on 16-bit NE repair cases. 
+- Current fallback: if the active GUI session is on an older plugin build, reload the plugin; if parity still fails, use local PyGhidra or manually compensate when testing stack offsets / calling conventions. +- Why it matters: `1000:42e2` and `1420:1499` are the known proof cases for explicit return storage, stack-word parameter modeling, and 16-bit far calling conventions. +- Proposed MCP behavior: `set_function_prototype_storage(...)` should accept bare `stack:` offsets in the same hex-style form used in current workflow notes and should preserve exact calling-convention tokens such as `__cdecl16far` before falling back to lossy legacy normalization. +- Latest status (2026-04-06): the reloaded live plugin now reaches the real storage-aware implementation in-session on both proof cases, and explicit `AX:DX` return storage survives correctly on `1000:42e2` and `1420:1499`. The remaining live parity issue is now narrower: `calling_convention='__cdecl16far'` still normalizes those proof-case applies to plain `__cdecl`, but direct live `run_write_script(...)` calls can immediately restore `__cdecl16far`, which proves the live database accepts the exact convention token and leaves the endpoint-side normalization/deployment path as the remaining gap. -### Class-Lift Typing Gap Hit During VM Runtime Pass (2026-04-05) +### Live Metadata / Read-Target Verification -- Missing capability: a storage-aware class-layout or `this`-typing path for 16-bit NE methods whose current function storage does not match the default pointer storage the binder tries to apply. -- Current fallback: create/update the class namespace and datatype, then move methods individually with `set_function_class(...)` and leave `this` typing/manual prototype cleanup for later. 
-- Why it matters: the current Remorse class-lift workflow can land ownership cleanly for `EntityVmRuntime`, but `apply_class_layout(...)` failed on the runtime lifecycle cluster with `Failed to apply this type: Storage size does not match data type size: 2` even though the same binder succeeded for `EntityVmOwnerResource`. -- Proposed MCP behavior: let `apply_class_layout(...)` either skip/soft-fail `this` typing per method with structured results, or accept an explicit storage/calling-convention override for `this` so 16-bit segmented/custom-storage methods can still be class-bound and partially typed in one pass. -- Status update (2026-04-05, later MCP-upgrade pass): the upgraded tool surface now allows direct `set_function_class(...)` moves for additional `EntityVmRuntime` helpers and `set_function_this_type(...)` succeeded on `1420:1601 Destroy` when forced to `this_storage=farptr`, but `1420:1499 Create`, `1420:1536 InitSlots`, and `1420:1575 ReleaseSlots` still fail with the same storage-size mismatch, so the gap is narrower but not resolved. -- Status update (2026-04-05, local fork): `set_function_this_type(...)` now treats `this_storage` as a real storage strategy hint instead of always reusing the old first-parameter storage. For existing parameters it tries preserved custom storage first only when the caller asked to preserve/current storage, then falls back to `DYNAMIC_STORAGE_ALL_PARAMS` when the preserved storage is incompatible with the requested `this` type. `apply_class_layout(...)` now records per-method typing failures as structured warnings instead of aborting the entire batch, and bridge method payloads can carry per-method `this_storage` and `calling_convention` overrides. 
-- Status update (2026-04-06, VM class-lift pass): after landing `/Remorse/EntityVmContext` and the first slot-entry prototype batch, local PyGhidra could collapse `1420:1536 InitSlots` and `1420:1575 ReleaseSlots` to direct `EntityVmRuntime * this`, but `1420:1499 Create` still reintroduced hidden `__return_storage_ptr__` corruption whenever the split-word far runtime pointer was collapsed to a typed `this`. The open gap is now mostly `Create` plus any future 16-bit constructors/factories with the same far-pointer/custom-storage shape. -- Status update (2026-04-06, live context-typing retry): the old `apply_class_layout(...)` dry-run null failure for `/Remorse/EntityVmContext` no longer reproduces, but the real live write path still behaves like the older storage-preserving build. Actual `apply_class_layout(...)` and direct `set_function_this_type(...)` calls on `1420:10b6`, `1420:10da`, `1420:1162`, `1420:118f`, and `1420:1278` all still fail with `Storage size does not match data type size: 2`, so the open gap is now specifically live deployment parity for the dynamic-storage fallback rather than dry-run binder coverage. -- Status update (2026-04-06, local PyGhidra confirmation): after closing the GUI and running the local `tools.pyghidra_crusader` script path, the same context lifecycle entries (`1420:0eec`, `1420:10b6`, `1420:10da`, `1420:1162`, `1420:118f`, `1420:1278`) all accepted `EntityVmContext * this` cleanly via `DYNAMIC_STORAGE_ALL_PARAMS`. That confirms the typing model is valid and the remaining gap is live-session deployment parity, not the class layout itself. +- Missing capability: fully verified live-session parity for selector-aware reads and metadata helpers in mixed-build or partially refreshed GUI sessions. +- Current fallback: bridge alias retries, explicit-target normalization, and manual project-note cross-checks when a live session still behaves like an older plugin build. 
+- Why it matters: Crusader work routinely needs side-by-side reads across `/CRUSADER.EXE`, `/es/CRUSADER.EXE`, `/Writable/...`, and other project entries without changing the active Ghidra tab. +- Proposed MCP behavior: `list_project_programs(...)`, `get_runtime_capabilities(...)`, `get_callers(...)`, and other selector-aware read helpers should bind reliably to the requested or active target and return structured unsupported-state output instead of raw context failures. +- Latest status (2026-04-06): the local fork already includes alias fallbacks and Windows path/folder normalization for explicit-target matching. Remaining work is live-session verification after plugin refresh, not additional local source coverage. -### 16-bit Prototype And Hidden Return-Storage Gap Hit During Runtime Repair (2026-04-05) +## Done / Implemented In Local Fork -- Missing capability: a semantics-preserving prototype/storage endpoint for 16-bit NE functions that can set explicit parameter storage, explicit return storage, and avoid parser-induced hidden `__return_storage_ptr__` rewrites. -- Current fallback: inspect the broken caller plus its direct callees, then use local PyGhidra to normalize callee prototypes and apply custom storage manually. -- Why it matters: `1420:1499 Remorse::EntityVmRuntime::Create` kept throwing `Low-level Error: Symbol $$undef00000006 extends beyond the end of the address space` until the shared allocator helper at `1000:42e2` was repaired from a pointer-return signature that decompiled with a hidden return-storage parameter. -- Proposed MCP behavior: expose a storage-aware prototype/update endpoint that accepts explicit parameter and return storage, plus optionally a decompiler-health check or warning when a candidate prototype would inject hidden return storage into a 16-bit caller chain. 
-- Status update (2026-04-05): parser-string prototype updates alone were not sufficient here; the stable repair required explicit `AX:DX` return storage on `1000:42e2` and split-stack-word modeling for the runtime far pointer on `1420:1499`. -- Status update (2026-04-05, later MCP-upgrade pass): the new live `run_write_script(...)` path gives MCP a constrained way to perform these repairs inside the active writable session, but there is still no first-class storage-aware prototype endpoint that models explicit return/parameter storage declaratively. This wishlist item remains open. -- Status update (2026-04-06, local fork): local plugin and bridge now expose `set_function_prototype_storage(...)` plus the alias `set_storage_aware_prototype(...)`. The endpoint accepts declarative `return_type`, `return_storage`, and ordered `parameters` lines (`name|type|storage`), supports explicit target selectors, applies custom return/parameter storage in one transaction, and reports a warning when the resulting signature still contains hidden `__return_storage_ptr__` state. -- Status update (2026-04-06, live in-session verification): the checked-in Java source now wires both `/set_function_prototype_storage` and `/set_storage_aware_prototype` to the storage-aware implementation, but the active GUI session still does not match that build. Direct live POSTs to `/set_function_prototype_storage` returned HTTP 200 with the old legacy body `failed: set_function_prototype ... Function prototype is required`, while the alias route `/set_storage_aware_prototype` still returned `404 No context found for request`. So the live session still cannot exercise the new explicit-storage modeling in-session, and this remains a deployment/runtime parity gap rather than a source-level endpoint absence. +### Transport And Runtime -### Live MCP Issues Hit During Spanish Cheat Pass (2026-03-26) +- POST endpoints now accept both `application/json` and `application/x-www-form-urlencoded` request bodies. 
Unsupported POST payloads fail early with `unsupported-content-type` instead of degrading into missing-parameter errors. +- `get_runtime_capabilities()` reports readonly/write-script capability state and `run_readonly_script(...)` returns structured unsupported-state output when Python support is unavailable. +- `run_write_script(...)` and alias `run_transaction_script(...)` are implemented with dry-run support, explicit target selectors, a write-policy denylist, and machine-friendly transaction results. +- Bridge runtime helpers retry compatible aliases on `404` / `No context found for request` for mixed-build live sessions. -- Missing capability: working `search_bytes(...)` requests against the currently opened program. -- Current fallback: `read_region(...)`, `get_data_uses(...)`, `search_instructions(...)`, and manual/xref-driven narrowing inside `/es/CRUSADER.EXE`. -- Why it matters: the Spanish-cheat question specifically needed a direct full-memory search for the English `jassica16` scan-code table and any plausible replacement sequence. -- Proposed MCP behavior: `search_bytes(...)` should honor the active program context by default and return a machine-friendly empty-hit result when no matches exist, not `HTTP 404 No context found for request`. +### Explicit Targeting And Project Access -- Missing capability: reliable explicit target selection on read/query endpoints in the live server session. -- Current fallback: repo notes plus manual project `.prp` metadata inspection after `read_region(...)` and `get_function_by_address(...)` ignored explicit root-vs-`/es` selectors and still resolved against the active Spanish program. -- Why it matters: this repo routinely needs side-by-side comparisons between `/CRUSADER.EXE`, `/es/CRUSADER.EXE`, `/Writable/...`, and other project entries without changing the active Ghidra tab. 
-- Proposed MCP behavior: all selector-aware read endpoints should actually bind to the requested `project_dir` / `project_name` / `folder_path` / `program_name`, or return a structured target-resolution failure instead of silently reading the active program. +- Explicit write targeting is implemented for edit flows such as `apply_program_edit_plan(...)` and `patch_bytes_and_reanalyze(...)` with deterministic save behavior. +- Selector-aware read/query endpoints now accept `project_dir`, `project_name`, `folder_path`, and `program_name` and reuse the same target-resolution layer as write flows. +- Target matching now normalizes Windows path casing and slash style and can infer missing project selectors from the active domain file when appropriate. +- `list_project_programs(...)` plus alias `project_programs` is implemented and returns machine-friendly folder/program inventory. -- Missing capability: consistent context handling for project/runtime metadata helpers in the live server session. -- Current fallback: direct `get_project_access_info()` plus workspace-side `.prp` reads after `list_project_programs(...)`, `get_callers(...)`, `compare_functions(...)`, and `get_runtime_capabilities()` returned `404 No context found for request` during an otherwise healthy active-program session. -- Why it matters: these are the exact helper endpoints needed to validate which program is active, enumerate comparison targets, and reason about whether a failure is a real analysis result or an MCP/session problem. -- Proposed MCP behavior: metadata helpers should either work whenever an active program exists or return structured unsupported-state details, not raw 404 context failures. 
-- Status update (2026-03-26, later Spanish pass): the refreshed live server still returned `404 No context found for request` for `get_runtime_capabilities(...)` and `get_callers(...)` during an active `/es/CRUSADER.EXE` session, so this is still a live deployment or routing problem, not just an earlier-session artifact. -- Status update (2026-04-05, class-lift pass): after reloading the updated plugin, `get_project_access_info(...)` and the new class-lift write routes were reachable in the active `CRUSADER.EXE` session, but `list_project_programs(...)` still returned `404 No context found for request`, so the metadata-helper context issue is not fully resolved. -- Status update (2026-04-05, local bridge hardening): bridge `list_project_programs(...)` now retries the legacy `/project_programs` alias whenever the live server answers with `404` or `No context found for request`, which should smooth mixed-build sessions while the remaining live metadata routing issue is verified after redeploy. -- Status update (2026-04-06, local fork hardening): bridge `get_runtime_capabilities(...)` now retries the `/runtime_capabilities` alias on `404` or `No context found for request`, and plugin explicit-target matching no longer depends on exact Windows path casing or slash style when deciding whether an already-open program satisfies the request. This should reduce false context failures in mixed-build live sessions, though full deployment verification is still pending. +### Analysis, Inspection, And Xrefs -### Open Gaps Found During Hidden Usecode Debugger Patch Batch (2026-03-24) +- Function boundary repair helpers are implemented: `create_function_by_address(...)`, `delete_function_by_address(...)`, and `get_function_containing(...)`. +- Arbitrary memory/code inspection helpers are implemented: `read_region(...)`, `disassemble_region(...)`, `get_instruction_window(...)`, `search_instructions(...)`, and `get_data_uses(...)`. 
+- `search_bytes(...)` is implemented with `??` wildcards and machine-friendly hit output. +- Caller/xref recovery is improved via `get_callers(...)`, and `get_xrefs_to(...)` / `get_xrefs_from(...)` return typed reference kinds plus containing-function metadata. +- `get_symbol_at(address)` now uses direct routes when present and bridge-side legacy fallbacks when the live process is older. -- Missing capability: write-capable project/program selection for MCP edit operations. -- Current fallback: local PyGhidra `run-script` plus `read-region` against `--project-dir K:\ghidra\Crusader_Decomp --project-name Crusader --folder-path /Writable --program-name CRUSADER-PATCHED.EXE`. -- Why it matters: retail NE patch work in this repo must sometimes modify and save `/Writable/CRUSADER-PATCHED.EXE` with the GUI closed, while current MCP write flows depend on the active Ghidra session/program context. -- Proposed MCP addition: add bridge-exposed target selectors (`project_dir`, `project_name`, `folder_path`, `program_name`) for write endpoints, backed by plugin support to open the requested project file, apply `patch_bytes_and_reanalyze` or edit-plan writes, and save deterministically. -- Status update (2026-03-24): local fork now accepts optional `project_dir`, `project_name`, `folder_path`, and `program_name` selectors on `apply_program_edit_plan` and `patch_bytes_and_reanalyze`; explicit targets are opened through `GhidraProject`, written, saved deterministically, and then released. -- Status update (2026-03-24, follow-up): explicit target resolution now reuses an already-open matching program when possible and otherwise opens a writable domain object directly; MCP no longer opens explicit targets in read-only mode for edit operations. 
+### Batch Edits And Comparison Tools -### Open Gaps Found During Current 0x4588 Pass (2026-03-21) +- Batch helpers are implemented: `set_comments(...)`, `set_decompiler_comments(...)`, `rename_functions_by_address(...)`, and `apply_program_edit_plan(...)` with dry-run support. +- Reanalysis helpers are implemented: `reanalyze_region(...)`, `patch_bytes_and_reanalyze(...)`, and `analyze_function_boundaries(...)`. +- Cross-program comparison helpers are implemented: `compare_regions(...)`, `compare_strings(...)`, and `compare_functions(...)`. +- `port_symbols(...)` now ports verified names/comments between programs with provenance text and explicit source/target selectors. -- Missing capability: usable read-only scripting in the live MCP/Ghidra session. -- Current fallback: terminal-side Python and manual MCP inspection windows after `run_readonly_script` returned `Ghidra was not started with PyGhidra. Python is not available`. -- Why it matters: one-off structure probes and byte-pattern scans are still common during EUSECODE and overlap work, and they are much cleaner as constrained in-process reads than as external heuristics. -- Proposed MCP addition: expose runtime capability state for `run_readonly_script` and either guarantee a working in-process script engine or return a machine-friendly unsupported-state response early. -- Status update (2026-03-24): local fork now exposes `get_runtime_capabilities()` with readonly-script probe state and `run_readonly_script()` returns structured `status`/`reason`/`detail` output early when Python support is unavailable in the live session. -- Status update (2026-03-24, follow-up): `open_current_program_readonly()` is now intentionally disabled and returns an unsupported-state response so MCP does not create accidental read-only program instances in normal workflow. 
+### Class / Namespace / OO Recovery -- Status update (2026-03-21): the current live plugin process still returns HTTP 404 for direct symbol routes (`/get_symbol_at`, `/symbol_at`) in this chat session, but bridge `get_symbol_at(address)` now avoids raw 404s by falling back to compatible legacy endpoints and returning deterministic symbol-state output (for example `0x844` -> `symbol=`). -- Remaining gap: reload/redeploy the updated plugin build so direct symbol routes are present in the live process; bridge fallback now covers older live builds in the meantime. -- Implemented now: - - `get_xrefs_to(address)` / `get_xrefs_from(address)` with typed ref kinds (`call`, `read`, `write`, `jump`, `other`) plus containing-function metadata. - - tolerant `set_function_prototype` retries for legacy calling-convention tokens (for example `__cdecl16far`) and returns an accepted template example on parse/apply failure. - - `rename_data(address, new_name)` now renames or creates the primary symbol at any valid address and returns the resolved symbol metadata instead of `Rename data attempted`. - - `get_symbol_at(address)` returns the primary symbol state at an address so label changes can be verified directly without depending on decompiler refresh timing. - - `get_symbol_at(address)` now resolves the active program on the Swing thread, falls back to the visible/open program when the current-program pointer is transiently unavailable, and the bridge retries the compatible `/symbol_at` alias if a stale server route returns `404 No context found for request`. - - bridge `get_symbol_at(address)` now probes additional legacy aliases (`getSymbolAt`, `symbolAt`, `get_symbol`) and, if symbol routes are absent, derives symbol state from legacy endpoints (`get_function_by_address`, paged `data`) so callers receive machine-friendly output instead of a raw 404. 
-- Local bridge audit (2026-03-21): `get_xrefs_to` / `get_xrefs_from` wrappers are already present in `K:\mcp\GhidraMCP\bridge_mcp_ghidra.py`; if a client still does not surface them, that is a client/tool-refresh issue rather than a missing local-fork endpoint. +- Namespace and class authoring helpers are implemented: `create_namespace(...)`, `create_class(...)`, `list_namespace_members(...)`, `move_symbol_to_namespace(...)`, and `set_function_class(...)`. +- Vtable and struct helpers are implemented: `analyze_vtable(...)`, `create_or_update_struct(...)`, `create_or_update_vtable(...)`, and alias coverage such as `build_vtable` / `set_this_type`. +- `set_function_this_type(...)` supports storage-strategy hints and `apply_class_layout(...)` now soft-fails per-method typing with structured warnings instead of aborting the whole batch. -## Implemented In Local GhidraMCP Fork (2026-03-21) +### Prototype And Storage Modeling -Added endpoints in `K:\mcp\GhidraMCP\src\main\java\com\lauriewired\GhidraMCPPlugin.java` and tools in `K:\mcp\GhidraMCP\bridge_mcp_ghidra.py`: +- The storage-aware prototype endpoint is implemented as `set_function_prototype_storage(...)` with alias `set_storage_aware_prototype(...)`. +- The endpoint accepts declarative `return_type`, `return_storage`, ordered parameter lines (`name|type|storage`), explicit target selectors, varargs, and machine-friendly warnings when hidden `__return_storage_ptr__` state is still present. +- Source-level fixes landed on 2026-04-06 for the two known live correctness bugs: + - `stack:` storage is now parsed before generic Ghidra deserialization so workflow-style bare stack offsets are interpreted consistently. + - exact calling-convention tokens are tried before legacy normalization so 16-bit far conventions such as `__cdecl16far` are not needlessly collapsed to plain `__cdecl` when the exact token is accepted. 
-- Function boundary repair: - - `create_function_by_address(entry, name, body_start, body_end, comment?)` - - `delete_function_by_address(entry)` - - `get_function_containing(address)` -- Arbitrary code and memory inspection: - - `read_region(start, end)` - - `disassemble_region(start, end)` - - `get_instruction_window(address, before_count, after_count)` - - `search_instructions(query, mode=text|operand|address, limit?)` - - `get_data_uses(address, include_operand_scans=true, limit?)` -- Batch and transactional edits: - - `set_comments(batch)` - - `set_decompiler_comments(batch)` - - `rename_functions_by_address(batch)` - - `apply_program_edit_plan(plan, dry_run=false)` -- Reanalysis and repair helpers: - - `reanalyze_region(start, end)` - - `patch_bytes_and_reanalyze(start, bytes, comment?)` - - `analyze_function_boundaries(start, end)` -- Read-only project access and scripting: - - `get_project_access_info()` - - `get_runtime_capabilities()` - - `open_current_program_readonly(version=-1, make_current=true)` - - `run_readonly_script(script_path|script_text)` with a constrained token denylist policy +### Historical Notes -- Explicit write targeting: - - optional `project_dir`, `project_name`, `folder_path`, `program_name` selectors on `apply_program_edit_plan(...)` - - optional `project_dir`, `project_name`, `folder_path`, `program_name` selectors on `patch_bytes_and_reanalyze(...)` - -Batch encoding used by the current bridge: - -- `set_comments` and `set_decompiler_comments`: list of `(address, comment)` pairs. -- `rename_functions_by_address`: list of `(address, new_name)` pairs. 
-- `apply_program_edit_plan`: one action per line with `|` separators, for example: - - `create_function_by_address|000c:1234|name|000c:1234|000c:1260|note` - - `delete_function_by_address|000c:1234` - - `rename_function_by_address|000c:1234|new_name` - - `set_disassembly_comment|000c:1234|comment text` - - `set_decompiler_comment|000c:1234|comment text` - -Notes on read-only coverage: - -- `open_current_program_readonly` opens a read-only program object for the currently loaded domain file. -- Project-switch/open-by-path is still not implemented; MCP still operates on the active Ghidra GUI project context. - -### Function boundary repair - -- Missing capability: create a function at an explicit entry with an explicit body start/end. -- Current fallback: local PyGhidra `create-function` and JSON repair plans. -- Why it matters: boundary repair is a recurring part of this project, especially for overlapped or truncated raw functions. -- Proposed MCP addition: `create_function_by_address(entry, name, body_start, body_end, comment?)`. - -- Missing capability: delete an incorrect auto-created function. -- Current fallback: local PyGhidra `delete-function`. -- Why it matters: bad auto-analysis often blocks decompilation of adjacent real functions. -- Proposed MCP addition: `delete_function_by_address(entry)`. - -- Missing capability: get the function containing an arbitrary address. -- Current fallback: local PyGhidra `get-function-containing`. -- Why it matters: no-function windows and overlap investigations depend on quickly mapping instruction hits back to owning functions. -- Proposed MCP addition: `get_function_containing(address)`. - -### Arbitrary code and memory inspection - -- Missing capability: read raw bytes from an arbitrary address range in program memory. -- Current fallback: local PyGhidra `read-region`. -- Why it matters: some important sites are real code bytes that are not yet part of any function object. 
-- Proposed MCP addition: `read_region(start, end)` returning bytes and a compact hex view. - -- Missing capability: dump nearby instructions around an arbitrary address even when no function exists there. -- Current fallback: custom read-only PyGhidra scripts such as `pyghidra_plans/dump_instruction_windows.py`. -- Why it matters: the `0x4588` investigation depended on inspecting instruction windows in no-function regions. -- Proposed MCP addition: `disassemble_region(start, end)` or `get_instruction_window(address, before_count, after_count)`. - -- Missing capability: scan all instructions for a literal operand or address token. -- Current fallback: custom PyGhidra scripts such as `scan_4588_instruction_uses.py`. -- Why it matters: normal xref APIs can miss useful operand-text hits in partially analyzed regions. -- Proposed MCP addition: `search_instructions(query, mode=text|operand|address, limit?)`. - -- Missing capability: robust data-address xrefs that include operand-based uses even when the reference manager has none. -- Current fallback: instruction-text scans and manual disassembly windows. -- Why it matters: globals like `0x4588` can be heavily used before formal references exist in the database. -- Proposed MCP addition: `get_data_uses(address, include_operand_scans=true)`. - -### Batch and transactional edits - -- Missing capability: apply a small transactional edit plan containing function removals, function creations, renames, and comments. -- Current fallback: local PyGhidra `apply-plan` with JSON. -- Why it matters: boundary repair work is safer when a verified batch can be replayed atomically. -- Proposed MCP addition: `apply_program_edit_plan(plan)` with dry-run support. - -- Missing capability: batch comment creation for a verified address set. -- Current fallback: repeated single-address comment calls or PyGhidra plan files. -- Why it matters: reverse-engineering batches often produce several related evidence comments at once. 
-- Proposed MCP addition: `set_comments(batch)` and `set_decompiler_comments(batch)`. - -- Missing capability: batch rename-by-address for a small verified set. -- Current fallback: repeated `rename_function_by_address` calls or local plan files. -- Why it matters: verified raw-import ports often land in short, evidence-backed batches. -- Proposed MCP addition: `rename_functions_by_address(batch)`. - -### Reanalysis and repair helpers - -- Missing capability: re-disassemble or reanalyze a small address range after patching bytes or changing function boundaries. -- Current fallback: local scripted repair passes. -- Why it matters: the far-call fixup workflow and boundary recovery both depend on deterministic reanalysis of touched ranges. -- Proposed MCP addition: `reanalyze_region(start, end, options?)`. - -- Missing capability: patch a small byte range and immediately re-disassemble affected instructions. -- Current fallback: local PyGhidra repair scripts. -- Why it matters: the NE far-call fixup pass was a major workflow improvement and is exactly the sort of task MCP should eventually support. -- Proposed MCP addition: `patch_bytes_and_reanalyze(start, bytes, comment?)`. - -- Missing capability: detect likely bad function overlaps or candidate function starts in a small range. -- Current fallback: manual repair plus custom PyGhidra probing. -- Why it matters: overlap repair is one of the main reasons the workflow still has to leave MCP. -- Proposed MCP addition: `analyze_function_boundaries(start, end)` returning overlap warnings and candidate entries. - -### Read-only project access and scripting - -- Missing capability: open a locked project read-only or query a specified project clone directly from MCP. -- Current fallback: local PyGhidra against an unlocked temporary project clone. -- Why it matters: the GUI often owns the main project while read-only inspection still needs to continue. 
-- Proposed MCP addition: read-only project selection/open options for all analysis endpoints. - -- Missing capability: run a small read-only script for one-off inspections that do not justify a permanent MCP endpoint yet. -- Current fallback: local PyGhidra `run-script --read-only`. -- Why it matters: several repo workflows start as one-off analysis helpers before they prove worth productizing. -- Proposed MCP addition: a constrained `run_readonly_script(script_text|script_path)` endpoint with explicit safety limits. - -### Migrated entries from `ghidra-mcp_wishlist.md` - -Short, concrete gaps hit during live Crusader work. Each entry records what MCP lacked, what fallback was needed, and what a useful MCP feature should look like. - -## Open Gaps (migrated) - -### Byte-pattern search across program memory - -- Status: implemented in local fork (2026-03-26) -- Missing MCP capability: search raw bytes or byte patterns across the current program's mapped segments / address spaces. -- Fallback used: manual `read_region` sweeps plus local Python over the MCP HTTP bridge to scan live Spanish `CRUSADER.EXE` memory for the `jassica16` scan-code table. -- Useful MCP feature: - - `search_bytes(pattern, start?, end?, segment_filter?, max_hits?)` - - accepts hex byte patterns with optional wildcards - - returns exact hit addresses plus nearby hex context -- Why it matters: this would have closed the Spanish cheat-sequence question directly inside MCP instead of forcing ad hoc local scripting. -- Status update (2026-03-26): local fork now exposes `search_bytes(pattern, start?, end?, segment_filter?, max_hits?)` in both the Java plugin and Python bridge; it accepts `??` wildcards, scans mapped memory blocks, and returns machine-friendly hit lines with block names and nearby hex context. 
- -### Reliable caller/xref recovery for local call sites - -- Status: implemented in local fork (2026-03-26) -- Missing MCP capability: reliable function-call xrefs for near/local calls inside the active program. -- Fallback used: manual `search_instructions` and instruction-window inspection because `get_function_xrefs` did not surface some obvious local call sites in the Spanish keyboard/helper cluster. -- Useful MCP feature: - - improve `get_function_xrefs` so it includes near calls, far calls, tail-call-style jumps, and thunk references consistently - - or add `get_callers(address_or_name, include_near=true, include_far=true, include_jumps=true)` -- Why it matters: tracing helper chains around hidden key-sequence code is slower and less reliable when local callers have to be reconstructed by text search. -- Status update (2026-03-26): local fork now exposes `get_callers(target, include_near=true, include_far=true, include_jumps=true, limit?)`, combining reference-manager hits with instruction-flow scans so local near-call sites show up even when plain xrefs are incomplete; `get_function_xrefs` now reuses the same caller recovery path. - -### Cross-program reads inside the same Ghidra project - -- Status: implemented in local fork (2026-03-26) -- Missing MCP capability: read/query another program or assembly in the same project without switching the active program first. -- Fallback used: indirect comparison against repo notes, workspace-side files, and ad hoc local scripts instead of querying `/CRUSADER.EXE`, `/es/CRUSADER.EXE`, `/Writable/...`, or other domain files side by side through MCP. 
-- Useful MCP feature: - - allow explicit target selectors on all read/query endpoints, not only write endpoints - - example: `read_region(start, end, project_dir?, project_name?, folder_path?, program_name?)` - - same for strings, functions, xrefs, data uses, decompile, disassemble, symbol lookup, and segment listing -- Why it matters: live localized-build comparisons and writable-copy verification should not require changing the active Ghidra tab just to inspect another program. -- Status update (2026-03-26): read/query endpoints in the local fork now accept optional explicit target selectors (`project_dir`, `project_name`, `folder_path`, `program_name`) and reuse the same target-resolution layer as write flows; this now covers method/class listings, segments, imports/exports, namespaces, data items, function lookup/listing, decompile/disassembly, symbol lookup, regions, instruction scans, strings, xrefs, and data-use queries. - -### Cross-project / cross-program compare tooling - -- Status: implemented in local fork (2026-03-26) -- Missing MCP capability: first-class compare operations between two programs in the same project or across projects. -- Fallback used: manual note-to-note comparison, address math, and repeated per-program queries. -- Useful MCP feature: - - `compare_regions(left_program, left_range, right_program, right_range, mode=bytes|words|disasm|strings)` - - `compare_strings(left_program, right_program, filter?)` - - `compare_functions(left_program, left_addr_or_name, right_program, right_addr_or_name, mode=signature|disasm|decompile|xrefs)` - - machine-readable output with address pairs, similarity score, and differing bytes/instructions/strings -- Why it matters: this would make English vs Spanish / Remorse vs Regret / raw vs live NE comparisons much faster and less error-prone. 
-- Status update (2026-03-26): local fork now exposes `compare_regions(...)`, `compare_strings(...)`, and `compare_functions(...)` with left/right explicit target selectors; outputs are machine-friendly and include comparison mode, similarity score, and capped difference samples for byte/word, disassembly, string, signature, decompile, and xref views. - -### Port renames/comments/symbol facts between programs - -- Status: implemented in local fork (2026-03-26) -- Missing MCP capability: apply verified names/comments from one program to another program with explicit provenance instead of re-entering them one by one. -- Fallback used: manual rename/comment batches plus external notes to carry mapping provenance. -- Useful MCP feature: - - `port_symbols(source_program, target_program, mappings, apply=names|comments|both, provenance_comment_template?)` - - support direct address maps, segment-relative maps, and user-supplied CSV/JSON mapping tables - - dry-run mode showing collisions and ambiguous targets -- Why it matters: porting verified English or raw-import findings into Spanish or live NE targets is a recurring workflow. -- Status update (2026-03-26): local fork now exposes `port_symbols(mappings, apply=names|comments|both, provenance_comment_template?, dry_run?)` with `source_*` and `target_*` selectors; the bridge accepts a verified list of source/target address pairs and the plugin ports names plus PRE/EOL comments with optional provenance text and explicit-target save support. - -### Project inventory / browse endpoint - -- Status: implemented in local fork (2026-03-26) -- Missing MCP capability: list project folders and available programs through MCP. -- Fallback used: repo-side assumptions and local tooling; the current MCP read tools expose only the active program cleanly. 
-- Useful MCP feature: - - `list_project_programs(project_dir?, project_name?, folder_path?, recursive=true)` - - returns folder path, program name, read-only/writable/versioned state, and whether it is currently open -- Why it matters: comparing or porting across programs is awkward without a discoverable inventory of assemblies already in the Ghidra project. -- Status update (2026-03-26): local fork now exposes `list_project_programs(project_dir?, project_name?, folder_path?, recursive=true)` plus a `project_programs` alias; it walks project folders and returns machine-friendly program inventory lines with folder path, program name, content type, read-only/versioned flags, and current-open state. - -### Class / namespace authoring for C++ lifting - -- Missing MCP capability: create and manage Ghidra class or namespace symbols, then move existing functions under those owners as methods. -- Current fallback: manual Ghidra GUI edits in the Symbol Tree or one-off local scripts outside the normal MCP workflow. -- Why it matters: the Remorse binary already shows repeated ctor/dtor patterns, stable vtable roots, and class-like object families, but the current MCP workflow can only rename flat functions. That blocks a disciplined shift from procedural naming toward grouped C++-style ownership. -- Proposed MCP addition: - - `create_namespace(name, parent_path?, kind=namespace|class)` - - `move_symbol_to_namespace(symbol_address_or_name, namespace_path, new_name?)` - - `set_function_class(function_address, class_path, method_name?, this_param_name?, calling_convention?)` - - machine-friendly responses that include the final symbol path and any rename collisions. -- Status update (2026-04-05): local fork now exposes `create_namespace(...)`, `list_namespace_members(...)`, `move_symbol_to_namespace(...)`, and `set_function_class(...)` in both the Java plugin and Python bridge. 
The implementation supports explicit target selectors, dry-run moves, collision policies (`fail|keep_existing|rename_incoming`), and compatibility aliases (`create_class`, `move_function_to_class`). - -### Vtable / OO recovery helpers for class reconstruction - -- Missing MCP capability: first-class helpers for identifying vtables, attaching function slots to candidate classes, and materializing class/instance layouts from evidence-backed data. -- Current fallback: manual note collation from decompiler/disassembly output plus ad hoc datatype work in the GUI. -- Why it matters: the repo already has enough evidence to start lifting major families into C++ classes, but a recompilable source path needs more than renamed functions. It needs reproducible vtable maps, `this`-pointer typing, field layouts, inheritance guesses, and explicit provenance for each class model. -- Proposed MCP addition: - - `analyze_vtable(address, slot_count?, namespace_path?)` - - `create_or_update_struct(name, size?, fields)` - - `set_function_this_type(function_address, struct_name, this_storage=stack|register|farptr)` - - `apply_class_layout(class_path, instance_struct, vtable_struct?, methods)` - - optional dry-run output showing inferred slots, unresolved targets, and conflicting field/size evidence. -- Status update (2026-04-05): local fork now exposes `analyze_vtable(...)`, `create_or_update_struct(...)`, `create_or_update_vtable(...)`, `set_function_this_type(...)`, and `apply_class_layout(...)` in both layers. Struct and vtable authoring accept line-encoded field/slot batches from the bridge, `set_function_this_type(...)` updates the first parameter to a typed `this` pointer while preserving storage when possible, and `apply_class_layout(...)` batches namespace moves plus `this` typing with dry-run support. Compatibility aliases now also cover `set_this_type` and `build_vtable`. 
\ No newline at end of file +- If a future pass hits a new MCP gap, add it under `Remaining TODOs` and move it to `Done / Implemented In Local Fork` once the local source and bridge support are both in place. \ No newline at end of file diff --git a/plan-mid.md b/plan-mid.md index 6f86fb0..dd43c1f 100644 --- a/plan-mid.md +++ b/plan-mid.md @@ -15,7 +15,7 @@ Detailed completed analysis belongs in the files under `docs/`, not in this plan ## Progress Snapshot -Latest verified batch: [docs/combat-dat.md](docs/combat-dat.md) now closes the shipped combat-tactic data file as a documentation target instead of leaving it as a scratch-note reference. Current best read is that all local Remorse/Regret variants share one identical `14`-record `COMBAT.DAT`, the live NE database now already has the right tactic/process field anchors (`combatDatTacticPtr`, `combatDatTacticCurOffset`, `combatDatBlockNo`, `tacticNo`) plus setup helpers, and the shipped opcode subset is now decoded into a full human-readable tactic catalog using direct binary parsing plus the ScummVM Crusader attack-process interpreter as a reference model. +Latest verified batch: [docs/psx/psx.md](docs/psx/psx.md) now tightens the PSX render-side model enough that a renderer change is easier to justify. The late `LSET*.WDL` `DAT_800758d8` candidate still decodes from the embedded `+0x38` parse start and still recovers the first `111` real map-9 art bindings, while the executable pass still closes the second world-facing render lane: `FUN_80040f78` is the stage-2 queued-object projector/builder for `DAT_80078b70` / `DAT_80067472`, `FUN_80041144` consumes that queue, and `FUN_80044fec` resets it each frame. 
The new renderer-side bridge is intentionally narrow but now verified in live output too: because the generic raw-file `DAT_800758cc/d0/d4` export is still missing, the cache builder temporarily seeds the executable-backed `0x0050` selector map (`0..3 -> frame 0..3`), and retail map `9` now exports `type=80 state_selector=1 chosen_frame=1` instead of the old forced frame `0`. Alongside the earlier palette-source closure (`obj+0xa0` is the original authored record pointer), the `DAT_80067720` event/control-list evidence, and the `DAT_8006769c` persistent `0x3e00` substrate/state evidence, this means the remaining map-viewer gap is now better bounded: part of the missing world content likely lives in the separate stage-2 queued-object pass, while the rest still depends on restoring the generic `DAT_800758cc/d4` export path and then closing state-to-art selection for unresolved families such as `0x0042` and `0x0049`. - Overall useful decompilation progress: about 58% - Reasonable uncertainty band: about 55% to 63% @@ -56,7 +56,7 @@ Latest verified batch: [docs/combat-dat.md](docs/combat-dat.md) now closes the s - The owner-loaded body/range model is no longer speculative. Class-selection uses `class_id + 2`, header/subentry math matches extracted corpus output, and concrete body windows for `NPCTRIG`, `EVENT`, and related families are now verified. - The map-renderer/documentation lane now has a stronger shape/controller crosswalk. Recent closures include `CRUMORPH`, `NPC_ONLY`, `WATCHNS`, `WATCHEW`, `CRYOBOX`, `CRAZYEW`, `CRAZYNS`, `VIDEOBOX`, `PANELEW`, `GENERATR`, and cross-game `DEATHBOX`, with viewer-side links kept conservative where actor-side state is still runtime-only. - The command-line/startup lane is much tighter across both games: `-warp [x y z]`, `-mapoff`, `-egg`, startup teleporter selection, and the `-u` EUSECODE root override all now have practical behavior models instead of folklore-level descriptions. 
-- The PSX lane is no longer just side inventory. Retail/pre-alpha bundle loading, mission-briefing/passcode structure, and the reduced-content pre-alpha disc now have dedicated notes and enough stable naming to support future targeted passes. +- The PSX lane is no longer just side inventory. Retail/pre-alpha bundle loading, mission-briefing/passcode structure, the reduced-content pre-alpha disc, and now the retail map object's last projection stage all have dedicated notes and enough stable naming to support future targeted passes. - The Remorse class-lift preparation lane now has a usable document cluster: overall plan, candidate inventory, endpoint spec, ABI constraints, family notes for `EntityDispatchEntry` and `SpriteNode`, a conservative `Entity` family split, a VM runtime/owner-resource layout note, a compatibility-header draft, and one grouped resume index. - The same class-lift prep lane is now more execution-ready: the `0x4588` broker family has its own focused object note, the toolchain story has a dedicated fingerprint-evidence note, and there is now a concrete first-batch class-authoring checklist ready for the first MCP-backed namespace/struct/vtable pass. - The live Remorse VM class-lift lane also recovered from a decompiler breakage in `Remorse::EntityVmRuntime::Create`: the root cause was a hidden-return-storage allocator helper signature at `1000:42e2`, `Create` now decompiles again, and the provisional `/Remorse/EntityVmSlotEntry` datatype now exists with the stable `+0x1e..+0x24` buffer-pair fields named. @@ -68,6 +68,19 @@ Latest verified batch: [docs/combat-dat.md](docs/combat-dat.md) now closes the s - The matching MCP gap is also clearer now: the old `apply_class_layout` dry-run null failure no longer reproduces for `/Remorse/EntityVmContext`, but the real write path still behaves like the older storage-preserving build. 
Actual `apply_class_layout` and direct `set_function_this_type` calls on the context lifecycle methods still fail with `Storage size does not match data type size: 2`, and live `run_write_script(...)` still returns `404 No context found for request` even with explicit target selectors. - Closing the GUI and dropping to the local PyGhidra fallback then landed the blocked context typing work cleanly: `CreateFromSlotIndex`, `FreeBuffer`, `SyncGlobalValueAndDispatch`, `Destroy`, `Save`, and `Load` now all carry `EntityVmContext * this` as their first parameter in `CRUSADER.EXE`, which confirms the newer dynamic-storage rewrite is sound even though the live MCP session still is not taking it. - The next live verification pass tightened two details. First, the new checked-in storage-aware prototype endpoint still is not the build currently serving the active GUI session: direct live POSTs to `/set_function_prototype_storage` still answered with the legacy `set_function_prototype` failure body, and the alias route still returned `404 No context found for request`. Second, the direct callers of `CreateFromSlotIndex` still mostly consume the result as a base process object, so the current conservative `UsecodeProcess *` return should stay in place until the inheritance-aware datatype story is explicit. +- The refreshed live MCP build moved that forward materially: `set_function_prototype_storage(...)` now reaches the real storage-aware implementation in-session and the active-program `run_write_script(...)` path now executes instead of failing with `404`. The new blockers are narrower and more concrete: bare `stack:` offsets at `10` and above currently need `0x` prefixes to preserve the intended stack slots, `__cdecl16far` still normalizes to plain `__cdecl`, and `Create` still cannot collapse to a single `EntityVmRuntime * this` because the datatype itself still resolves to a 2-byte pointer size.
+- The same live batch also tightened the slot-entry class model: `/Remorse/EntityVmSlotEntry` now carries `match_key_farptr`, `owner_chunk_count`, and `owner_data_base` in addition to the earlier owner-buffer and chunk-state tails, which makes `InitSlotOwnerBuffers`, `AcquireSlotForEntity`, and `EnsureSlotChunkLoaded` read more like object code and less like anonymous offset arithmetic. +- The next live batch tightened the adjacent helper map too: the old unnamed `1420:1d72`, `1420:1d8d`, and `1420:1e17` helpers are now `entity_vm_runtime_get_slot_chunk_ptr_at_offset`, `entity_vm_runtime_release_slot_chunk_ref`, and `entity_vm_runtime_try_unload_slot_chunk`, which makes the slot-entry lifecycle around load, refcount release, and conditional unload materially easier to navigate. +- The latest live batch turned that helper lane into a small shared record model: `/Remorse/EntityVmLoadedChunkRecord` now carries the stable `next_*`, `saved_chunk_*`, `slot_index`, and `chunk_index` anchors, `entity_vm_runtime_try_unload_slot_chunk` now takes `EntityVmLoadedChunkRecord *` and returns `byte` in `AL`, and `entity_vm_runtime_apply_to_matching_owner_rows` now iterates over a typed loaded-chunk record instead of anonymous stack-pair scratch state. +- The adjacent interpreter-side lane is slightly tighter too: local helper `1418:003c` is now `interpreter_pop_saved_farptr`, and the only verified `Interpreter_NextUsecodeOp` release path at `1418:3330` is commented as a save/restore boundary around `entity_vm_runtime_release_slot_chunk_ref` instead of being left as anonymous stack traffic. +- The live class-authoring state moved forward too: `Remorse::EntityVmSlotEntry` now exists as a real class owner in `CRUSADER.EXE`, `CreateOrClear` moved under it with an explicit `this` parameter and `AX` pointer return, and the runtime-local chunk helpers plus owner-row iterator/debug path now sit under `Remorse::EntityVmRuntime` instead of Global. 
+- The next live pass improved the runtime class surface further: `GetSlotChunkPtrAtOffset` now carries the recovered `runtime_farptr/slot_index/chunk_index/intra_chunk_offset` signature and still returns its far pointer in `DX:AX`, while `ApplyToMatchingOwnerRows` now carries the recovered `runtime_farptr/slot_index_filter/chunk_index_filter` signature and still returns its boolean in `AL`. +- The latest live pass removed the old runtime-wide 2-byte-`this` bottleneck for this cluster: `Create`, `InitSlots`, `ReleaseSlots`, `DebugDumpSlotMemory`, `ReleaseSlotChunkRef`, `GetSlotChunkPtrAtOffset`, `TryUnloadSlotChunk`, `ApplyToMatchingOwnerRows`, and `EnsureSlotChunkLoaded` now all accept an explicit 4-byte `EntityVmRuntime * this` through `/Remorse/EntityVmRuntime *32` custom storage in-session. The remaining live type gap is narrower again: exact `/Remorse/EntityVmSlotEntry *32` return/parameter typing still fails on `AcquireSlotForEntity` and `InitSlotOwnerBuffers`, so those positions are currently held as neutral `dword` placeholders instead of prettier but broken slot-entry pointer types. +- That slot-entry gap is now closed too, and the pointer cleanup widened beyond the runtime core: `AcquireSlotForEntity` now returns `EntityVmSlotEntry *32`, `InitSlotOwnerBuffers` now accepts `EntityVmSlotEntry *32`, `EntityVmOwnerResource::{Create,Destroy}` now carry explicit 4-byte `this`, and the simple `EntityVmContext` lifecycle methods now do the same. The main remaining VM signature outlier is `CreateFromSlotIndex`, whose argument pack still needs caller-side recovery rather than just pointer-width cleanup. +- The next family switch also landed: `Remorse::UsecodeDebuggerBreakState` now exists as a real class owner with a `0x2f2` provisional datatype plus a first method batch for construction, breakpoint gating, breakpoint table helpers, callstack helpers, and step-state helpers. 
+- That debugger batch is already tighter than the initial shell: `1408:01a5` is now verified as `BreakpointRemove`, `1408:02f5` is now verified as `CallstackPushFrame`, breakpoint entries are recovered as `0x0b` inline-name-plus-line records, and callstack entries are recovered as `0x15` inline-name-plus-three-dword records even though the trailing dword semantics remain open. +- The next pass landed the debugger struct rewrite in-session too: `/Remorse/UsecodeDebuggerBreakpointEntry`, `/Remorse/UsecodeDebuggerCallstackEntry`, and the updated `/Remorse/UsecodeDebuggerBreakState` array layout now exist live instead of only in notes, and the only verified `CallstackPushFrame` caller now narrows those three trailing dwords to `source_stream_target_farptr`, `current_frame_payload_farptr`, and still-neutral `aux_farptr`. +- `CreateFromSlotIndex` is no longer a raw anonymous pack either: the live signature now separates `owner_source_farptr`, `pitemno_farptr`, `mode_flags`, `slot_index`, `value_add_offset`, `intra_chunk_offset`, `ucparam_farptr`, and `ucparamsize`, with explicit `AX:DX` return storage restored even though the endpoint still textualizes the function conservatively as plain `dword __cdecl`. ### Areas That Are No Longer Live Priorities @@ -101,7 +114,8 @@ Latest verified batch: [docs/combat-dat.md](docs/combat-dat.md) now closes the s 4. Tighten the higher-slot wrapper ladder around `0005:3115..31da` so future event-label promotion depends on compiled caller behavior instead of external tables. 5. Tighten the seg006 masked-helper caller chains so the local state-selector/value family can be tied to concrete gameplay subsystems. 6. Classify the paired seg070 loops behind `entity_vm_runtime_owner_resource_create`, especially which temporary buffers and record schemas each family populates. -7. 
Stay on the Remorse VM class-lift batch while the repaired runtime lane is warm: redeploy or otherwise verify the live storage-aware prototype and storage-fallback class-layout builds so future context and slot-entry typing can stay in-session, then push `/Remorse/EntityVmSlotEntry` one step deeper through `EnsureSlotChunkLoaded` and adjacent slot helpers, keep `CreateFromSlotIndex` on the conservative `UsecodeProcess *` return until the base-process inheritance model is explicit, and keep the storage-aware `this` investigation focused on `Create` specifically now that `InitSlots` / `ReleaseSlots` and the broader context lifecycle are already typed. +7. Stay on the Remorse VM class-lift batch while the repaired runtime lane is warm: use the now-recovered `CreateFromSlotIndex` caller pack to decide whether any remaining scalar positions deserve stronger typedefs, but keep the return semantically conservative until the base-process inheritance model is explicit enough to justify a prettier live return type. +8. Continue the `UsecodeDebuggerBreakState` family from the now-landed live array layout: identify the exact gameplay semantics of `source_stream_target_farptr` and `current_frame_payload_farptr`, then decide whether `aux_farptr` should remain neutral and whether any further seg1408 or interpreter-side helpers belong under that class before widening into another family. 8. In the local GhidraMCP upgrade lane, add support for dual POST body decoding (`application/json` plus form-urlencoded) and a constrained live write-side PyGhidra endpoint family so future custom-storage/type repairs can stay inside the active MCP session when Python is enabled. 9. Promote additional ledger rows directly from already-verified docs and live comments, especially where segments already deserve `Foothold`, `Partial`, or `Deep`; the new seg029 step-aware sweep batch, seg031 queue-release batch, and seg090 movement-helper batch should be the immediate template. 10. 
If the VM lane stalls, revisit `000e:ffb0` from the now-better-constrained video/audio caller windows and try to recover an adjacent non-overlapped helper before attempting broad boundary repair. diff --git a/tools/psx_export_map_type_probe.py b/tools/psx_export_map_type_probe.py new file mode 100644 index 0000000..1297379 --- /dev/null +++ b/tools/psx_export_map_type_probe.py @@ -0,0 +1,586 @@ +from __future__ import annotations + +import argparse +import json +import math +import zlib +from dataclasses import dataclass +from pathlib import Path + + +DEFAULT_INPUT = Path(r"k:\ghidra\Crusader_Decomp\out\psx_wdl\L0\post_audio_region_01_00007448_paired_u16x6.json") +DEFAULT_OUTPUT_ROOT = Path(r"k:\ghidra\crusader_map_viewer\map_renderer\site\data") +DEFAULT_MAP_ID = 0 +DEBUG_SCENE_VERSION = "psx-region01-type-placement-probe-v1" +ALLOWED_U5 = {0x20, 0x22, 0x30} + + +@dataclass(frozen=True) +class PlacementRecord: + side: str + row_index: int + record_index: int + u0: int + u1: int + u2: int + u3: int + u4: int + u5: int + + +@dataclass(frozen=True) +class PlaceholderSprite: + type_id: int + variant: int + lane: int + atlas_id: str + file_name: str + shape_code: int + width: int + height: int + origin_x: int + origin_y: int + display_name: str + description: str + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Export a PSX LSET region-01 type-placement probe into the current map viewer static data." 
+ ) + parser.add_argument("--input", type=Path, default=DEFAULT_INPUT, help="Path to paired_u16x6 JSON export") + parser.add_argument("--output-root", type=Path, default=DEFAULT_OUTPUT_ROOT, help="Map viewer site/data root") + parser.add_argument("--game-id", default="psx-remorse", help="Catalog game id to write") + parser.add_argument("--game-label", default="No Remorse (PSX)", help="Catalog game label") + parser.add_argument("--map-id", type=int, default=DEFAULT_MAP_ID, help="Numeric map id for the static scene") + parser.add_argument( + "--map-label", + default="PSX LSET1/L0 Type Placement Probe", + help="Human-readable map label for the catalog entry", + ) + parser.add_argument( + "--screen-scale", + type=int, + default=1, + help="Multiplier applied to the raw region-01 coordinate delta when placing probe sprites", + ) + return parser.parse_args() + + +def load_records(path: Path) -> list[PlacementRecord]: + payload = json.loads(path.read_text(encoding="utf-8")) + records: list[PlacementRecord] = [] + for row in payload.get("rows", []): + row_index = int(row["index"]) + for side_index, side in enumerate(("left", "right")): + values = row.get(side) + if not isinstance(values, dict): + continue + record = PlacementRecord( + side=side, + row_index=row_index, + record_index=row_index * 2 + side_index, + u0=int(values.get("u0", 0)), + u1=int(values.get("u1", 0)), + u2=int(values.get("u2", 0)), + u3=int(values.get("u3", 0)), + u4=int(values.get("u4", 0)), + u5=int(values.get("u5", 0)), + ) + if is_structured_candidate(record): + records.append(record) + records.sort(key=lambda record: (record.u2, record.u1, record.u5, record.u4, record.u0, record.record_index)) + return records + + +def is_structured_candidate(record: PlacementRecord) -> bool: + if record.u0 >= 0x200: + return False + if record.u1 == 0 and record.u2 == 0: + return False + if record.u1 >= 0x4000 or record.u2 >= 0x4000: + return False + if record.u3 > 0x20 or record.u4 > 0x04: + return False + if 
record.u5 not in ALLOWED_U5: + return False + return True + + +def chunk(tag: bytes, payload: bytes) -> bytes: + crc = zlib.crc32(tag) + crc = zlib.crc32(payload, crc) & 0xFFFFFFFF + return len(payload).to_bytes(4, "big") + tag + payload + crc.to_bytes(4, "big") + + +def write_png_rgba(path: Path, rgba: bytes, width: int, height: int) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + stride = width * 4 + scanlines = bytearray() + for row in range(height): + start = row * stride + scanlines.append(0) + scanlines.extend(rgba[start : start + stride]) + ihdr = width.to_bytes(4, "big") + height.to_bytes(4, "big") + bytes((8, 6, 0, 0, 0)) + idat = zlib.compress(bytes(scanlines), level=9) + png = b"\x89PNG\r\n\x1a\n" + chunk(b"IHDR", ihdr) + chunk(b"IDAT", idat) + chunk(b"IEND", b"") + path.write_bytes(png) + + +def clamp_channel(value: int) -> int: + return max(0, min(255, value)) + + +def color_from_key(type_id: int, variant: int, lane: int) -> tuple[int, int, int]: + seed = (type_id * 1103515245 + variant * 12345 + lane * 2654435761) & 0xFFFFFFFF + red = 64 + (seed & 0x7F) + green = 72 + ((seed >> 7) & 0x7F) + blue = 80 + ((seed >> 14) & 0x7F) + if lane == 0x22: + green += 24 + elif lane == 0x30: + blue += 28 + return clamp_channel(red), clamp_channel(green), clamp_channel(blue) + + +def placeholder_geometry(lane: int) -> tuple[int, int, int, int]: + if lane == 0x30: + return 64, 64, 32, 52 + if lane == 0x22: + return 64, 40, 32, 28 + return 64, 32, 32, 20 + + +def set_pixel(rgba: bytearray, width: int, x: int, y: int, color: tuple[int, int, int, int]) -> None: + if x < 0 or y < 0 or x >= width: + return + index = (y * width + x) * 4 + if index < 0 or index + 3 >= len(rgba): + return + rgba[index : index + 4] = bytes(color) + + +def build_placeholder_rgba(type_id: int, variant: int, lane: int) -> tuple[bytes, int, int, int, int]: + width, height, origin_x, origin_y = placeholder_geometry(lane) + rgba = bytearray(width * height * 4) + fill_rgb = 
color_from_key(type_id, variant, lane) + border_rgb = tuple(clamp_channel(channel - 36) for channel in fill_rgb) + top = 4 if lane != 0x30 else 16 + mid_y = top + 8 + bottom = height - 4 + center_x = width // 2 + half_span = width // 2 - 4 + for y in range(top, bottom): + if lane == 0x30 and y < mid_y: + continue + rel = (y - mid_y) / max(1, bottom - mid_y) + span = max(4, int(half_span * (1.0 - abs(rel)))) + left = center_x - span + right = center_x + span + for x in range(left, right + 1): + alpha = 220 + if x in (left, right) or y in (top, bottom, mid_y): + color = (*border_rgb, 255) + else: + color = (*fill_rgb, alpha) + set_pixel(rgba, width, x, y, color) + if lane == 0x30: + for y in range(8, mid_y): + left = center_x - 12 + right = center_x + 12 + for x in range(left, right + 1): + color = (*fill_rgb, 208) if x not in (left, right) else (*border_rgb, 255) + set_pixel(rgba, width, x, y, color) + stripe_count = min(variant, 3) + for stripe in range(stripe_count): + stripe_y = bottom - 6 - stripe * 4 + for x in range(center_x - 10, center_x + 11): + set_pixel(rgba, width, x, stripe_y, (255, 255, 255, 220)) + return bytes(rgba), width, height, origin_x, origin_y + + +def build_placeholder_sprites(output_root: Path, records: list[PlacementRecord]) -> dict[tuple[int, int, int], PlaceholderSprite]: + maps_root = output_root / "maps" / "psx-remorse" / "map-0" + sprites: dict[tuple[int, int, int], PlaceholderSprite] = {} + keys = sorted({(record.u0, record.u4, record.u5) for record in records}) + for index, (type_id, variant, lane) in enumerate(keys): + rgba, width, height, origin_x, origin_y = build_placeholder_rgba(type_id, variant, lane) + atlas_id = f"atlas-type-{type_id:04x}-variant-{variant}-lane-{lane:04x}" + file_name = f"type_{type_id:04X}_variant_{variant}_lane_{lane:04X}.png" + write_png_rgba(maps_root / file_name, rgba, width, height) + shape_code = 0xA000 + index + sprites[(type_id, variant, lane)] = PlaceholderSprite( + type_id=type_id, + 
variant=variant, + lane=lane, + atlas_id=atlas_id, + file_name=file_name, + shape_code=shape_code, + width=width, + height=height, + origin_x=origin_x, + origin_y=origin_y, + display_name=f"PSX type {type_id:04X} variant {variant} lane {lane:04X}", + description=( + "Region-01 placement probe using PSX type/lane placeholders. " + "This scene is intended to validate coordinate coherence before final art binding." + ), + ) + return sprites + + +def build_scene( + records: list[PlacementRecord], + placeholder_sprites: dict[tuple[int, int, int], PlaceholderSprite], + game_id: str, + game_label: str, + map_id: int, + screen_scale: int, +) -> dict[str, object]: + if not records: + raise ValueError("No structured PSX placement records survived the filter.") + + min_x = min(record.u1 for record in records) + max_x = max(record.u1 for record in records) + min_y = min(record.u2 for record in records) + max_y = max(record.u2 for record in records) + + atlases: list[dict[str, object]] = [] + sprites: list[dict[str, object]] = [] + shape_definitions: list[dict[str, object]] = [] + for sprite in placeholder_sprites.values(): + atlases.append({ + "id": sprite.atlas_id, + "fileName": sprite.file_name, + "width": sprite.width, + "height": sprite.height, + }) + sprites.append({ + "id": f"sprite:{sprite.shape_code}:0", + "atlasId": sprite.atlas_id, + "shape": sprite.shape_code, + "frame": 0, + "x": 0, + "y": 0, + "width": sprite.width, + "height": sprite.height, + "xoff": sprite.origin_x, + "yoff": sprite.origin_y, + }) + shape_definitions.append({ + "id": f"shape:{sprite.shape_code}", + "shape": sprite.shape_code, + "shapeHex": f"0x{sprite.shape_code:04x}", + "family": None, + "label": "Terrain", + "kind": "terrain", + "displayName": sprite.display_name, + "description": sprite.description, + "dimensions": {"x": sprite.width, "y": sprite.height, "z": 1}, + "visibilityTags": ["psx", "type-probe"], + "traits": { + "editor": False, + "roof": False, + "oob": False, + "occluding": False, 
+ "translucent": False, + "solid": False, + "fixed": False, + "land": True, + "draw": True, + "invitem": False, + "animType": 0, + }, + "catalogEntry": { + "humanReadableId": sprite.display_name, + "description": sprite.description, + "roof": None, + "semitransparency": None, + "oob": None, + }, + "catalogOverrides": {"roof": None, "semitransparency": None, "oob": None}, + "tableFallback": None, + }) + + items: list[dict[str, object]] = [] + map_source_items: list[dict[str, object]] = [] + min_screen_left = None + min_screen_top = None + final_right = 0 + final_bottom = 0 + + for draw_order, record in enumerate(records): + sprite = placeholder_sprites[(record.u0, record.u4, record.u5)] + anchor_x = (record.u1 - min_x) * screen_scale + anchor_y = (max_y - record.u2) * screen_scale + screen_left = anchor_x - sprite.origin_x + screen_top = anchor_y - sprite.origin_y + min_screen_left = screen_left if min_screen_left is None else min(min_screen_left, screen_left) + min_screen_top = screen_top if min_screen_top is None else min(min_screen_top, screen_top) + final_right = max(final_right, screen_left + sprite.width) + final_bottom = max(final_bottom, screen_top + sprite.height) + item = { + "id": f"item:{draw_order}:psx-region01-type:{record.side}:{record.row_index}", + "mapSourceIndex": draw_order, + "drawOrder": draw_order, + "kind": "terrain", + "label": "Terrain", + "source": "psx-region01-type-probe", + "world": {"x": record.u1, "y": record.u2, "z": record.u3}, + "mapNum": record.u5, + "npcNum": record.u4, + "nextItem": 0, + "quality": record.u0, + "frame": 0, + "screen": { + "left": screen_left, + "top": screen_top, + "right": screen_left + sprite.width, + "bottom": screen_top + sprite.height, + "width": sprite.width, + "height": sprite.height, + "anchorX": anchor_x, + "anchorY": anchor_y, + }, + "flags": { + "raw": record.u3, + "hex": f"0x{record.u3:04X}", + "invisible": False, + "flipped": False, + }, + "presentation": {"opacity": 1, "visibilityDefault": True}, + 
"notes": [ + f"PSX region-01 type-placement probe record {record.side} row {record.row_index}", + f"raw words: {record.u0:04X} {record.u1:04X} {record.u2:04X} {record.u3:04X} {record.u4:04X} {record.u5:04X}", + f"placeholder mapping: type={record.u0:04X} variant={record.u4} lane={record.u5:04X}", + ], + "frameSize": { + "width": sprite.width, + "height": sprite.height, + "xoff": sprite.origin_x, + "yoff": sprite.origin_y, + }, + "egg": None, + "npcPreview": None, + "itemPreview": None, + "shapeDefId": f"shape:{sprite.shape_code}", + "spriteId": f"sprite:{sprite.shape_code}:0", + } + items.append(item) + map_source_items.append( + { + "x": record.u1, + "y": record.u2, + "z": record.u3, + "shape": sprite.shape_code, + "frame": 0, + "flags": record.u3, + "quality": record.u0, + "npcNum": record.u4, + "mapNum": record.u5, + "nextItem": 0, + "source": "psx-region01-type-probe", + "rawWords": [record.u0, record.u1, record.u2, record.u3, record.u4, record.u5], + "recordSide": record.side, + "rowIndex": record.row_index, + "typeId": record.u0, + "lane": record.u5, + "variant": record.u4, + "screenLeft": screen_left, + "screenTop": screen_top, + } + ) + + x_shift = -min(0, min_screen_left or 0) + y_shift = -min(0, min_screen_top or 0) + final_right = 0 + final_bottom = 0 + for item in items: + screen = item["screen"] + screen["left"] += x_shift + screen["right"] += x_shift + screen["top"] += y_shift + screen["bottom"] += y_shift + screen["anchorX"] += x_shift + screen["anchorY"] += y_shift + final_right = max(final_right, screen["right"]) + final_bottom = max(final_bottom, screen["bottom"]) + + lane_counts: dict[str, int] = {} + for record in records: + lane_key = f"0x{record.u5:04X}" + lane_counts[lane_key] = lane_counts.get(lane_key, 0) + 1 + + return { + "build": { + "version": DEBUG_SCENE_VERSION, + "fingerprint": "psx-lset1-l0-region01-type-placement", + "generatedAt": "2026-04-06T00:00:00.000Z", + "cacheMode": "single-scene", + }, + "metadata": { + "game": game_id, + 
"gameLabel": game_label, + "map": map_id, + "rawItemCount": len(records), + "itemCount": len(records), + "paintedItemCount": len(records), + "occludedItemCount": 0, + "invalidItemCount": 0, + "invalidItems": [], + "sceneSummary": { + "atlasCount": len(atlases), + "spriteCount": len(sprites), + "helperCount": 0, + "kindCounts": {"terrain": len(records)}, + "sourceCounts": {"psx-region01-type-probe": len(records)}, + "topFamilies": [{"family": None, "count": len(records)}], + }, + "usage": { + "status": "research", + "confidence": "medium", + "knownHints": [ + "Uses region-01 coordinate/type records with deterministic placeholder sprites instead of the invalid raw-bundle art mapping.", + "This scene is for validating PSX map coherence and placement clustering before final art binding.", + "Palette and final sprite lookup are intentionally deferred in this probe.", + ], + "itemMapNums": sorted({record.u5 for record in records}), + "nonzeroItemMapNums": sorted({record.u5 for record in records if record.u5 != 0}), + "npcLinkedItemCount": sum(1 for record in records if record.u4 != 0), + "note": "Type-placement probe using region-01 records. 
This is the current best map-coherence debug scene, not final art output.", + "hasRenderableContent": True, + "game": game_id, + "map": map_id, + }, + "baseItemSummary": { + "roofItems": 0, + "editorItems": 0, + "eggFamilyItems": 0, + "invisibleFlaggedItems": 0, + "npcLinkedItems": sum(1 for record in records if record.u4 != 0), + }, + "sorter": "psx_region01_type_probe", + "isEmpty": False, + "emptyReason": None, + "bounds": { + "screenLeft": 0, + "screenTop": 0, + "screenRight": final_right, + "screenBottom": final_bottom, + "width": final_right, + "height": final_bottom, + }, + "zoom": {"min": 0.01, "max": 8, "step": 0.1, "initial": 1}, + "buildFingerprint": "psx-lset1-l0-region01-type-placement", + "generatedAt": "2026-04-06T00:00:00.000Z", + "probeStats": { + "screenScale": screen_scale, + "typeCount": len({record.u0 for record in records}), + "spriteKeyCount": len(placeholder_sprites), + "laneCounts": lane_counts, + "u1Range": [min_x, max_x], + "u2Range": [min_y, max_y], + }, + }, + "atlases": sorted(atlases, key=lambda entry: entry["id"]), + "sprites": sorted(sprites, key=lambda entry: entry["id"]), + "shapeDefinitions": sorted(shape_definitions, key=lambda entry: entry["shape"]), + "items": items, + "mapSource": { + "formatVersion": DEBUG_SCENE_VERSION, + "game": game_id, + "mapId": map_id, + "itemRecordSize": 12, + "itemCount": len(map_source_items), + "originalByteLength": len(map_source_items) * 12, + "exportFileName": None, + "defaultTeleportEggShape": None, + "defaultTeleportEggShapeHex": None, + "defaultTeleportEggFrame": None, + "defaultTeleporterEggFrame": None, + "defaultTeleportDestinationEggFrame": None, + "binaryExportSupported": False, + "items": map_source_items, + }, + } + + +def write_catalog_entry( + output_root: Path, + game_id: str, + game_label: str, + map_id: int, + map_label: str, + raw_item_count: int, + shape_definitions: list[dict[str, object]], +) -> None: + catalog_path = output_root / "catalog.json" + catalog = 
json.loads(catalog_path.read_text(encoding="utf-8")) if catalog_path.exists() else {"games": []} + games = [game for game in catalog.get("games", []) if game.get("id") != game_id] + games.append( + { + "id": game_id, + "label": game_label, + "mapCount": 1, + "maps": [{"id": map_id, "label": map_label, "rawItemCount": raw_item_count}], + } + ) + games.sort(key=lambda game: game["label"]) + catalog["games"] = games + catalog_path.write_text(json.dumps(catalog, indent=2) + "\n", encoding="utf-8") + + catalogs_dir = output_root / "catalogs" + catalogs_dir.mkdir(parents=True, exist_ok=True) + csv_lines = [ + "shape_code,human_readable_id,description,roof,semitransparency,OOB,categorization,qualities" + ] + for definition in shape_definitions: + csv_lines.append( + ",".join( + [ + definition["shapeHex"], + definition["displayName"], + definition["description"], + "", + "", + "", + definition["kind"], + "", + ] + ) + ) + (catalogs_dir / f"{game_id}.csv").write_text("\n".join(csv_lines) + "\n", encoding="utf-8") + + +def main() -> int: + args = parse_args() + records = load_records(args.input) + maps_root = args.output_root / "maps" / args.game_id / f"map-{args.map_id}" + maps_root.mkdir(parents=True, exist_ok=True) + placeholder_sprites = build_placeholder_sprites(args.output_root, records) + scene = build_scene( + records, + placeholder_sprites, + args.game_id, + args.game_label, + args.map_id, + max(1, args.screen_scale), + ) + (maps_root / "scene.json").write_text(json.dumps(scene, indent=2) + "\n", encoding="utf-8") + write_catalog_entry( + args.output_root, + args.game_id, + args.game_label, + args.map_id, + args.map_label, + len(records), + scene["shapeDefinitions"], + ) + print( + f"wrote PSX type-placement probe: game={args.game_id} map={args.map_id} items={len(records)} unique_shapes={len(scene['shapeDefinitions'])} atlases={len(scene['atlases'])}" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) \ No newline at end of file diff --git 
a/tools/psx_extract_wdl.py b/tools/psx_extract_wdl.py index 17dcf7d..5063483 100644 --- a/tools/psx_extract_wdl.py +++ b/tools/psx_extract_wdl.py @@ -771,7 +771,7 @@ def annotate_region_tim_counts( def parse_lset_wdl(data: bytes) -> dict[str, object] | None: - if len(data) < 0x34: + if len(data) < 0x38: return None header_size = u32(data, 0) @@ -781,6 +781,22 @@ def parse_lset_wdl(data: bytes) -> dict[str, object] | None: header_words = [u32(data, offset) for offset in range(0, header_size, 4)] audio_size = header_words[1] post_audio_start = header_size + audio_size + section_sizes = [u32(data, offset) for offset in range(0x08, 0x38, 4)] + sections: list[dict[str, int | str]] = [] + cursor = post_audio_start + for index, size in enumerate(section_sizes): + if size <= 0: + continue + if cursor + size > len(data): + break + sections.append( + { + "name": f"post_audio_section_{index:02d}", + "offset": cursor, + "size": size, + } + ) + cursor += size high_boundaries = sorted( { value @@ -815,6 +831,7 @@ def parse_lset_wdl(data: bytes) -> dict[str, object] | None: tim_hits = scan_tims(data) annotate_region_tim_counts(regions, tim_hits) + annotate_region_tim_counts(sections, tim_hits) return { "kind": "lset", @@ -822,6 +839,8 @@ def parse_lset_wdl(data: bytes) -> dict[str, object] | None: "header_words": header_words, "audio_size": audio_size, "post_audio_start": post_audio_start, + "section_sizes": section_sizes, + "sections": sections, "high_offset_boundaries": high_boundaries, "regions": regions, "tim_hits": tim_hits, @@ -880,6 +899,10 @@ def summarize(path: Path, summary: dict[str, object]) -> str: lines.append(f"header_size: 0x{summary['header_size']:X}") lines.append(f"audio_size: 0x{summary['audio_size']:X}") lines.append(f"post_audio_start: 0x{summary['post_audio_start']:X}") + lines.append( + "section_sizes: " + + ", ".join(f"0x{value:X}" for value in summary["section_sizes"]) + ) lines.append( "high_offset_boundaries: " + ", ".join(f"0x{value:X}" for value in 
summary["high_offset_boundaries"]) @@ -896,6 +919,15 @@ def summarize(path: Path, summary: dict[str, object]) -> str: + f"{region['name']}: offset=0x{region['offset']:X} size=0x{region['size']:X} tims={tim_count}" ) + if summary["kind"] == "lset": + lines.append("sections:") + for section in summary["sections"]: + tim_count = section.get("tim_count", 0) + lines.append( + " " + + f"{section['name']}: offset=0x{section['offset']:X} size=0x{section['size']:X} tims={tim_count}" + ) + lines.append("tim_hits:") for hit in summary["tim_hits"]: lines.append(