From 9860aad646e8a9b892d4484b883a4719dcf08b77 Mon Sep 17 00:00:00 2001 From: Maddo Date: Mon, 29 Jun 2026 18:12:59 +0200 Subject: [PATCH] feat(logging): enhance debug logging and cleanup capabilities with new configuration options --- faceai/.env.development | 6 ++ faceai/.env.example | 6 ++ faceai/.env.production | 6 ++ faceai/README.md | 8 ++- faceai/apps/processor/src/config.js | 3 + faceai/apps/processor/src/worker.js | 92 +++++++++++++++++++++++++++++ faceai/docker-compose.yml | 27 +++++++++ faceai/docker/run-with-log-file.mjs | 77 ++++++++++++++++++++---- 8 files changed, 212 insertions(+), 13 deletions(-) diff --git a/faceai/.env.development b/faceai/.env.development index c5dc6fff..f4f56fc0 100644 --- a/faceai/.env.development +++ b/faceai/.env.development @@ -16,6 +16,12 @@ FACEAI_QUEUE_NAME=faceai-searches FACEAI_RUNTIME_ROOT=/data/runtime FACEAI_UPLOAD_ROOT=/data/runtime/uploads FACEAI_LOG_ROOT=/data/logs +FACEAI_DEBUG_RETENTION_DAYS=3 +FACEAI_DEBUG_CLEANUP_INTERVAL_MS=21600000 +FACEAI_SERVICE_LOG_MAX_BYTES=20971520 +FACEAI_SERVICE_LOG_MAX_FILES=5 +FACEAI_DOCKER_LOG_MAX_SIZE=10m +FACEAI_DOCKER_LOG_MAX_FILE=3 FACEAI_PKL_ROOT=/data/pkl FACEAI_MATCHER_BINARY=/app/bin/face_matcher FACEAI_MATCHER_TOLERANCE=0.5 diff --git a/faceai/.env.example b/faceai/.env.example index 84f33c04..42a1d75d 100644 --- a/faceai/.env.example +++ b/faceai/.env.example @@ -30,6 +30,12 @@ FACEAI_UPLOAD_ROOT=/data/runtime/uploads FACEAI_LOG_ROOT=/data/logs FACEAI_AUDIT_DB_PATH=/data/logs/faceai-audit.sqlite FACEAI_AUDIT_RETENTION_DAYS=730 +FACEAI_DEBUG_RETENTION_DAYS=3 +FACEAI_DEBUG_CLEANUP_INTERVAL_MS=21600000 +FACEAI_SERVICE_LOG_MAX_BYTES=20971520 +FACEAI_SERVICE_LOG_MAX_FILES=5 +FACEAI_DOCKER_LOG_MAX_SIZE=10m +FACEAI_DOCKER_LOG_MAX_FILE=3 FACEAI_PKL_ROOT=/data/pkl FACEAI_RUNTIME_BIND=/mnt/storage/data/faceai/runtime FACEAI_LOG_BIND=/mnt/storage/data/faceai/logs diff --git a/faceai/.env.production b/faceai/.env.production index b0ff931b..9405b639 100644 --- a/faceai/.env.production +++ b/faceai/.env.production @@ -18,6 +18,12 @@ FACEAI_QUEUE_NAME=faceai-searches FACEAI_RUNTIME_ROOT=/data/runtime FACEAI_UPLOAD_ROOT=/data/runtime/uploads FACEAI_LOG_ROOT=/data/logs +FACEAI_DEBUG_RETENTION_DAYS=3 +FACEAI_DEBUG_CLEANUP_INTERVAL_MS=21600000 +FACEAI_SERVICE_LOG_MAX_BYTES=20971520 +FACEAI_SERVICE_LOG_MAX_FILES=5 +FACEAI_DOCKER_LOG_MAX_SIZE=10m +FACEAI_DOCKER_LOG_MAX_FILE=3 FACEAI_PKL_ROOT=/data/pkl FACEAI_RUNTIME_BIND=/mnt/storage/data/faceai/runtime FACEAI_LOG_BIND=/mnt/storage/data/faceai/logs diff --git a/faceai/README.md b/faceai/README.md index ac554117..f16e4324 100644 --- a/faceai/README.md +++ b/faceai/README.md @@ -340,6 +340,12 @@ Shared application settings: | `FACEAI_LOG_ROOT` | recommended | `/data/logs` | persistent host-mounted diagnostics root for backend, processor, and per-search logs | | `FACEAI_AUDIT_DB_PATH` | recommended | `/data/logs/faceai-audit.sqlite` | SQLite audit database shared by backend and processor | | `FACEAI_AUDIT_RETENTION_DAYS` | recommended | `730` | how long structured audit rows are kept before automatic pruning | +| `FACEAI_DEBUG_RETENTION_DAYS` | recommended | `3` | how many days transient debug folders under uploads, runtime search outputs, and per-search logs are kept | +| `FACEAI_DEBUG_CLEANUP_INTERVAL_MS` | optional | `21600000` | how often the processor scans for expired debug artifacts | +| `FACEAI_SERVICE_LOG_MAX_BYTES` | recommended | `20971520` | size cap for `backend.log` and `processor.log` before local rotation | +| `FACEAI_SERVICE_LOG_MAX_FILES` | recommended | `5` | number of rotated `backend.log.N` and `processor.log.N` generations to keep | +| `FACEAI_DOCKER_LOG_MAX_SIZE` | recommended | `10m` | maximum size of each Docker json-file log segment | +| `FACEAI_DOCKER_LOG_MAX_FILE` | recommended | `3` | number of Docker json-file log segments to keep per container | | `FACEAI_SHARED_SECRET` | yes | long random secret | trust boundary between FaceAI and the legacy bridge | Public site settings: @@ -417,7 +423,7 @@ After the Compose stack is up, validate at least the following: This scaffold can now be deployed with the public site, processor, and Redis, but it still has important limitations: - search state is short-lived in Redis and is not backed by a durable database -- runtime uploads and matcher output still need an agreed production retention and cleanup policy +- runtime uploads, matcher output, and per-search debug logs now prune automatically, but the retention window still needs periodic review against support needs - the PKL mount contract is now defined, but final NAS operations and cleanup policy still need to be hardened - the backend currently sets the FaceAI session cookie with `secure: false`, which should be hardened before final public rollout - the local simulator endpoints under `/dev/*` are still present in the app and should be treated as non-production scaffolding diff --git a/faceai/apps/processor/src/config.js b/faceai/apps/processor/src/config.js index 7f6d55d9..fecd540e 100644 --- a/faceai/apps/processor/src/config.js +++ b/faceai/apps/processor/src/config.js @@ -25,9 +25,12 @@ export const config = { workerConcurrency: Number(process.env.FACEAI_WORKER_CONCURRENCY || 2), workerTimeoutMs: Number(process.env.FACEAI_WORKER_TIMEOUT_MS || 5 * 60 * 1000), runtimeRoot: process.env.FACEAI_RUNTIME_ROOT || '/data/runtime', + uploadRoot: process.env.FACEAI_UPLOAD_ROOT || path.join(process.env.FACEAI_RUNTIME_ROOT || '/data/runtime', 'uploads'), logRoot: process.env.FACEAI_LOG_ROOT || path.join(process.env.FACEAI_RUNTIME_ROOT || '/data/runtime', 'logs'), auditDbPath: process.env.FACEAI_AUDIT_DB_PATH || path.join(process.env.FACEAI_LOG_ROOT || '/data/logs', 'faceai-audit.sqlite'), auditRetentionDays: Number(process.env.FACEAI_AUDIT_RETENTION_DAYS || 730), + debugRetentionDays: Number(process.env.FACEAI_DEBUG_RETENTION_DAYS || 3), + debugCleanupIntervalMs: Number(process.env.FACEAI_DEBUG_CLEANUP_INTERVAL_MS || 6 * 60 * 60 * 1000), pklRoot: process.env.FACEAI_PKL_ROOT || '/data/pkl', matcherBinary: process.env.FACEAI_MATCHER_BINARY || '/app/bin/face_matcher', matcherTolerance, diff --git a/faceai/apps/processor/src/worker.js b/faceai/apps/processor/src/worker.js index 8b6ae69b..ee11aec8 100644 --- a/faceai/apps/processor/src/worker.js +++ b/faceai/apps/processor/src/worker.js @@ -18,6 +18,7 @@ import { parseMatcherCsv, resolvePklPath, runFaceMatcher } from './worker-utils. const connection = createRedisConnection(config.redisUrl); const auditStore = createAuditStore({ dbPath: config.auditDbPath, retentionDays: config.auditRetentionDays }); +const ONE_DAY_MS = 24 * 60 * 60 * 1000; async function ensureMatcherBinaryAvailable() { try { @@ -66,6 +67,88 @@ async function appendSearchLog(logPath, message, details) { await fs.appendFile(logPath, formatLogLine(message, details), 'utf8'); } +async function removeDirectoryIfPresent(dirPath) { + if (!dirPath) { + return; + } + + await fs.rm(dirPath, { recursive: true, force: true }); +} + +async function cleanupSearchArtifacts(search, searchDir) { + const cleanupTargets = new Set([searchDir]); + + if (search?.selfiePath) { + cleanupTargets.add(path.dirname(search.selfiePath)); + } + + await Promise.all(Array.from(cleanupTargets, (target) => removeDirectoryIfPresent(target))); +} + +async function listDirectories(rootPath) { + try { + const entries = await fs.readdir(rootPath, { withFileTypes: true }); + return entries.filter((entry) => entry.isDirectory()); + } catch (error) { + if (error?.code === 'ENOENT') { + return []; + } + + throw error; + } +} + +async function pruneExpiredDirectories(rootPath, cutoffTime) { + const directories = await listDirectories(rootPath); + const removed = []; + + await Promise.all(directories.map(async (entry) => { + const targetPath = path.join(rootPath, entry.name); + const stats = await fs.stat(targetPath).catch((error) => { + if (error?.code === 'ENOENT') { + return null; + } + + throw error; + }); + + if (!stats || stats.mtimeMs >= cutoffTime) { + return; + } + + await removeDirectoryIfPresent(targetPath); + removed.push(entry.name); + })); + + return removed; +} + +async function pruneDebugArtifacts(now = Date.now()) { + const retentionMs = Math.max(1, config.debugRetentionDays) * ONE_DAY_MS; + const cutoffTime = now - retentionMs; + + const [uploadDirs, runtimeDirs, logDirs] = await Promise.all([ + pruneExpiredDirectories(config.uploadRoot, cutoffTime), + pruneExpiredDirectories(path.join(config.runtimeRoot, 'searches'), cutoffTime), + pruneExpiredDirectories(path.join(config.logRoot, 'searches'), cutoffTime) + ]); + + return { uploadDirs, runtimeDirs, logDirs }; +} + +async function runScheduledDebugCleanup() { + try { + const removed = await pruneDebugArtifacts(); + const removedCount = removed.uploadDirs.length + removed.runtimeDirs.length + removed.logDirs.length; + + if (removedCount > 0) { + console.log(`Pruned FaceAI debug artifacts: uploads=${removed.uploadDirs.length}, runtime=${removed.runtimeDirs.length}, logs=${removed.logDirs.length}`); + } + } catch (error) { + console.error('Unable to prune FaceAI debug artifacts:', error); + } +} + async function resolveCompletionCode(logPath, matchCount) { if (matchCount > 0) { return null; @@ -212,11 +295,14 @@ async function processJob(job) { }); await releaseActiveSearchLock(connection, search.userId, searchId); throw error; + } finally { + await cleanupSearchArtifacts(search, searchDir); } } await ensureMatcherBinaryAvailable(); await publishProcessorHeartbeat(); +await runScheduledDebugCleanup(); const heartbeatTimer = setInterval(() => { publishProcessorHeartbeat(); @@ -224,6 +310,12 @@ const heartbeatTimer = setInterval(() => { heartbeatTimer.unref(); +const debugCleanupTimer = setInterval(() => { + runScheduledDebugCleanup(); +}, config.debugCleanupIntervalMs); + +debugCleanupTimer.unref(); + const worker = new Worker(config.queueName, processJob, { connection, concurrency: config.workerConcurrency diff --git a/faceai/docker-compose.yml b/faceai/docker-compose.yml index 6911e10c..972df9a7 100644 --- a/faceai/docker-compose.yml +++ b/faceai/docker-compose.yml @@ -26,12 +26,19 @@ services: FACEAI_LOG_ROOT: ${FACEAI_LOG_ROOT:-/data/logs} FACEAI_AUDIT_DB_PATH: ${FACEAI_AUDIT_DB_PATH:-/data/logs/faceai-audit.sqlite} FACEAI_AUDIT_RETENTION_DAYS: ${FACEAI_AUDIT_RETENTION_DAYS:-730} + FACEAI_SERVICE_LOG_MAX_BYTES: ${FACEAI_SERVICE_LOG_MAX_BYTES:-20971520} + FACEAI_SERVICE_LOG_MAX_FILES: ${FACEAI_SERVICE_LOG_MAX_FILES:-5} FACEAI_PKL_ROOT: ${FACEAI_PKL_ROOT:-/data/pkl} FACEAI_ENABLE_LOCAL_LEGACY_STATIC: ${FACEAI_ENABLE_LOCAL_LEGACY_STATIC:-0} volumes: - ${FACEAI_RUNTIME_BIND:-/mnt/storage/data/faceai/runtime}:${FACEAI_RUNTIME_ROOT:-/data/runtime} - ${FACEAI_LOG_BIND:-/mnt/storage/data/faceai/logs}:${FACEAI_LOG_ROOT:-/data/logs} - ${FACEAI_PKL_BIND:-/mnt/nas12/nas2/RUS}:${FACEAI_PKL_ROOT:-/data/pkl}:ro + logging: + driver: json-file + options: + max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m} + max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}" ports: - "${FACEAI_PUBLISHED_PORT:-3001}:${FACEAI_PORT:-3001}" healthcheck: @@ -59,6 +66,11 @@ services: depends_on: faceai: condition: service_healthy + logging: + driver: json-file + options: + max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m} + max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}" processor: image: ${FACEAI_PROCESSOR_IMAGE:-forgejo.maddoscientisto.net/maddo/faceai-processor:latest} @@ -76,9 +88,14 @@ services: FACEAI_REDIS_URL: ${FACEAI_REDIS_URL:-redis://redis:6379} FACEAI_QUEUE_NAME: ${FACEAI_QUEUE_NAME:-faceai-searches} FACEAI_RUNTIME_ROOT: ${FACEAI_RUNTIME_ROOT:-/data/runtime} + FACEAI_UPLOAD_ROOT: ${FACEAI_UPLOAD_ROOT:-/data/runtime/uploads} FACEAI_LOG_ROOT: ${FACEAI_LOG_ROOT:-/data/logs} FACEAI_AUDIT_DB_PATH: ${FACEAI_AUDIT_DB_PATH:-/data/logs/faceai-audit.sqlite} FACEAI_AUDIT_RETENTION_DAYS: ${FACEAI_AUDIT_RETENTION_DAYS:-730} + FACEAI_DEBUG_RETENTION_DAYS: ${FACEAI_DEBUG_RETENTION_DAYS:-3} + FACEAI_DEBUG_CLEANUP_INTERVAL_MS: ${FACEAI_DEBUG_CLEANUP_INTERVAL_MS:-21600000} + FACEAI_SERVICE_LOG_MAX_BYTES: ${FACEAI_SERVICE_LOG_MAX_BYTES:-20971520} + FACEAI_SERVICE_LOG_MAX_FILES: ${FACEAI_SERVICE_LOG_MAX_FILES:-5} FACEAI_PKL_ROOT: ${FACEAI_PKL_ROOT:-/data/pkl} FACEAI_MATCHER_BINARY: ${FACEAI_MATCHER_BINARY:-/app/bin/face_matcher} FACEAI_MATCHER_TOLERANCE: ${FACEAI_MATCHER_TOLERANCE:-0.5} @@ -88,6 +105,11 @@ services: - ${FACEAI_RUNTIME_BIND:-/mnt/storage/data/faceai/runtime}:${FACEAI_RUNTIME_ROOT:-/data/runtime} - ${FACEAI_LOG_BIND:-/mnt/storage/data/faceai/logs}:${FACEAI_LOG_ROOT:-/data/logs} - ${FACEAI_PKL_BIND:-/mnt/nas12/nas2/RUS}:${FACEAI_PKL_ROOT:-/data/pkl}:ro + logging: + driver: json-file + options: + max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m} + max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}" depends_on: redis: condition: service_healthy @@ -97,6 +119,11 @@ services: container_name: ${FACEAI_REDIS_CONTAINER_NAME:-regalami-faceai-redis} restart: unless-stopped command: ${FACEAI_REDIS_COMMAND:-redis-server --appendonly no} + logging: + driver: json-file + options: + max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m} + max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}" healthcheck: test: ["CMD", "redis-cli", "ping"] interval: 5s diff --git a/faceai/docker/run-with-log-file.mjs b/faceai/docker/run-with-log-file.mjs index 4aec6518..2c945be1 100644 --- a/faceai/docker/run-with-log-file.mjs +++ b/faceai/docker/run-with-log-file.mjs @@ -1,10 +1,11 @@ -import fs from 'node:fs'; import fsp from 'node:fs/promises'; import path from 'node:path'; import process from 'node:process'; import { spawn } from 'node:child_process'; const [, , logPath, ...commandArgs] = process.argv; +const maxBytes = Number(process.env.FACEAI_SERVICE_LOG_MAX_BYTES || 20 * 1024 * 1024); +const maxFiles = Math.max(1, Number(process.env.FACEAI_SERVICE_LOG_MAX_FILES || 5)); if (!logPath || commandArgs.length === 0) { process.stderr.write('Usage: node docker/run-with-log-file.mjs [args...]\n'); @@ -12,8 +13,60 @@ if (!logPath || commandArgs.length === 0) { } await fsp.mkdir(path.dirname(logPath), { recursive: true }); +let currentSize = await fsp.stat(logPath).then((stats) => stats.size).catch(() => 0); +let writeQueue = Promise.resolve(); + +async function pathExists(targetPath) { + try { + await fsp.access(targetPath); + return true; + } catch { + return false; + } +} + +async function rotateLogFile() { + if (!Number.isFinite(maxBytes) || maxBytes <= 0) { + return; + } + + await fsp.rm(`${logPath}.${maxFiles}`, { force: true }).catch(() => {}); + + for (let index = maxFiles - 1; index >= 1; index -= 1) { + const sourcePath = `${logPath}.${index}`; + const destinationPath = `${logPath}.${index + 1}`; + + if (await pathExists(sourcePath)) { + await fsp.rename(sourcePath, destinationPath); + } + } + + if (await pathExists(logPath)) { + await fsp.rename(logPath, `${logPath}.1`); + } + + currentSize = 0; +} + +async function appendChunkToLog(chunk) { + const chunkSize = Buffer.isBuffer(chunk) ? chunk.length : Buffer.byteLength(chunk); + + if (Number.isFinite(maxBytes) && maxBytes > 0 && currentSize > 0 && currentSize + chunkSize > maxBytes) { + await rotateLogFile(); + } + + await fsp.appendFile(logPath, chunk); + currentSize += chunkSize; +} + +function queueChunkWrite(chunk) { + writeQueue = writeQueue + .then(() => appendChunkToLog(chunk)) + .catch((error) => { + process.stderr.write(`${error.stack || error.message}\n`); + }); +} -const logStream = fs.createWriteStream(logPath, { flags: 'a' }); const child = spawn(commandArgs[0], commandArgs.slice(1), { cwd: process.cwd(), env: process.env, @@ -22,7 +75,7 @@ const child = spawn(commandArgs[0], commandArgs.slice(1), { function writeChunk(target, chunk) { target.write(chunk); - logStream.write(chunk); + queueChunkWrite(chunk); } child.stdout.on('data', (chunk) => { @@ -36,18 +89,18 @@ child.stderr.on('data', (chunk) => { child.on('error', (error) => { const message = `${error.stack || error.message}\n`; writeChunk(process.stderr, message); - logStream.end(() => { + writeQueue.finally(() => { process.exit(1); }); }); -child.on('close', (code, signal) => { - logStream.end(() => { - if (signal) { - process.kill(process.pid, signal); - return; - } +child.on('close', async (code, signal) => { + await writeQueue; - process.exit(code ?? 1); - }); + if (signal) { + process.kill(process.pid, signal); + return; + } + + process.exit(code ?? 1); });