feat(logging): enhance debug logging and cleanup capabilities with new configuration options
All checks were successful
Publish FaceAI Container / publish (push) Successful in 19m7s
All checks were successful
Publish FaceAI Container / publish (push) Successful in 19m7s
This commit is contained in:
parent
a95ae56134
commit
9860aad646
8 changed files with 212 additions and 13 deletions
|
|
@ -16,6 +16,12 @@ FACEAI_QUEUE_NAME=faceai-searches
|
|||
FACEAI_RUNTIME_ROOT=/data/runtime
|
||||
FACEAI_UPLOAD_ROOT=/data/runtime/uploads
|
||||
FACEAI_LOG_ROOT=/data/logs
|
||||
FACEAI_DEBUG_RETENTION_DAYS=3
|
||||
FACEAI_DEBUG_CLEANUP_INTERVAL_MS=21600000
|
||||
FACEAI_SERVICE_LOG_MAX_BYTES=20971520
|
||||
FACEAI_SERVICE_LOG_MAX_FILES=5
|
||||
FACEAI_DOCKER_LOG_MAX_SIZE=10m
|
||||
FACEAI_DOCKER_LOG_MAX_FILE=3
|
||||
FACEAI_PKL_ROOT=/data/pkl
|
||||
FACEAI_MATCHER_BINARY=/app/bin/face_matcher
|
||||
FACEAI_MATCHER_TOLERANCE=0.5
|
||||
|
|
|
|||
|
|
@ -30,6 +30,12 @@ FACEAI_UPLOAD_ROOT=/data/runtime/uploads
|
|||
FACEAI_LOG_ROOT=/data/logs
|
||||
FACEAI_AUDIT_DB_PATH=/data/logs/faceai-audit.sqlite
|
||||
FACEAI_AUDIT_RETENTION_DAYS=730
|
||||
FACEAI_DEBUG_RETENTION_DAYS=3
|
||||
FACEAI_DEBUG_CLEANUP_INTERVAL_MS=21600000
|
||||
FACEAI_SERVICE_LOG_MAX_BYTES=20971520
|
||||
FACEAI_SERVICE_LOG_MAX_FILES=5
|
||||
FACEAI_DOCKER_LOG_MAX_SIZE=10m
|
||||
FACEAI_DOCKER_LOG_MAX_FILE=3
|
||||
FACEAI_PKL_ROOT=/data/pkl
|
||||
FACEAI_RUNTIME_BIND=/mnt/storage/data/faceai/runtime
|
||||
FACEAI_LOG_BIND=/mnt/storage/data/faceai/logs
|
||||
|
|
|
|||
|
|
@ -18,6 +18,12 @@ FACEAI_QUEUE_NAME=faceai-searches
|
|||
FACEAI_RUNTIME_ROOT=/data/runtime
|
||||
FACEAI_UPLOAD_ROOT=/data/runtime/uploads
|
||||
FACEAI_LOG_ROOT=/data/logs
|
||||
FACEAI_DEBUG_RETENTION_DAYS=3
|
||||
FACEAI_DEBUG_CLEANUP_INTERVAL_MS=21600000
|
||||
FACEAI_SERVICE_LOG_MAX_BYTES=20971520
|
||||
FACEAI_SERVICE_LOG_MAX_FILES=5
|
||||
FACEAI_DOCKER_LOG_MAX_SIZE=10m
|
||||
FACEAI_DOCKER_LOG_MAX_FILE=3
|
||||
FACEAI_PKL_ROOT=/data/pkl
|
||||
FACEAI_RUNTIME_BIND=/mnt/storage/data/faceai/runtime
|
||||
FACEAI_LOG_BIND=/mnt/storage/data/faceai/logs
|
||||
|
|
|
|||
|
|
@ -340,6 +340,12 @@ Shared application settings:
|
|||
| `FACEAI_LOG_ROOT` | recommended | `/data/logs` | persistent host-mounted diagnostics root for backend, processor, and per-search logs |
|
||||
| `FACEAI_AUDIT_DB_PATH` | recommended | `/data/logs/faceai-audit.sqlite` | SQLite audit database shared by backend and processor |
|
||||
| `FACEAI_AUDIT_RETENTION_DAYS` | recommended | `730` | how long structured audit rows are kept before automatic pruning |
|
||||
| `FACEAI_DEBUG_RETENTION_DAYS` | recommended | `3` | how many days transient debug folders under uploads, runtime search outputs, and per-search logs are kept |
|
||||
| `FACEAI_DEBUG_CLEANUP_INTERVAL_MS` | optional | `21600000` | how often the processor scans for expired debug artifacts |
|
||||
| `FACEAI_SERVICE_LOG_MAX_BYTES` | recommended | `20971520` | size cap for `backend.log` and `processor.log` before local rotation |
|
||||
| `FACEAI_SERVICE_LOG_MAX_FILES` | recommended | `5` | number of rotated `backend.log.N` and `processor.log.N` generations to keep |
|
||||
| `FACEAI_DOCKER_LOG_MAX_SIZE` | recommended | `10m` | maximum size of each Docker json-file log segment |
|
||||
| `FACEAI_DOCKER_LOG_MAX_FILE` | recommended | `3` | number of Docker json-file log segments to keep per container |
|
||||
| `FACEAI_SHARED_SECRET` | yes | long random secret | trust boundary between FaceAI and the legacy bridge |
|
||||
|
||||
Public site settings:
|
||||
|
|
@ -417,7 +423,7 @@ After the Compose stack is up, validate at least the following:
|
|||
This scaffold can now be deployed with the public site, processor, and Redis, but it still has important limitations:
|
||||
|
||||
- search state is short-lived in Redis and is not backed by a durable database
|
||||
- runtime uploads and matcher output still need an agreed production retention and cleanup policy
|
||||
- runtime uploads, matcher output, and per-search debug logs now prune automatically, but the retention window still needs periodic review against support needs
|
||||
- the PKL mount contract is now defined, but final NAS operations and cleanup policy still need to be hardened
|
||||
- the backend currently sets the FaceAI session cookie with `secure: false`, which should be hardened before final public rollout
|
||||
- the local simulator endpoints under `/dev/*` are still present in the app and should be treated as non-production scaffolding
|
||||
|
|
|
|||
|
|
@ -25,9 +25,12 @@ export const config = {
|
|||
workerConcurrency: Number(process.env.FACEAI_WORKER_CONCURRENCY || 2),
|
||||
workerTimeoutMs: Number(process.env.FACEAI_WORKER_TIMEOUT_MS || 5 * 60 * 1000),
|
||||
runtimeRoot: process.env.FACEAI_RUNTIME_ROOT || '/data/runtime',
|
||||
uploadRoot: process.env.FACEAI_UPLOAD_ROOT || path.join(process.env.FACEAI_RUNTIME_ROOT || '/data/runtime', 'uploads'),
|
||||
logRoot: process.env.FACEAI_LOG_ROOT || path.join(process.env.FACEAI_RUNTIME_ROOT || '/data/runtime', 'logs'),
|
||||
auditDbPath: process.env.FACEAI_AUDIT_DB_PATH || path.join(process.env.FACEAI_LOG_ROOT || '/data/logs', 'faceai-audit.sqlite'),
|
||||
auditRetentionDays: Number(process.env.FACEAI_AUDIT_RETENTION_DAYS || 730),
|
||||
debugRetentionDays: Number(process.env.FACEAI_DEBUG_RETENTION_DAYS || 3),
|
||||
debugCleanupIntervalMs: Number(process.env.FACEAI_DEBUG_CLEANUP_INTERVAL_MS || 6 * 60 * 60 * 1000),
|
||||
pklRoot: process.env.FACEAI_PKL_ROOT || '/data/pkl',
|
||||
matcherBinary: process.env.FACEAI_MATCHER_BINARY || '/app/bin/face_matcher',
|
||||
matcherTolerance,
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import { parseMatcherCsv, resolvePklPath, runFaceMatcher } from './worker-utils.
|
|||
|
||||
const connection = createRedisConnection(config.redisUrl);
|
||||
const auditStore = createAuditStore({ dbPath: config.auditDbPath, retentionDays: config.auditRetentionDays });
|
||||
const ONE_DAY_MS = 24 * 60 * 60 * 1000;
|
||||
|
||||
async function ensureMatcherBinaryAvailable() {
|
||||
try {
|
||||
|
|
@ -66,6 +67,88 @@ async function appendSearchLog(logPath, message, details) {
|
|||
await fs.appendFile(logPath, formatLogLine(message, details), 'utf8');
|
||||
}
|
||||
|
||||
async function removeDirectoryIfPresent(dirPath) {
|
||||
if (!dirPath) {
|
||||
return;
|
||||
}
|
||||
|
||||
await fs.rm(dirPath, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
async function cleanupSearchArtifacts(search, searchDir) {
|
||||
const cleanupTargets = new Set([searchDir]);
|
||||
|
||||
if (search?.selfiePath) {
|
||||
cleanupTargets.add(path.dirname(search.selfiePath));
|
||||
}
|
||||
|
||||
await Promise.all(Array.from(cleanupTargets, (target) => removeDirectoryIfPresent(target)));
|
||||
}
|
||||
|
||||
async function listDirectories(rootPath) {
|
||||
try {
|
||||
const entries = await fs.readdir(rootPath, { withFileTypes: true });
|
||||
return entries.filter((entry) => entry.isDirectory());
|
||||
} catch (error) {
|
||||
if (error?.code === 'ENOENT') {
|
||||
return [];
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async function pruneExpiredDirectories(rootPath, cutoffTime) {
|
||||
const directories = await listDirectories(rootPath);
|
||||
const removed = [];
|
||||
|
||||
await Promise.all(directories.map(async (entry) => {
|
||||
const targetPath = path.join(rootPath, entry.name);
|
||||
const stats = await fs.stat(targetPath).catch((error) => {
|
||||
if (error?.code === 'ENOENT') {
|
||||
return null;
|
||||
}
|
||||
|
||||
throw error;
|
||||
});
|
||||
|
||||
if (!stats || stats.mtimeMs >= cutoffTime) {
|
||||
return;
|
||||
}
|
||||
|
||||
await removeDirectoryIfPresent(targetPath);
|
||||
removed.push(entry.name);
|
||||
}));
|
||||
|
||||
return removed;
|
||||
}
|
||||
|
||||
async function pruneDebugArtifacts(now = Date.now()) {
|
||||
const retentionMs = Math.max(1, config.debugRetentionDays) * ONE_DAY_MS;
|
||||
const cutoffTime = now - retentionMs;
|
||||
|
||||
const [uploadDirs, runtimeDirs, logDirs] = await Promise.all([
|
||||
pruneExpiredDirectories(config.uploadRoot, cutoffTime),
|
||||
pruneExpiredDirectories(path.join(config.runtimeRoot, 'searches'), cutoffTime),
|
||||
pruneExpiredDirectories(path.join(config.logRoot, 'searches'), cutoffTime)
|
||||
]);
|
||||
|
||||
return { uploadDirs, runtimeDirs, logDirs };
|
||||
}
|
||||
|
||||
async function runScheduledDebugCleanup() {
|
||||
try {
|
||||
const removed = await pruneDebugArtifacts();
|
||||
const removedCount = removed.uploadDirs.length + removed.runtimeDirs.length + removed.logDirs.length;
|
||||
|
||||
if (removedCount > 0) {
|
||||
console.log(`Pruned FaceAI debug artifacts: uploads=${removed.uploadDirs.length}, runtime=${removed.runtimeDirs.length}, logs=${removed.logDirs.length}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Unable to prune FaceAI debug artifacts:', error);
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveCompletionCode(logPath, matchCount) {
|
||||
if (matchCount > 0) {
|
||||
return null;
|
||||
|
|
@ -212,11 +295,14 @@ async function processJob(job) {
|
|||
});
|
||||
await releaseActiveSearchLock(connection, search.userId, searchId);
|
||||
throw error;
|
||||
} finally {
|
||||
await cleanupSearchArtifacts(search, searchDir);
|
||||
}
|
||||
}
|
||||
|
||||
await ensureMatcherBinaryAvailable();
|
||||
await publishProcessorHeartbeat();
|
||||
await runScheduledDebugCleanup();
|
||||
|
||||
const heartbeatTimer = setInterval(() => {
|
||||
publishProcessorHeartbeat();
|
||||
|
|
@ -224,6 +310,12 @@ const heartbeatTimer = setInterval(() => {
|
|||
|
||||
heartbeatTimer.unref();
|
||||
|
||||
const debugCleanupTimer = setInterval(() => {
|
||||
runScheduledDebugCleanup();
|
||||
}, config.debugCleanupIntervalMs);
|
||||
|
||||
debugCleanupTimer.unref();
|
||||
|
||||
const worker = new Worker(config.queueName, processJob, {
|
||||
connection,
|
||||
concurrency: config.workerConcurrency
|
||||
|
|
|
|||
|
|
@ -26,12 +26,19 @@ services:
|
|||
FACEAI_LOG_ROOT: ${FACEAI_LOG_ROOT:-/data/logs}
|
||||
FACEAI_AUDIT_DB_PATH: ${FACEAI_AUDIT_DB_PATH:-/data/logs/faceai-audit.sqlite}
|
||||
FACEAI_AUDIT_RETENTION_DAYS: ${FACEAI_AUDIT_RETENTION_DAYS:-730}
|
||||
FACEAI_SERVICE_LOG_MAX_BYTES: ${FACEAI_SERVICE_LOG_MAX_BYTES:-20971520}
|
||||
FACEAI_SERVICE_LOG_MAX_FILES: ${FACEAI_SERVICE_LOG_MAX_FILES:-5}
|
||||
FACEAI_PKL_ROOT: ${FACEAI_PKL_ROOT:-/data/pkl}
|
||||
FACEAI_ENABLE_LOCAL_LEGACY_STATIC: ${FACEAI_ENABLE_LOCAL_LEGACY_STATIC:-0}
|
||||
volumes:
|
||||
- ${FACEAI_RUNTIME_BIND:-/mnt/storage/data/faceai/runtime}:${FACEAI_RUNTIME_ROOT:-/data/runtime}
|
||||
- ${FACEAI_LOG_BIND:-/mnt/storage/data/faceai/logs}:${FACEAI_LOG_ROOT:-/data/logs}
|
||||
- ${FACEAI_PKL_BIND:-/mnt/nas12/nas2/RUS}:${FACEAI_PKL_ROOT:-/data/pkl}:ro
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m}
|
||||
max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}"
|
||||
ports:
|
||||
- "${FACEAI_PUBLISHED_PORT:-3001}:${FACEAI_PORT:-3001}"
|
||||
healthcheck:
|
||||
|
|
@ -59,6 +66,11 @@ services:
|
|||
depends_on:
|
||||
faceai:
|
||||
condition: service_healthy
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m}
|
||||
max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}"
|
||||
|
||||
processor:
|
||||
image: ${FACEAI_PROCESSOR_IMAGE:-forgejo.maddoscientisto.net/maddo/faceai-processor:latest}
|
||||
|
|
@ -76,9 +88,14 @@ services:
|
|||
FACEAI_REDIS_URL: ${FACEAI_REDIS_URL:-redis://redis:6379}
|
||||
FACEAI_QUEUE_NAME: ${FACEAI_QUEUE_NAME:-faceai-searches}
|
||||
FACEAI_RUNTIME_ROOT: ${FACEAI_RUNTIME_ROOT:-/data/runtime}
|
||||
FACEAI_UPLOAD_ROOT: ${FACEAI_UPLOAD_ROOT:-/data/runtime/uploads}
|
||||
FACEAI_LOG_ROOT: ${FACEAI_LOG_ROOT:-/data/logs}
|
||||
FACEAI_AUDIT_DB_PATH: ${FACEAI_AUDIT_DB_PATH:-/data/logs/faceai-audit.sqlite}
|
||||
FACEAI_AUDIT_RETENTION_DAYS: ${FACEAI_AUDIT_RETENTION_DAYS:-730}
|
||||
FACEAI_DEBUG_RETENTION_DAYS: ${FACEAI_DEBUG_RETENTION_DAYS:-3}
|
||||
FACEAI_DEBUG_CLEANUP_INTERVAL_MS: ${FACEAI_DEBUG_CLEANUP_INTERVAL_MS:-21600000}
|
||||
FACEAI_SERVICE_LOG_MAX_BYTES: ${FACEAI_SERVICE_LOG_MAX_BYTES:-20971520}
|
||||
FACEAI_SERVICE_LOG_MAX_FILES: ${FACEAI_SERVICE_LOG_MAX_FILES:-5}
|
||||
FACEAI_PKL_ROOT: ${FACEAI_PKL_ROOT:-/data/pkl}
|
||||
FACEAI_MATCHER_BINARY: ${FACEAI_MATCHER_BINARY:-/app/bin/face_matcher}
|
||||
FACEAI_MATCHER_TOLERANCE: ${FACEAI_MATCHER_TOLERANCE:-0.5}
|
||||
|
|
@ -88,6 +105,11 @@ services:
|
|||
- ${FACEAI_RUNTIME_BIND:-/mnt/storage/data/faceai/runtime}:${FACEAI_RUNTIME_ROOT:-/data/runtime}
|
||||
- ${FACEAI_LOG_BIND:-/mnt/storage/data/faceai/logs}:${FACEAI_LOG_ROOT:-/data/logs}
|
||||
- ${FACEAI_PKL_BIND:-/mnt/nas12/nas2/RUS}:${FACEAI_PKL_ROOT:-/data/pkl}:ro
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m}
|
||||
max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}"
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
|
|
@ -97,6 +119,11 @@ services:
|
|||
container_name: ${FACEAI_REDIS_CONTAINER_NAME:-regalami-faceai-redis}
|
||||
restart: unless-stopped
|
||||
command: ${FACEAI_REDIS_COMMAND:-redis-server --appendonly no}
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: ${FACEAI_DOCKER_LOG_MAX_SIZE:-10m}
|
||||
max-file: "${FACEAI_DOCKER_LOG_MAX_FILE:-3}"
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 5s
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import fs from 'node:fs';
|
||||
import fsp from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
import { spawn } from 'node:child_process';
|
||||
|
||||
const [, , logPath, ...commandArgs] = process.argv;
|
||||
const maxBytes = Number(process.env.FACEAI_SERVICE_LOG_MAX_BYTES || 20 * 1024 * 1024);
|
||||
const maxFiles = Math.max(1, Number(process.env.FACEAI_SERVICE_LOG_MAX_FILES || 5));
|
||||
|
||||
if (!logPath || commandArgs.length === 0) {
|
||||
process.stderr.write('Usage: node docker/run-with-log-file.mjs <log-path> <command> [args...]\n');
|
||||
|
|
@ -12,8 +13,60 @@ if (!logPath || commandArgs.length === 0) {
|
|||
}
|
||||
|
||||
await fsp.mkdir(path.dirname(logPath), { recursive: true });
|
||||
let currentSize = await fsp.stat(logPath).then((stats) => stats.size).catch(() => 0);
|
||||
let writeQueue = Promise.resolve();
|
||||
|
||||
async function pathExists(targetPath) {
|
||||
try {
|
||||
await fsp.access(targetPath);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function rotateLogFile() {
|
||||
if (!Number.isFinite(maxBytes) || maxBytes <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
await fsp.rm(`${logPath}.${maxFiles}`, { force: true }).catch(() => {});
|
||||
|
||||
for (let index = maxFiles - 1; index >= 1; index -= 1) {
|
||||
const sourcePath = `${logPath}.${index}`;
|
||||
const destinationPath = `${logPath}.${index + 1}`;
|
||||
|
||||
if (await pathExists(sourcePath)) {
|
||||
await fsp.rename(sourcePath, destinationPath);
|
||||
}
|
||||
}
|
||||
|
||||
if (await pathExists(logPath)) {
|
||||
await fsp.rename(logPath, `${logPath}.1`);
|
||||
}
|
||||
|
||||
currentSize = 0;
|
||||
}
|
||||
|
||||
async function appendChunkToLog(chunk) {
|
||||
const chunkSize = Buffer.isBuffer(chunk) ? chunk.length : Buffer.byteLength(chunk);
|
||||
|
||||
if (Number.isFinite(maxBytes) && maxBytes > 0 && currentSize > 0 && currentSize + chunkSize > maxBytes) {
|
||||
await rotateLogFile();
|
||||
}
|
||||
|
||||
await fsp.appendFile(logPath, chunk);
|
||||
currentSize += chunkSize;
|
||||
}
|
||||
|
||||
function queueChunkWrite(chunk) {
|
||||
writeQueue = writeQueue
|
||||
.then(() => appendChunkToLog(chunk))
|
||||
.catch((error) => {
|
||||
process.stderr.write(`${error.stack || error.message}\n`);
|
||||
});
|
||||
}
|
||||
|
||||
const logStream = fs.createWriteStream(logPath, { flags: 'a' });
|
||||
const child = spawn(commandArgs[0], commandArgs.slice(1), {
|
||||
cwd: process.cwd(),
|
||||
env: process.env,
|
||||
|
|
@ -22,7 +75,7 @@ const child = spawn(commandArgs[0], commandArgs.slice(1), {
|
|||
|
||||
function writeChunk(target, chunk) {
|
||||
target.write(chunk);
|
||||
logStream.write(chunk);
|
||||
queueChunkWrite(chunk);
|
||||
}
|
||||
|
||||
child.stdout.on('data', (chunk) => {
|
||||
|
|
@ -36,18 +89,18 @@ child.stderr.on('data', (chunk) => {
|
|||
child.on('error', (error) => {
|
||||
const message = `${error.stack || error.message}\n`;
|
||||
writeChunk(process.stderr, message);
|
||||
logStream.end(() => {
|
||||
writeQueue.finally(() => {
|
||||
process.exit(1);
|
||||
});
|
||||
});
|
||||
|
||||
child.on('close', (code, signal) => {
|
||||
logStream.end(() => {
|
||||
if (signal) {
|
||||
process.kill(process.pid, signal);
|
||||
return;
|
||||
}
|
||||
child.on('close', async (code, signal) => {
|
||||
await writeQueue;
|
||||
|
||||
process.exit(code ?? 1);
|
||||
});
|
||||
if (signal) {
|
||||
process.kill(process.pid, signal);
|
||||
return;
|
||||
}
|
||||
|
||||
process.exit(code ?? 1);
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue