feat: Implement AI workload settings and enhance AI processing summaries
This commit is contained in:
parent
4230300518
commit
88c193549f
8 changed files with 326 additions and 28 deletions
|
|
@ -4,6 +4,7 @@ using System.IO;
|
|||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Channels;
|
||||
using System.Threading.Tasks;
|
||||
using AIFotoONLUS.Core;
|
||||
using ImageCatalog_2.Models;
|
||||
|
|
@ -20,11 +21,11 @@ public class AiExtractionService : IAiExtractionService
|
|||
_logger = logger;
|
||||
}
|
||||
|
||||
public async Task RunAsync(
|
||||
public async Task<AiExtractionRunSummary> RunAsync(
|
||||
AiExtractionRequest request,
|
||||
CancellationToken token,
|
||||
Func<AiResultItem, Task> onResult,
|
||||
Func<double, Task> onProgress)
|
||||
Func<AiExtractionProgressUpdate, Task> onProgress)
|
||||
{
|
||||
var searchOption = request.Recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
|
||||
|
||||
|
|
@ -39,38 +40,114 @@ public class AiExtractionService : IAiExtractionService
|
|||
|
||||
var extractedResults = new List<AiResultItem>();
|
||||
var modelConfiguration = BuildModelConfiguration(request.ModelsFolderPath, request.UseGpu);
|
||||
|
||||
using var engine = new NumberRecognitionEngine(modelConfiguration, _logger);
|
||||
var workloadLevel = NormalizeWorkloadLevel(request.WorkloadLevel);
|
||||
var workerCount = ResolveWorkerCount(request.UseGpu, workloadLevel);
|
||||
var total = imageFiles.Count;
|
||||
if (total == 0)
|
||||
{
|
||||
var emptySummary = new AiExtractionRunSummary(0, 0, 0, 0, workloadLevel, workerCount);
|
||||
await onProgress(new AiExtractionProgressUpdate(0, 0, 100, 0, workloadLevel, workerCount)).ConfigureAwait(false);
|
||||
return emptySummary;
|
||||
}
|
||||
|
||||
var processed = 0;
|
||||
var total = imageFiles.Count;
|
||||
var failed = 0;
|
||||
Exception? firstFailure = null;
|
||||
|
||||
foreach (var file in imageFiles)
|
||||
var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
||||
var resultChannel = Channel.CreateUnbounded<AiResultItem>(new UnboundedChannelOptions
|
||||
{
|
||||
token.ThrowIfCancellationRequested();
|
||||
SingleReader = true,
|
||||
SingleWriter = false
|
||||
});
|
||||
var fileChannel = Channel.CreateBounded<string>(new BoundedChannelOptions(Math.Max(workerCount * 2, 1))
|
||||
{
|
||||
SingleReader = false,
|
||||
SingleWriter = true,
|
||||
FullMode = BoundedChannelFullMode.Wait
|
||||
});
|
||||
var failureLock = new object();
|
||||
var logLock = new object();
|
||||
var lastLoggedElapsed = TimeSpan.Zero;
|
||||
|
||||
var extracted = string.Empty;
|
||||
|
||||
try
|
||||
var reporterTask = Task.Run(async () =>
|
||||
{
|
||||
await foreach (var result in resultChannel.Reader.ReadAllAsync(token).ConfigureAwait(false))
|
||||
{
|
||||
extracted = engine.ProcessImage(file).Text;
|
||||
extractedResults.Add(result);
|
||||
await onResult(result).ConfigureAwait(false);
|
||||
|
||||
var currentProcessed = Interlocked.Increment(ref processed);
|
||||
var averageImagesPerSecond = CalculateAverageImagesPerSecond(currentProcessed, stopwatch.Elapsed);
|
||||
var percent = currentProcessed * 100.0 / total;
|
||||
await onProgress(new AiExtractionProgressUpdate(total, currentProcessed, percent, averageImagesPerSecond, workloadLevel, workerCount)).ConfigureAwait(false);
|
||||
|
||||
var shouldLog = false;
|
||||
lock (logLock)
|
||||
{
|
||||
if (currentProcessed == total || stopwatch.Elapsed - lastLoggedElapsed >= TimeSpan.FromSeconds(2))
|
||||
{
|
||||
lastLoggedElapsed = stopwatch.Elapsed;
|
||||
shouldLog = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldLog)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"Number AI progress: {Processed}/{Total} ({Percent:F1}%), {ImagesPerSecond:F2} img/s avg, workload {WorkloadLevel} ({WorkerCount} workers)",
|
||||
currentProcessed,
|
||||
total,
|
||||
percent,
|
||||
averageImagesPerSecond,
|
||||
workloadLevel,
|
||||
workerCount);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
}, token);
|
||||
|
||||
var workerTasks = Enumerable.Range(0, workerCount)
|
||||
.Select(_ => Task.Run(async () =>
|
||||
{
|
||||
failed++;
|
||||
firstFailure ??= ex;
|
||||
_logger.LogWarning(ex, "Error processing AI OCR for {File}", file);
|
||||
using var engine = new NumberRecognitionEngine(modelConfiguration, _logger);
|
||||
await foreach (var file in fileChannel.Reader.ReadAllAsync(token).ConfigureAwait(false))
|
||||
{
|
||||
var extracted = string.Empty;
|
||||
|
||||
try
|
||||
{
|
||||
extracted = engine.ProcessImage(file).Text;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
lock (failureLock)
|
||||
{
|
||||
failed++;
|
||||
firstFailure ??= ex;
|
||||
}
|
||||
|
||||
_logger.LogWarning(ex, "Error processing AI OCR for {File}", file);
|
||||
}
|
||||
|
||||
await resultChannel.Writer.WriteAsync(new AiResultItem { Path = file, Text = extracted }, token).ConfigureAwait(false);
|
||||
}
|
||||
}, token))
|
||||
.ToArray();
|
||||
|
||||
try
|
||||
{
|
||||
foreach (var file in imageFiles)
|
||||
{
|
||||
await fileChannel.Writer.WriteAsync(file, token).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
var result = new AiResultItem { Path = file, Text = extracted };
|
||||
extractedResults.Add(result);
|
||||
await onResult(result).ConfigureAwait(false);
|
||||
|
||||
processed++;
|
||||
var percent = total > 0 ? (processed * 100.0 / total) : 100.0;
|
||||
await onProgress(percent).ConfigureAwait(false);
|
||||
fileChannel.Writer.TryComplete();
|
||||
await Task.WhenAll(workerTasks).ConfigureAwait(false);
|
||||
}
|
||||
finally
|
||||
{
|
||||
fileChannel.Writer.TryComplete();
|
||||
resultChannel.Writer.TryComplete();
|
||||
await reporterTask.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
if (imageFiles.Count > 0 && failed == imageFiles.Count)
|
||||
|
|
@ -78,6 +155,23 @@ public class AiExtractionService : IAiExtractionService
|
|||
throw new InvalidOperationException($"AI OCR failed for all {imageFiles.Count} image(s). See previous log entries for details.", firstFailure);
|
||||
}
|
||||
|
||||
var summary = new AiExtractionRunSummary(
|
||||
total,
|
||||
processed,
|
||||
failed,
|
||||
CalculateAverageImagesPerSecond(processed, stopwatch.Elapsed),
|
||||
workloadLevel,
|
||||
workerCount);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Number AI completed: {Processed}/{Total} processed, {Failed} failures, {ImagesPerSecond:F2} img/s avg, workload {WorkloadLevel} ({WorkerCount} workers)",
|
||||
summary.ProcessedFiles,
|
||||
summary.TotalFiles,
|
||||
summary.FailedFiles,
|
||||
summary.AverageImagesPerSecond,
|
||||
summary.WorkloadLevel,
|
||||
summary.WorkerCount);
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(request.CsvOutputPath))
|
||||
{
|
||||
try
|
||||
|
|
@ -102,6 +196,43 @@ public class AiExtractionService : IAiExtractionService
|
|||
_logger.LogError(ex, "Failed to write CSV to {CsvOutputPath}", request.CsvOutputPath);
|
||||
}
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
/// <summary>
/// Computes the average throughput as processed items per elapsed second.
/// </summary>
/// <param name="processed">Number of items handled so far.</param>
/// <param name="elapsed">Time span over which the items were handled.</param>
/// <returns>
/// <paramref name="processed"/> divided by the elapsed seconds, or 0 when the
/// elapsed time is zero or negative (avoids dividing by a non-positive duration).
/// </returns>
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
{
    var seconds = elapsed.TotalSeconds;
    if (seconds > 0)
    {
        return processed / seconds;
    }

    return 0;
}
|
||||
|
||||
/// <summary>
/// Restricts a requested workload level to the supported range of 1 through 5.
/// </summary>
/// <param name="workloadLevel">The raw workload level supplied by the caller.</param>
/// <returns>
/// 1 when the value is below 1, 5 when it is above 5, otherwise the value unchanged.
/// </returns>
private static int NormalizeWorkloadLevel(int workloadLevel)
{
    if (workloadLevel < 1)
    {
        return 1;
    }

    if (workloadLevel > 5)
    {
        return 5;
    }

    return workloadLevel;
}
|
||||
|
||||
/// <summary>
/// Translates a workload level into the number of parallel workers to start.
/// </summary>
/// <param name="useGpu">
/// Selects the GPU mapping (levels 1-5 map to 1, 2, 4, 6, 8 workers) when true,
/// or the CPU mapping (levels 1-5 map to 1, 2, 3, 4, 5 workers) when false.
/// </param>
/// <param name="workloadLevel">
/// Requested workload level; it is normalized via <see cref="NormalizeWorkloadLevel"/> first.
/// </param>
/// <returns>
/// The mapped worker count, capped at <see cref="Environment.ProcessorCount"/>
/// (which is itself treated as at least 1).
/// </returns>
private static int ResolveWorkerCount(bool useGpu, int workloadLevel)
{
    var level = NormalizeWorkloadLevel(workloadLevel);

    int desired;
    if (useGpu)
    {
        desired = level switch
        {
            1 => 1,
            2 => 2,
            3 => 4,
            4 => 6,
            _ => 8
        };
    }
    else
    {
        desired = level switch
        {
            1 => 1,
            2 => 2,
            3 => 3,
            4 => 4,
            _ => 5
        };
    }

    // Never start more workers than there are logical processors.
    var processorCap = Math.Max(1, Environment.ProcessorCount);
    return Math.Min(desired, processorCap);
}
|
||||
|
||||
private static ModelConfiguration BuildModelConfiguration(string modelsFolderPath, bool useGpu)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue