using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using OpenCvSharp;
using OpenCvSharp.Dnn;

namespace AIFotoONLUS.Core
{
    /// <summary>
    /// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and
    /// provides methods to detect text regions and recognize digits.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the reviewed copy of this file had lost its generic type arguments
    /// (e.g. <c>List()</c>, <c>At(i, 0)</c>, <c>Task&gt;</c>). They are reconstructed here from usage;
    /// confirm <c>Task&lt;List&lt;ImageResult&gt;&gt;</c>, <c>IProgress&lt;ProcessingStats&gt;</c>,
    /// <c>IProgress&lt;ImageResult&gt;</c> and the non-generic <c>ILogger</c> against the real declarations.
    /// </remarks>
    public class NumberRecognitionEngine : IDisposable
    {
        // Scale factor handed to BlobFromImage; approximately 1/255.
        private const double PixelScale = 0.00392;

        // Output row layout consumed below: [cx, cy, w, h, objectness, class scores...].
        private const int ClassScoreOffset = 5;

        private readonly Net _detectionNet;
        private readonly Net _recognitionNet;
        private readonly object _detectionLock = new();
        private readonly object _recognitionLock = new();
        private readonly ModelConfiguration _cfg;
        private readonly ILogger? _logger;
        private bool _disposed;

        /// <summary>Creates an engine without logging.</summary>
        /// <param name="cfg">Model paths, input sizes and thresholds.</param>
        public NumberRecognitionEngine(ModelConfiguration cfg) : this(cfg, logger: null)
        {
        }

        /// <summary>
        /// Creates an engine, loading the detection and recognition networks from the
        /// files named in <paramref name="cfg"/>.
        /// </summary>
        /// <param name="cfg">Model paths, input sizes and thresholds.</param>
        /// <param name="logger">Optional logger; when null nothing is logged.</param>
        /// <exception cref="ArgumentNullException"><paramref name="cfg"/> is null.</exception>
        /// <exception cref="FileNotFoundException">A model file does not exist.</exception>
        public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger)
        {
            _logger = logger;
            _cfg = cfg ?? throw new ArgumentNullException(nameof(cfg));

            // Check each file on its own so the exception names the file that is actually missing.
            EnsureModelFileExists(_cfg.DetectionCfg, "Detection model files not found.");
            EnsureModelFileExists(_cfg.DetectionWeights, "Detection model files not found.");
            EnsureModelFileExists(_cfg.RecognitionCfg, "Recognition model files not found.");
            EnsureModelFileExists(_cfg.RecognitionWeights, "Recognition model files not found.");

            _detectionNet = LoadNet(_cfg.DetectionCfg, _cfg.DetectionWeights);
            try
            {
                _recognitionNet = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
            }
            catch
            {
                // Do not leak the first net when the second one fails to load.
                _detectionNet.Dispose();
                throw;
            }

            // Let OpenCV use multiple threads internally (use number of logical processors).
            try
            {
                Cv2.SetNumThreads(Environment.ProcessorCount);
            }
            catch (Exception ex)
            {
                // Best effort: may not be supported by the OpenCvSharp build.
                _logger?.LogDebug(ex, "Cv2.SetNumThreads is not available; continuing with defaults");
            }
        }

        /// <summary>Shape of one detection-network output, for diagnostics.</summary>
        public record DetectionOutput(string Name, int Rows, int Cols);

        /// <summary>Recognition result together with the detection output shapes.</summary>
        public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs);

        /// <summary>Disposes both shared networks. Safe to call more than once.</summary>
        public void Dispose()
        {
            if (_disposed)
            {
                return;
            }

            _detectionNet?.Dispose();
            _recognitionNet?.Dispose();
            _disposed = true;
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Runs the shared detection network on <paramref name="image"/> and returns the
        /// regions that survive confidence filtering and non-maximum suppression.
        /// </summary>
        /// <param name="image">Image to scan.</param>
        /// <returns>Detected regions; empty when nothing passes the threshold.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
        {
            ThrowIfDisposed();
            if (image is null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            return DetectTextRegions(_detectionNet, image);
        }

        /// <summary>
        /// Runs the shared recognition network on a crop and returns the recognized
        /// classes concatenated from left to right.
        /// </summary>
        /// <param name="croppedImage">Crop containing the digits.</param>
        /// <param name="context">Optional label used in the diagnostic crop file name.</param>
        /// <returns>The recognized text, or an empty string when nothing passes the threshold.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="croppedImage"/> is null.</exception>
        public string RecognizeDigits(Mat croppedImage, string? context = null)
        {
            ThrowIfDisposed();
            return RecognizeDigits(croppedImage, _recognitionNet, context);
        }

        /// <summary>
        /// Process a single image file and return the recognition result together with
        /// detection network forward output shapes for diagnostics.
        /// </summary>
        /// <param name="filePath">Path of the image to process.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="InvalidDataException">The file could not be decoded as an image.</exception>
        public DiagnosticResult ProcessFileWithDiagnostics(string filePath)
        {
            ThrowIfDisposed();
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("Image not found", filePath);
            }

            using var image = LoadImage(filePath);

            // A single forward pass feeds both the shape report and the detections.
            var outputs = ForwardOutputs(_detectionNet, image, _cfg.DetectionInputSize, out var outputNames);
            try
            {
                var shapes = outputs
                    .Select((mat, i) => new DetectionOutput(i < outputNames.Length ? outputNames[i] : $"out{i}", mat.Rows, mat.Cols))
                    .ToArray();

                var regions = BuildRegions(outputs, image.Width, image.Height);
                return new DiagnosticResult(RecognizeAndLog(image, regions, filePath), shapes);
            }
            finally
            {
                DisposeAll(outputs);
            }
        }

        /// <summary>
        /// Detects text regions in one image file and recognizes the digits in each region.
        /// </summary>
        /// <param name="filePath">Path of the image to process.</param>
        /// <returns>Result whose text is the comma-joined recognized strings.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="InvalidDataException">The file could not be decoded as an image.</exception>
        public ImageResult ProcessImage(string filePath)
        {
            ThrowIfDisposed();
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("Image not found", filePath);
            }

            using var image = LoadImage(filePath);
            var regions = DetectTextRegions(_detectionNet, image);
            return RecognizeAndLog(image, regions, filePath);
        }

        /// <summary>
        /// Synchronously processes the JPEG files directly inside <paramref name="directoryPath"/>.
        /// </summary>
        /// <param name="directoryPath">Directory to scan (not recursive).</param>
        /// <param name="skipTextNegative">When true, files whose name starts with "tn_" are skipped.</param>
        /// <returns>Results ordered by file name.</returns>
        /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
        public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
        {
            // Call the synchronous core directly instead of blocking on the async wrapper.
            return ProcessDirectoryCore(
                directoryPath,
                skipTextNegative,
                recursive: false,
                progress: null,
                resultProgress: null,
                CancellationToken.None);
        }

        /// <summary>
        /// Processes the JPEG files of a directory in parallel on the thread pool.
        /// </summary>
        /// <param name="directoryPath">Directory to scan.</param>
        /// <param name="skipTextNegative">When true, files whose name starts with "tn_" are skipped.</param>
        /// <param name="recursive">When true, subdirectories are scanned too.</param>
        /// <param name="progress">Optional sink for throughput statistics, reported after each file.</param>
        /// <param name="resultProgress">Optional sink receiving each successful result as it is produced.</param>
        /// <param name="cancellationToken">
        /// Cancels the run. Cancellation during processing returns the results gathered so far.
        /// </param>
        /// <returns>Results ordered by file name.</returns>
        /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
        public async Task<List<ImageResult>> ProcessDirectoryAsync(
            string directoryPath,
            bool skipTextNegative = false,
            bool recursive = false,
            IProgress<ProcessingStats>? progress = null,
            IProgress<ImageResult>? resultProgress = null,
            CancellationToken cancellationToken = default)
        {
            ThrowIfDisposed();
            if (!Directory.Exists(directoryPath))
            {
                throw new DirectoryNotFoundException(directoryPath);
            }

            return await Task.Run(
                    () => ProcessDirectoryCore(directoryPath, skipTextNegative, recursive, progress, resultProgress, cancellationToken),
                    cancellationToken)
                .ConfigureAwait(false);
        }

        // Replaces every character that is invalid in a file name with '_'.
        private static string SanitizeFileName(string name)
        {
            foreach (var c in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(c, '_');
            }

            return name;
        }

        // Throws FileNotFoundException naming the missing file.
        private static void EnsureModelFileExists(string path, string message)
        {
            if (!File.Exists(path))
            {
                throw new FileNotFoundException(message, path);
            }
        }

        // Loads a Darknet network and pins it to the OpenCV backend on the CPU target.
        private static Net LoadNet(string cfgPath, string weightsPath)
        {
            var net = CvDnn.ReadNetFromDarknet(cfgPath, weightsPath)
                ?? throw new InvalidOperationException($"Failed to load Darknet model '{cfgPath}'.");
            try
            {
                net.SetPreferableBackend(Backend.OPENCV);
                net.SetPreferableTarget(Target.CPU);
                return net;
            }
            catch
            {
                net.Dispose();
                throw;
            }
        }

        // Reads an image and fails with a managed exception when it cannot be decoded.
        private static Mat LoadImage(string filePath)
        {
            var image = Cv2.ImRead(filePath);
            if (image.Empty())
            {
                image.Dispose();
                throw new InvalidDataException($"Image could not be decoded: {filePath}");
            }

            return image;
        }

        private static void DisposeAll(IEnumerable<Mat> mats)
        {
            foreach (var mat in mats)
            {
                mat?.Dispose();
            }
        }

        private static bool IsJpegFile(string path) =>
            path.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase);

        private static bool IsTextNegative(string path) =>
            Path.GetFileName(path).StartsWith("tn_", StringComparison.OrdinalIgnoreCase);

        // Converts a centre/size box into a Rect clipped to the image.
        // Returns false for boxes that are non-finite or empty after clipping.
        private static bool TryBuildBox(float cx, float cy, float w, float h, int imageWidth, int imageHeight, out Rect box)
        {
            box = default;
            if (!float.IsFinite(cx) || !float.IsFinite(cy) || !float.IsFinite(w) || !float.IsFinite(h))
            {
                return false;
            }

            // Same rounding as before for boxes that are already inside the image.
            long left = (long)Math.Round(cx - w / 2);
            long top = (long)Math.Round(cy - h / 2);
            long right = left + (long)Math.Round(w);
            long bottom = top + (long)Math.Round(h);

            int x0 = (int)Math.Clamp(left, 0L, imageWidth);
            int y0 = (int)Math.Clamp(top, 0L, imageHeight);
            int x1 = (int)Math.Clamp(right, 0L, imageWidth);
            int y1 = (int)Math.Clamp(bottom, 0L, imageHeight);

            box = new Rect(x0, y0, x1 - x0, y1 - y0);
            return box.Width > 0 && box.Height > 0;
        }

        private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames();

        private void ThrowIfDisposed()
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(nameof(NumberRecognitionEngine));
            }
        }

        // Runs one forward pass and returns one Mat per unconnected output layer.
        // The caller owns the returned Mats and must dispose them.
        private Mat[] ForwardOutputs(Net net, Mat image, Size inputSize, out string[] outputNames)
        {
            using var blob = CvDnn.BlobFromImage(image, PixelScale, inputSize, new Scalar(0, 0, 0), true, false);
            net.SetInput(blob);

            outputNames = GetOutputLayerNames(net) ?? Array.Empty<string>();
            if (outputNames.Length == 0)
            {
                return Array.Empty<Mat>();
            }

            // Forward fills the Mats it is given, so one must be allocated per output name.
            // Passing an empty collection (as before) returns no outputs at all.
            var outputs = new Mat[outputNames.Length];
            for (int i = 0; i < outputs.Length; i++)
            {
                outputs[i] = new Mat();
            }

            try
            {
                net.Forward(outputs, outputNames);
                return outputs;
            }
            catch
            {
                DisposeAll(outputs);
                throw;
            }
        }

        // Decodes network output rows into boxes, scores and class ids above the confidence threshold.
        // NOTE(review): assumes each output is a 2-D float matrix with normalized coordinates; confirm for the models in use.
        private YoloCandidates ParseCandidates(IEnumerable<Mat> outputs, int imageWidth, int imageHeight)
        {
            var candidates = new YoloCandidates();
            var threshold = _cfg.ConfidenceThreshold;

            foreach (var output in outputs)
            {
                // Need the five box/objectness columns plus at least one class column.
                if (output is null || output.Empty() || output.Cols <= ClassScoreOffset)
                {
                    continue;
                }

                for (int row = 0; row < output.Rows; row++)
                {
                    float objectness = output.At<float>(row, 4);
                    float bestScore = 0f;
                    int bestClass = -1;
                    for (int col = ClassScoreOffset; col < output.Cols; col++)
                    {
                        // Combine objectness and class probability.
                        float score = objectness * output.At<float>(row, col);
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestClass = col - ClassScoreOffset;
                        }
                    }

                    // bestClass stays -1 when no class scored above zero; never emit it as an index.
                    if (bestClass < 0 || !(bestScore > threshold))
                    {
                        continue;
                    }

                    float cx = output.At<float>(row, 0) * imageWidth;
                    float cy = output.At<float>(row, 1) * imageHeight;
                    float w = output.At<float>(row, 2) * imageWidth;
                    float h = output.At<float>(row, 3) * imageHeight;
                    if (!TryBuildBox(cx, cy, w, h, imageWidth, imageHeight, out var box))
                    {
                        continue;
                    }

                    candidates.Boxes.Add(box);
                    candidates.Confidences.Add(bestScore);
                    candidates.ClassIds.Add(bestClass);
                    candidates.CenterXs.Add(cx);
                }
            }

            return candidates;
        }

        // Applies non-maximum suppression to the decoded candidates and wraps the survivors.
        private List<DetectedRegion> BuildRegions(IEnumerable<Mat> outputs, int imageWidth, int imageHeight)
        {
            var regions = new List<DetectedRegion>();
            var candidates = ParseCandidates(outputs, imageWidth, imageHeight);
            if (candidates.Boxes.Count == 0)
            {
                return regions;
            }

            CvDnn.NMSBoxes(
                candidates.Boxes,
                candidates.Confidences,
                (float)_cfg.ConfidenceThreshold,
                (float)_cfg.NmsThreshold,
                out int[] kept);

            foreach (var index in kept)
            {
                var box = candidates.Boxes[index];
                double centerX = box.X + box.Width / 2.0;
                regions.Add(new DetectedRegion(box, candidates.Confidences[index], candidates.ClassIds[index], centerX));
            }

            return regions;
        }

        // Internal variant that accepts a Net instance so it can be used from parallel workers.
        private List<DetectedRegion> DetectTextRegions(Net detectionNet, Mat image)
        {
            var outputs = ForwardOutputs(detectionNet, image, _cfg.DetectionInputSize, out _);
            try
            {
                return BuildRegions(outputs, image.Width, image.Height);
            }
            finally
            {
                DisposeAll(outputs);
            }
        }

        // Overload of RecognizeDigits that accepts a Net for worker threads.
        private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null)
        {
            if (croppedImage is null)
            {
                throw new ArgumentNullException(nameof(croppedImage));
            }

            // Optionally save crop image for diagnostics when enabled in configuration.
            if (_cfg.EnableCropSaving)
            {
                SaveCropForDiagnostics(croppedImage, context);
            }

            var outputs = ForwardOutputs(recognitionNet, croppedImage, _cfg.RecognitionInputSize, out _);
            try
            {
                var candidates = ParseCandidates(outputs, croppedImage.Width, croppedImage.Height);
                if (candidates.ClassIds.Count == 0)
                {
                    return string.Empty;
                }

                CvDnn.NMSBoxes(
                    candidates.Boxes,
                    candidates.Confidences,
                    (float)_cfg.ConfidenceThreshold,
                    (float)_cfg.NmsThreshold,
                    out int[] keep);

                // Read the surviving detections left to right.
                var ordered = keep
                    .OrderBy(i => candidates.CenterXs[i])
                    .Select(i => _cfg.NumberClasses[candidates.ClassIds[i]]);
                return string.Concat(ordered);
            }
            finally
            {
                DisposeAll(outputs);
            }
        }

        // Writes the crop under logs/crops; failures are logged and never propagate.
        private void SaveCropForDiagnostics(Mat croppedImage, string? context)
        {
            try
            {
                var cropsDir = Path.Combine("logs", "crops");
                Directory.CreateDirectory(cropsDir);
                var fname = $"{(string.IsNullOrEmpty(context) ? "crop" : SanitizeFileName(context))}_{DateTime.UtcNow:yyyyMMdd_HHmmss_fff}_{Guid.NewGuid():N}.jpg";
                var full = Path.Combine(cropsDir, fname);
                Cv2.ImWrite(full, croppedImage);
            }
            catch (Exception ex)
            {
                _logger?.LogError(ex, "Failed saving crop for diagnostics");
            }
        }

        // Crops every region out of the image and recognizes it; empty results are dropped.
        private List<string> RecognizeRegions(
            Mat image,
            IEnumerable<DetectedRegion> regions,
            string fileName,
            Net recognitionNet,
            bool retryWithFreshNet)
        {
            var texts = new List<string>();
            foreach (var region in regions)
            {
                var box = region.BoundingBox;
                using var crop = new Mat(image, box);
                var context = $"{fileName}_{box.X}_{box.Y}_{box.Width}x{box.Height}";

                var text = RecognizeDigits(crop, recognitionNet, context);
                if (retryWithFreshNet && string.IsNullOrEmpty(text))
                {
                    text = RecognizeWithFreshNet(crop, context);
                }

                if (!string.IsNullOrEmpty(text))
                {
                    texts.Add(text);
                }
            }

            return texts;
        }

        // Diagnostic retry with a newly loaded recognition net.
        // NOTE(review): this reloads the weights from disk for every empty crop; consider removing
        // it once the per-thread nets are confirmed to return the same results.
        private string RecognizeWithFreshNet(Mat crop, string context)
        {
            try
            {
                using var freshNet = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
                return RecognizeDigits(crop, freshNet, context);
            }
            catch (Exception ex)
            {
                _logger?.LogWarning(ex, "Fresh-net recognition retry failed for {context}", context);
                return string.Empty;
            }
        }

        // Last-resort retry on the shared nets, serialized by the two locks.
        // Returns an empty list when the retry finds nothing or fails.
        private List<string> RetryWithSharedNets(Mat image, string fileName)
        {
            var texts = new List<string>();
            try
            {
                List<DetectedRegion> regions;
                lock (_detectionLock)
                {
                    regions = DetectTextRegions(_detectionNet, image);
                }

                foreach (var region in regions)
                {
                    var box = region.BoundingBox;
                    using var crop = new Mat(image, box);
                    var context = $"{fileName}_{box.X}_{box.Y}_{box.Width}x{box.Height}";

                    string text;
                    lock (_recognitionLock)
                    {
                        text = RecognizeDigits(crop, _recognitionNet, context);
                    }

                    if (!string.IsNullOrEmpty(text))
                    {
                        texts.Add(text);
                    }
                }
            }
            catch (Exception ex)
            {
                // The retry is best effort; keep whatever was gathered before the failure.
                _logger?.LogWarning(ex, "Shared-net retry failed for {file}", fileName);
            }

            return texts;
        }

        // Recognizes all regions with the shared recognition net and logs the outcome.
        private ImageResult RecognizeAndLog(Mat image, IEnumerable<DetectedRegion> regions, string filePath)
        {
            var fileName = Path.GetFileName(filePath);
            var texts = RecognizeRegions(image, regions, fileName, _recognitionNet, retryWithFreshNet: false);

            var result = new ImageResult(fileName, string.Join(",", texts), filePath);
            if (!string.IsNullOrEmpty(result.Text))
            {
                _logger?.LogInformation("Processed image {file} -> {text}", result.FileName, result.Text);
            }
            else
            {
                _logger?.LogDebug("Processed image {file} -> (no text)", result.FileName);
            }

            return result;
        }

        // Creates the detection/recognition pair owned by one worker thread.
        private (Net detNet, Net recNet) CreateWorkerNets()
        {
            var detNet = LoadNet(_cfg.DetectionCfg, _cfg.DetectionWeights);
            try
            {
                return (detNet, LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights));
            }
            catch
            {
                detNet.Dispose();
                throw;
            }
        }

        private void DisposeQuietly(Net net)
        {
            try
            {
                net.Dispose();
            }
            catch (Exception ex)
            {
                _logger?.LogDebug(ex, "Failed disposing worker net");
            }
        }

        // Processes one file on a worker thread using that thread's own nets.
        private ImageResult ProcessWorkerFile(string file, string fileName, (Net detNet, Net recNet) nets)
        {
            using var image = LoadImage(file);
            var regions = DetectTextRegions(nets.detNet, image);
            var texts = RecognizeRegions(image, regions, fileName, nets.recNet, retryWithFreshNet: true);

            // If no text was recognized with per-thread nets, try once more on the shared nets under a lock.
            if (texts.Count == 0)
            {
                var sharedTexts = RetryWithSharedNets(image, fileName);
                if (sharedTexts.Count > 0)
                {
                    texts = sharedTexts;
                }
            }

            return new ImageResult(fileName, string.Join(",", texts), file);
        }

        // Synchronous core shared by ProcessDirectory and ProcessDirectoryAsync.
        private List<ImageResult> ProcessDirectoryCore(
            string directoryPath,
            bool skipTextNegative,
            bool recursive,
            IProgress<ProcessingStats>? progress,
            IProgress<ImageResult>? resultProgress,
            CancellationToken cancellationToken)
        {
            ThrowIfDisposed();
            if (!Directory.Exists(directoryPath))
            {
                throw new DirectoryNotFoundException(directoryPath);
            }

            var searchOption = recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;

            // Skipped files are filtered out up front so the reported total matches the work
            // that is actually done and progress can reach 100%.
            var files = Directory.EnumerateFiles(directoryPath, "*.*", searchOption)
                .Where(IsJpegFile)
                .Where(path => !(skipTextNegative && IsTextNegative(path)))
                .ToArray();

            var results = new ConcurrentBag<ImageResult>();
            var total = files.Length;
            var processed = 0;
            var stopwatch = Stopwatch.StartNew();

            // Per-thread nets (each worker gets its own pair) to allow parallel forward calls.
            // trackAllValues lets every created pair be disposed at the end.
            using var workerNets = new ThreadLocal<(Net detNet, Net recNet)>(CreateWorkerNets, trackAllValues: true);
            var options = new ParallelOptions
            {
                MaxDegreeOfParallelism = Environment.ProcessorCount,
                CancellationToken = cancellationToken,
            };

            try
            {
                Parallel.ForEach(files, options, file =>
                {
                    cancellationToken.ThrowIfCancellationRequested();
                    var fileName = Path.GetFileName(file);
                    try
                    {
                        var result = ProcessWorkerFile(file, fileName, workerNets.Value);
                        if (!string.IsNullOrEmpty(result.Text))
                        {
                            _logger?.LogInformation("[{file}] Result: {text}", result.FileName, result.Text);
                        }

                        results.Add(result);
                        resultProgress?.Report(result);
                    }
                    catch (Exception ex)
                    {
                        _logger?.LogError(ex, "Error processing image {file}", fileName);
                        results.Add(new ImageResult(fileName, string.Empty, file));
                    }
                    finally
                    {
                        var done = Interlocked.Increment(ref processed);
                        if (progress != null)
                        {
                            var elapsed = Math.Max(1, stopwatch.ElapsedMilliseconds);
                            var imagesPerSecond = done * 1000.0 / elapsed;
                            progress.Report(new ProcessingStats(total, done, imagesPerSecond));
                        }
                    }
                });
            }
            catch (OperationCanceledException)
            {
                // Cancellation requested: exit gracefully and return partial results.
            }
            finally
            {
                foreach (var (detNet, recNet) in workerNets.Values)
                {
                    DisposeQuietly(detNet);
                    DisposeQuietly(recNet);
                }
            }

            return results.OrderBy(r => r.FileName).ToList();
        }

        // Parallel lists describing the decoded candidates; index i refers to the same detection in each.
        private sealed class YoloCandidates
        {
            public List<Rect> Boxes { get; } = new();

            public List<float> Confidences { get; } = new();

            public List<int> ClassIds { get; } = new();

            public List<float> CenterXs { get; } = new();
        }
    }
}