using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using OpenCvSharp;
using OpenCvSharp.Dnn;

namespace AIFotoONLUS.Core
{
    /// <summary>
    /// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO)
    /// models through OpenCvSharp's DNN API and exposes simple synchronous and
    /// asynchronous methods to detect numeric text regions in images and recognize
    /// the digits contained within those regions.
    /// </summary>
    /// <remarks>
    /// <para>Overview</para>
    /// <list type="bullet">
    /// <item><description>Loads two Darknet networks: a detection network (finds text regions)
    /// and a recognition network (recognizes digits inside a cropped region).</description></item>
    /// <item><description>Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and
    /// perform non-maximum suppression (NMS) on detection candidates.</description></item>
    /// <item><description>Provides single-image and directory-level processing APIs. Directory
    /// processing runs parallel workers, each with its own per-thread Net instances so that
    /// forward calls can run concurrently.</description></item>
    /// </list>
    /// <para>Threading and performance notes</para>
    /// <list type="bullet">
    /// <item><description>The class constructs and owns two shared Net instances used by the
    /// simple (single-threaded) APIs. Those APIs do not lock; only the directory pipeline's
    /// fallback path calls into the shared nets under a lock.</description></item>
    /// <item><description>OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported.</description></item>
    /// </list>
    /// <para>Diagnostics</para>
    /// <para>When enabled via the configuration, crops are saved to <c>logs/crops</c> for
    /// debugging. The <see cref="ModelConfiguration"/> contains the thresholds and paths used
    /// by the engine.</para>
    /// </remarks>
    public class NumberRecognitionEngine : IDisposable
    {
        /// <summary>Scale factor applied to pixel values when building the input blob (about 1/255).</summary>
        private const double PixelScale = 0.00392;

        /// <summary>Index of the objectness column in a YOLO output row.</summary>
        private const int ObjectnessColumn = 4;

        /// <summary>Index of the first per-class probability column in a YOLO output row.</summary>
        private const int FirstClassColumn = 5;

        private readonly Net _detectionNet;
        private readonly Net _recognitionNet;
        private readonly object _detectionLock = new();
        private readonly object _recognitionLock = new();
        private readonly ModelConfiguration _cfg;
        private readonly ILogger? _logger;
        private bool _disposed;

        /// <summary>
        /// One decoded network output row that passed the confidence threshold.
        /// </summary>
        /// <param name="Box">Bounding box in pixels, clamped to the image bounds.</param>
        /// <param name="Score">Objectness multiplied by the best class probability.</param>
        /// <param name="ClassId">Zero-based index of the best class.</param>
        /// <param name="CenterX">Unclamped horizontal center in pixels; used for left-to-right ordering.</param>
        private sealed record Candidate(Rect Box, float Score, int ClassId, float CenterX);

        /// <summary>
        /// Create a new instance of <see cref="NumberRecognitionEngine"/> using the
        /// provided <see cref="ModelConfiguration"/>. The constructor loads the
        /// detection and recognition Darknet model files and configures the nets for
        /// CPU or CUDA inference depending on the configuration.
        /// </summary>
        /// <param name="cfg">Model configuration containing file paths, thresholds
        /// and other options. Must not be null.</param>
        /// <remarks>
        /// This constructor throws <see cref="FileNotFoundException"/> when
        /// any of the expected model files are missing. For logging purposes an
        /// overload accepting an <see cref="ILogger"/> is available.
        /// </remarks>
        public NumberRecognitionEngine(ModelConfiguration cfg) : this(cfg, logger: null)
        {
        }

        /// <summary>
        /// Create a new instance of <see cref="NumberRecognitionEngine"/> with an
        /// optional <see cref="ILogger"/>. The logger receives diagnostic
        /// messages and errors produced by the engine during processing.
        /// </summary>
        /// <param name="cfg">Model configuration containing file paths and
        /// runtime thresholds.</param>
        /// <param name="logger">Optional logger for diagnostic messages.
        /// May be null.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="cfg"/>
        /// is null.</exception>
        /// <exception cref="FileNotFoundException">Thrown when one of the model
        /// files referenced by <paramref name="cfg"/> does not exist.</exception>
        public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger)
        {
            _logger = logger;
            _cfg = cfg ?? throw new ArgumentNullException(nameof(cfg));

            if (!File.Exists(_cfg.DetectionCfg) || !File.Exists(_cfg.DetectionWeights))
            {
                throw new FileNotFoundException("Detection model files not found.", _cfg.DetectionCfg);
            }

            if (!File.Exists(_cfg.RecognitionCfg) || !File.Exists(_cfg.RecognitionWeights))
            {
                throw new FileNotFoundException("Recognition model files not found.", _cfg.RecognitionCfg);
            }

            _detectionNet = LoadNet(_cfg.DetectionCfg, _cfg.DetectionWeights, _cfg.UseGpu);
            try
            {
                _recognitionNet = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights, _cfg.UseGpu);
            }
            catch
            {
                // Do not leak the native detection net when the second model fails to load.
                _detectionNet.Dispose();
                throw;
            }

            // Let OpenCV use multiple threads internally (one per logical processor).
            try
            {
                Cv2.SetNumThreads(Environment.ProcessorCount);
            }
            catch (Exception ex)
            {
                // Best effort: not every OpenCvSharp build supports this call.
                _logger?.LogDebug(ex, "Cv2.SetNumThreads is not supported by this OpenCvSharp build");
            }
        }

        /// <summary>
        /// Releases the two shared networks owned by this instance. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (_disposed)
            {
                return;
            }

            _detectionNet?.Dispose();
            _recognitionNet?.Dispose();
            _disposed = true;
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Replaces every character that is invalid in a file name with an underscore.
        /// </summary>
        private static string SanitizeFileName(string name)
        {
            foreach (var c in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(c, '_');
            }

            return name;
        }

        /// <summary>
        /// Returns the names of the unconnected output layers of <paramref name="net"/>,
        /// dropping null or empty entries. Never returns null.
        /// </summary>
        private string[] GetOutputLayerNames(Net net) =>
            (net.GetUnconnectedOutLayersNames() ?? Array.Empty<string>())
                .Where(n => !string.IsNullOrEmpty(n))
                .Select(n => n!)
                .ToArray();

        /// <summary>
        /// Selects the CUDA backend/target when <paramref name="useGpu"/> is true,
        /// otherwise the OpenCV backend with the CPU target.
        /// </summary>
        private static void ConfigureNetRuntime(Net net, bool useGpu)
        {
            if (useGpu)
            {
                net.SetPreferableBackend(Backend.CUDA);
                net.SetPreferableTarget(Target.CUDA);
                return;
            }

            net.SetPreferableBackend(Backend.OPENCV);
            net.SetPreferableTarget(Target.CPU);
        }

        /// <summary>
        /// Loads a Darknet network from disk and configures its backend. The net is
        /// disposed again if configuration fails.
        /// </summary>
        private static Net LoadNet(string cfgPath, string weightsPath, bool useGpu)
        {
            var net = CvDnn.ReadNetFromDarknet(cfgPath, weightsPath)
                ?? throw new InvalidOperationException($"Failed to load Darknet model '{cfgPath}'.");
            try
            {
                ConfigureNetRuntime(net, useGpu);
                return net;
            }
            catch
            {
                net.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Detect text regions in the supplied image using the shared detection network.
        /// </summary>
        /// <param name="image">Input image as an OpenCvSharp <see cref="Mat"/>.
        /// Must not be null. An empty image yields no regions.</param>
        /// <returns>An enumerable of <see cref="DetectedRegion"/> containing the
        /// bounding boxes, confidence and class information for each detected
        /// region. The results are already filtered with the configured
        /// confidence and NMS thresholds, and every bounding box lies inside the image.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="image"/> is null.</exception>
        public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
        {
            if (image is null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            return DetectTextRegions(_detectionNet, image);
        }

        /// <summary>
        /// Internal variant that accepts a Net instance so it can be used from parallel workers.
        /// </summary>
        private IEnumerable<DetectedRegion> DetectTextRegions(Net detectionNet, Mat image)
        {
            if (image.Empty())
            {
                // Cv2.ImRead returns an empty Mat for unreadable files; there is nothing to detect.
                _logger?.LogWarning("Detection skipped: input image is empty");
                return Enumerable.Empty<DetectedRegion>();
            }

            var candidates = InferCandidates(detectionNet, image, _cfg.DetectionInputSize);
            if (candidates.Count == 0)
            {
                return Enumerable.Empty<DetectedRegion>();
            }

            var results = new List<DetectedRegion>();
            foreach (int idx in SuppressOverlaps(candidates))
            {
                var candidate = candidates[idx];
                double centerX = candidate.Box.X + candidate.Box.Width / 2.0;
                results.Add(new DetectedRegion(candidate.Box, candidate.Score, candidate.ClassId, centerX));
            }

            return results;
        }

        /// <summary>
        /// Recognize digits inside a cropped image region using the shared recognition
        /// network. The method runs the recognition network and returns the
        /// concatenated sequence of recognized digit labels ordered left-to-right.
        /// </summary>
        /// <param name="croppedImage">Cropped image containing digits as
        /// <see cref="Mat"/>. Must not be null.</param>
        /// <param name="context">Optional context string used for diagnostics
        /// (e.g. when saving crop image files).</param>
        /// <returns>A string containing recognized digits in left-to-right order.
        /// Returns an empty string when the crop is empty or no digits are recognized
        /// above the configured confidence threshold.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="croppedImage"/> is null.</exception>
        public string RecognizeDigits(Mat croppedImage, string? context = null)
        {
            if (croppedImage is null)
            {
                throw new ArgumentNullException(nameof(croppedImage));
            }

            return RecognizeDigits(croppedImage, _recognitionNet, context);
        }

        /// <summary>
        /// Small DTO that describes the name and shape of a detection network
        /// forward output used for diagnostics.
        /// </summary>
        /// <param name="Name">Layer/output name.</param>
        /// <param name="Rows">Number of rows in the output Mat.</param>
        /// <param name="Cols">Number of columns in the output Mat.</param>
        public record DetectionOutput(string Name, int Rows, int Cols);

        /// <summary>
        /// Result returned by <see cref="ProcessFileWithDiagnostics"/>; contains
        /// the recognized text result and an array describing detection network
        /// forward outputs (shapes and names), which are useful for debugging
        /// model output layout mismatches.
        /// </summary>
        /// <param name="Result">Recognition result for the processed image.</param>
        /// <param name="DetectionOutputs">Array describing detection net outputs.</param>
        public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs);

        /// <summary>
        /// Process a single image file and return the recognition result together
        /// with detection network forward output shapes for diagnostics. This
        /// method reads the image from disk, runs a forward pass over the
        /// detection network to capture the raw output Mat shapes and then calls
        /// the normal processing pipeline to return the recognized text.
        /// </summary>
        /// <param name="filePath">Path to an image file on disk.</param>
        /// <returns>The recognition result plus one <see cref="DetectionOutput"/> per
        /// detection output layer (none when the image could not be decoded).</returns>
        /// <exception cref="FileNotFoundException">Thrown when <paramref name="filePath"/> does not exist.</exception>
        public DiagnosticResult ProcessFileWithDiagnostics(string filePath)
        {
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("Image not found", filePath);
            }

            DetectionOutput[] outputs;
            using (var image = Cv2.ImRead(filePath))
            {
                outputs = image.Empty()
                    ? Array.Empty<DetectionOutput>()
                    : DescribeDetectionOutputs(image);
            }

            // Run the normal processing to get the recognized text.
            var imgRes = ProcessImage(filePath);
            return new DiagnosticResult(imgRes, outputs);
        }

        /// <summary>
        /// Process a single image file and return the recognized text as an
        /// <see cref="ImageResult"/>. The method detects candidate text regions
        /// and runs recognition on each crop. Multiple recognized digit sequences
        /// are joined with a comma in the returned result.
        /// </summary>
        /// <param name="filePath">Path to an image file on disk. Supported
        /// formats depend on OpenCV (typically JPEG, PNG, ...).</param>
        /// <returns>An <see cref="ImageResult"/> containing the file name and
        /// recognized text (possibly empty).</returns>
        /// <exception cref="FileNotFoundException">Thrown when <paramref name="filePath"/> does not exist.</exception>
        public ImageResult ProcessImage(string filePath)
        {
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("Image not found", filePath);
            }

            var fileName = Path.GetFileName(filePath);
            using var image = Cv2.ImRead(filePath);

            var regions = DetectTextRegions(image).ToArray();
            var texts = RecognizeRegions(image, regions, fileName, (crop, ctx) => RecognizeDigits(crop, ctx));

            var result = new ImageResult(fileName, string.Join(",", texts), filePath);
            if (!string.IsNullOrEmpty(result.Text))
            {
                _logger?.LogInformation("Processed image {file} -> {text}", result.FileName, result.Text);
            }
            else
            {
                _logger?.LogDebug("Processed image {file} -> (no text)", result.FileName);
            }

            return result;
        }

        /// <summary>
        /// Process all JPEG images in a directory and return the recognition
        /// results. This is a blocking wrapper over <see cref="ProcessDirectoryAsync"/>.
        /// </summary>
        /// <param name="directoryPath">Path to a directory containing images.</param>
        /// <param name="skipTextNegative">If true, files whose names start with
        /// "tn_" will be skipped (convention used to mark text-negative images).</param>
        /// <returns>Collection of <see cref="ImageResult"/> ordered by file name.</returns>
        public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
        {
            // Simple wrapper over the async implementation.
            return ProcessDirectoryAsync(directoryPath, skipTextNegative).GetAwaiter().GetResult();
        }

        /// <summary>
        /// Process all <c>.jpg</c>/<c>.jpeg</c> files in a directory in parallel. Each
        /// worker thread uses its own pair of networks; when a file yields no text the
        /// shared networks are tried once more under a lock.
        /// </summary>
        /// <param name="directoryPath">Directory to scan.</param>
        /// <param name="skipTextNegative">If true, files whose names start with "tn_" are
        /// excluded from processing and from the reported total.</param>
        /// <param name="recursive">If true, subdirectories are scanned as well.</param>
        /// <param name="progress">Optional sink that receives a <see cref="ProcessingStats"/>
        /// (total, processed, images per second) after every file.</param>
        /// <param name="resultProgress">Optional sink that receives each successfully
        /// produced <see cref="ImageResult"/> as soon as it is available.</param>
        /// <param name="cancellationToken">Token used to stop processing. When cancellation
        /// is observed while files are being processed, the results gathered so far are returned.</param>
        /// <returns>The collected results ordered by file name. Files that failed are
        /// included with an empty text.</returns>
        /// <exception cref="DirectoryNotFoundException">Thrown when
        /// <paramref name="directoryPath"/> does not exist.</exception>
        public async Task<IEnumerable<ImageResult>> ProcessDirectoryAsync(
            string directoryPath,
            bool skipTextNegative = false,
            bool recursive = false,
            IProgress<ProcessingStats>? progress = null,
            IProgress<ImageResult>? resultProgress = null,
            CancellationToken cancellationToken = default)
        {
            if (!Directory.Exists(directoryPath))
            {
                throw new DirectoryNotFoundException(directoryPath);
            }

            var searchOption = recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;

            // Skipped files are filtered out here so that the reported total matches the
            // number of files that will actually be processed.
            var files = Directory.EnumerateFiles(directoryPath, "*.*", searchOption)
                .Where(f => f.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
                         || f.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase))
                .Where(f => !skipTextNegative
                         || !Path.GetFileName(f).StartsWith("tn_", StringComparison.OrdinalIgnoreCase))
                .ToArray();

            var bag = new ConcurrentBag<ImageResult>();
            var dop = Environment.ProcessorCount;
            var total = files.Length;
            var processed = 0;
            var sw = Stopwatch.StartNew();

            // Per-thread nets (each worker gets its own pair) to allow parallel forward calls.
            // Every created pair is also tracked in netsBag so it can be disposed afterwards.
            var netsBag = new ConcurrentBag<(Net detNet, Net recNet)>();
            var threadLocalNets = new ThreadLocal<(Net detNet, Net recNet)>(() =>
            {
                var det = LoadNet(_cfg.DetectionCfg, _cfg.DetectionWeights, _cfg.UseGpu);
                try
                {
                    var rec = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights, _cfg.UseGpu);
                    netsBag.Add((det, rec));
                    return (det, rec);
                }
                catch
                {
                    det.Dispose();
                    throw;
                }
            });

            try
            {
                await Task.Run(() =>
                {
                    try
                    {
                        var options = new ParallelOptions
                        {
                            MaxDegreeOfParallelism = dop,
                            CancellationToken = cancellationToken,
                        };

                        Parallel.ForEach(files, options, f =>
                        {
                            cancellationToken.ThrowIfCancellationRequested();
                            var filename = Path.GetFileName(f);

                            try
                            {
                                var nets = threadLocalNets.Value;
                                var imgRes = ProcessFileInWorker(f, filename, nets.detNet, nets.recNet);

                                if (!string.IsNullOrEmpty(imgRes.Text))
                                {
                                    _logger?.LogInformation("[{file}] Result: {text}", imgRes.FileName, imgRes.Text);
                                }

                                bag.Add(imgRes);
                                resultProgress?.Report(imgRes);
                            }
                            catch (Exception ex)
                            {
                                _logger?.LogError(ex, "Error processing image {file}", filename);
                                bag.Add(new ImageResult(filename, string.Empty, f));
                            }
                            finally
                            {
                                var proc = Interlocked.Increment(ref processed);
                                if (progress != null)
                                {
                                    var elapsed = Math.Max(1, sw.ElapsedMilliseconds);
                                    var ips = proc * 1000.0 / elapsed;
                                    progress.Report(new ProcessingStats(total, proc, ips));
                                }
                            }
                        });
                    }
                    catch (OperationCanceledException)
                    {
                        // Cancellation requested: exit gracefully and return partial results.
                    }
                }, cancellationToken).ConfigureAwait(false);
            }
            finally
            {
                // Dispose the per-thread nets even when the awaited task faulted or was cancelled.
                while (netsBag.TryTake(out var pair))
                {
                    DisposeQuietly(pair.detNet);
                    DisposeQuietly(pair.recNet);
                }

                threadLocalNets.Dispose();
            }

            return bag.OrderBy(b => b.FileName).ToList();
        }

        /// <summary>
        /// Worker overload of <see cref="RecognizeDigits(Mat, string?)"/> that
        /// accepts a <see cref="Net"/> instance. This is used by the parallel
        /// processing pipeline where each worker owns its own Net instance.
        /// </summary>
        /// <param name="croppedImage">Cropped region to recognize.</param>
        /// <param name="recognitionNet">Recognition <see cref="Net"/> to execute
        /// the forward pass with.</param>
        /// <param name="context">Optional context string for diagnostics.</param>
        /// <returns>Recognized digit sequence or empty string.</returns>
        private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null)
        {
            if (croppedImage is null)
            {
                throw new ArgumentNullException(nameof(croppedImage));
            }

            if (croppedImage.Empty())
            {
                return string.Empty;
            }

            // Optionally save the crop for diagnostics when enabled in configuration.
            if (_cfg.EnableCropSaving)
            {
                SaveCropForDiagnostics(croppedImage, context);
            }

            var candidates = InferCandidates(recognitionNet, croppedImage, _cfg.RecognitionInputSize);
            if (candidates.Count == 0)
            {
                return string.Empty;
            }

            // Order surviving digits left-to-right by their horizontal center and map
            // each class index to its configured label.
            var ordered = SuppressOverlaps(candidates)
                .Select(i => candidates[i])
                .OrderBy(c => c.CenterX)
                .Select(c => _cfg.NumberClasses[c.ClassId]);

            return string.Concat(ordered);
        }

        /// <summary>
        /// Processes one file inside a parallel worker: detection and recognition with
        /// the worker's own nets, then one retry with the shared nets (under their locks)
        /// when nothing was recognized.
        /// </summary>
        private ImageResult ProcessFileInWorker(string filePath, string fileName, Net detectionNet, Net recognitionNet)
        {
            using var image = Cv2.ImRead(filePath);
            if (image.Empty())
            {
                _logger?.LogWarning("Could not decode image {file}", fileName);
                return new ImageResult(fileName, string.Empty, filePath);
            }

            var regions = DetectTextRegions(detectionNet, image).ToArray();
            var texts = RecognizeRegions(
                image,
                regions,
                fileName,
                (crop, ctx) => RecognizeWithFreshNetRetry(crop, recognitionNet, ctx));

            // If no text was recognized with the per-thread nets, try one more time using
            // the shared nets under a lock.
            if (texts.Count == 0)
            {
                try
                {
                    DetectedRegion[] sharedRegions;
                    lock (_detectionLock)
                    {
                        sharedRegions = DetectTextRegions(image).ToArray();
                    }

                    var sharedTexts = RecognizeRegions(image, sharedRegions, fileName, (crop, ctx) =>
                    {
                        lock (_recognitionLock)
                        {
                            return RecognizeDigits(crop, ctx);
                        }
                    });

                    if (sharedTexts.Count > 0)
                    {
                        texts = sharedTexts;
                    }
                }
                catch (Exception ex)
                {
                    // The fallback is best effort; keep the (empty) per-thread result.
                    _logger?.LogDebug(ex, "Shared-net fallback failed for {file}", fileName);
                }
            }

            return new ImageResult(fileName, string.Join(",", texts), filePath);
        }

        /// <summary>
        /// Runs recognition with the worker's net and, when that yields nothing, retries
        /// once with a freshly loaded recognition net.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the retry reloads the model from disk for every crop without a
        /// result, which is expensive. It is kept because the original pipeline did this
        /// (marked "diagnostic"); confirm whether it is still needed.
        /// </remarks>
        private string RecognizeWithFreshNetRetry(Mat crop, Net recognitionNet, string context)
        {
            var text = RecognizeDigits(crop, recognitionNet, context);
            if (!string.IsNullOrEmpty(text))
            {
                return text;
            }

            try
            {
                using var freshNet = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights, _cfg.UseGpu);
                var retry = RecognizeDigits(crop, freshNet, context);
                if (!string.IsNullOrEmpty(retry))
                {
                    text = retry;
                }
            }
            catch (Exception ex)
            {
                _logger?.LogDebug(ex, "Fresh-net recognition retry failed for {context}", context);
            }

            return text;
        }

        /// <summary>
        /// Crops every region out of <paramref name="image"/>, runs
        /// <paramref name="recognize"/> on it and collects the non-empty results in order.
        /// </summary>
        private static List<string> RecognizeRegions(
            Mat image,
            IEnumerable<DetectedRegion> regions,
            string fileName,
            Func<Mat, string, string> recognize)
        {
            var texts = new List<string>();
            foreach (var region in regions)
            {
                var box = region.BoundingBox;
                using var crop = new Mat(image, box);
                var context = $"{fileName}_{box.X}_{box.Y}_{box.Width}x{box.Height}";

                var text = recognize(crop, context);
                if (!string.IsNullOrEmpty(text))
                {
                    texts.Add(text);
                }
            }

            return texts;
        }

        /// <summary>
        /// Writes the crop to <c>logs/crops</c> with a unique, sanitized file name.
        /// Failures are logged and never propagated.
        /// </summary>
        private void SaveCropForDiagnostics(Mat croppedImage, string? context)
        {
            try
            {
                var cropsDir = Path.Combine("logs", "crops");
                Directory.CreateDirectory(cropsDir);
                var fname = $"{(string.IsNullOrEmpty(context) ? "crop" : SanitizeFileName(context))}_{DateTime.UtcNow:yyyyMMdd_HHmmss_fff}_{Guid.NewGuid():N}.jpg";
                var full = Path.Combine(cropsDir, fname);
                Cv2.ImWrite(full, croppedImage);
            }
            catch (Exception ex)
            {
                _logger?.LogError(ex, "Failed saving crop for diagnostics");
            }
        }

        /// <summary>
        /// Runs the detection net on <paramref name="image"/> and reports the name and
        /// shape of every output Mat.
        /// </summary>
        private DetectionOutput[] DescribeDetectionOutputs(Mat image)
        {
            var names = GetOutputLayerNames(_detectionNet);
            var outputs = RunForward(_detectionNet, image, _cfg.DetectionInputSize, names);
            try
            {
                return outputs
                    .Select((m, i) => new DetectionOutput(i < names.Length ? names[i] : $"out{i}", m.Rows, m.Cols))
                    .ToArray();
            }
            finally
            {
                DisposeAll(outputs);
            }
        }

        /// <summary>
        /// Runs a forward pass of <paramref name="net"/> on <paramref name="image"/> and
        /// decodes the outputs into thresholded candidates. All output Mats are disposed
        /// before returning.
        /// </summary>
        private List<Candidate> InferCandidates(Net net, Mat image, Size inputSize)
        {
            var outputs = RunForward(net, image, inputSize, GetOutputLayerNames(net));
            try
            {
                return DecodeCandidates(outputs, image.Width, image.Height);
            }
            finally
            {
                DisposeAll(outputs);
            }
        }

        /// <summary>
        /// Builds the input blob, feeds it to <paramref name="net"/> and returns one
        /// output Mat per entry in <paramref name="outputNames"/>. The caller owns the
        /// returned Mats and must dispose them.
        /// </summary>
        private Mat[] RunForward(Net net, Mat image, Size inputSize, string[] outputNames)
        {
            using var blob = CvDnn.BlobFromImage(image, PixelScale, inputSize, new Scalar(0, 0, 0), true, false);
            net.SetInput(blob);

            // Net.Forward(IEnumerable<Mat>, IEnumerable<string>) fills the Mats it is given;
            // it does not add to the collection, so one Mat per output must be pre-allocated.
            var outputs = outputNames.Select(_ => new Mat()).ToArray();
            try
            {
                if (outputs.Length > 0)
                {
                    net.Forward(outputs, outputNames);
                }

                // Fallback: request any output that is still empty individually.
                for (int i = 0; i < outputs.Length; i++)
                {
                    if (!outputs[i].Empty())
                    {
                        continue;
                    }

                    try
                    {
                        var single = net.Forward(outputNames[i]);
                        outputs[i].Dispose();
                        outputs[i] = single;
                    }
                    catch (Exception ex)
                    {
                        _logger?.LogError(ex, "Fallback Forward failed for {name}", outputNames[i]);
                    }
                }

                return outputs;
            }
            catch
            {
                DisposeAll(outputs);
                throw;
            }
        }

        /// <summary>
        /// Converts raw network outputs into candidates whose score exceeds the configured
        /// confidence threshold.
        /// </summary>
        /// <remarks>
        /// Each row is read as <c>[cx, cy, w, h, objectness, class1, class2, ...]</c> with
        /// the first four values relative to the image size. Boxes are clamped to the image
        /// so they can be used directly as a region of interest; boxes that end up with no
        /// area are dropped.
        /// </remarks>
        private List<Candidate> DecodeCandidates(IEnumerable<Mat> outputs, int imageWidth, int imageHeight)
        {
            var candidates = new List<Candidate>();

            foreach (var output in outputs)
            {
                // Rows without at least one class column cannot produce a score.
                if (output.Empty() || output.Cols <= FirstClassColumn)
                {
                    continue;
                }

                for (int row = 0; row < output.Rows; row++)
                {
                    float objectness = output.At<float>(row, ObjectnessColumn);
                    float bestScore = 0f;
                    int bestClass = -1;

                    for (int col = FirstClassColumn; col < output.Cols; col++)
                    {
                        // Combine objectness and class probability.
                        float score = objectness * output.At<float>(row, col);
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestClass = col - FirstClassColumn;
                        }
                    }

                    if (bestClass < 0 || bestScore <= _cfg.ConfidenceThreshold)
                    {
                        continue;
                    }

                    float cx = output.At<float>(row, 0) * imageWidth;
                    float cy = output.At<float>(row, 1) * imageHeight;
                    float w = output.At<float>(row, 2) * imageWidth;
                    float h = output.At<float>(row, 3) * imageHeight;

                    int rawLeft = (int)Math.Round(cx - w / 2);
                    int rawTop = (int)Math.Round(cy - h / 2);
                    int left = Math.Max(0, rawLeft);
                    int top = Math.Max(0, rawTop);
                    int right = Math.Min(imageWidth, rawLeft + (int)Math.Round(w));
                    int bottom = Math.Min(imageHeight, rawTop + (int)Math.Round(h));

                    if (right <= left || bottom <= top)
                    {
                        continue;
                    }

                    candidates.Add(new Candidate(
                        new Rect(left, top, right - left, bottom - top),
                        bestScore,
                        bestClass,
                        cx));
                }
            }

            return candidates;
        }

        /// <summary>
        /// Applies non-maximum suppression with the configured thresholds and returns the
        /// indices (into <paramref name="candidates"/>) of the boxes that survive.
        /// </summary>
        private int[] SuppressOverlaps(List<Candidate> candidates)
        {
            CvDnn.NMSBoxes(
                candidates.Select(c => c.Box),
                candidates.Select(c => c.Score),
                (float)_cfg.ConfidenceThreshold,
                (float)_cfg.NmsThreshold,
                out int[] kept);

            return kept;
        }

        /// <summary>Disposes every Mat in <paramref name="mats"/>.</summary>
        private static void DisposeAll(IEnumerable<Mat> mats)
        {
            foreach (var mat in mats)
            {
                mat?.Dispose();
            }
        }

        /// <summary>
        /// Disposes a net during cleanup without letting a disposal failure mask the
        /// result or the original exception.
        /// </summary>
        private void DisposeQuietly(Net net)
        {
            try
            {
                net.Dispose();
            }
            catch (Exception ex)
            {
                _logger?.LogDebug(ex, "Failed to dispose a per-thread net");
            }
        }
    }
}