using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

namespace AIFotoONLUS.Core
{
    /// <summary>
    /// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and
    /// provides methods to detect text regions and recognize digits.
    /// </summary>
    public class NumberRecognitionEngine : IDisposable
    {
        // Scale factor handed to BlobFromImage (approximately 1/255).
        private const double BlobScaleFactor = 0.00392;

        // Index of the first per-class score in an output row. Columns 0-3 are read as
        // (cx, cy, w, h) fractions of the image size; column 4 is not read.
        private const int FirstClassColumn = 5;

        private readonly Net _detectionNet;
        private readonly Net _recognitionNet;
        private readonly ModelConfiguration _cfg;
        private bool _disposed;

        /// <summary>
        /// Creates the engine and loads the detection and recognition networks.
        /// </summary>
        /// <param name="cfg">Model paths, input sizes, thresholds and class labels.</param>
        /// <exception cref="ArgumentNullException"><paramref name="cfg"/> is null.</exception>
        /// <exception cref="FileNotFoundException">A model cfg/weights file does not exist.</exception>
        public NumberRecognitionEngine(ModelConfiguration cfg)
        {
            _cfg = cfg ?? throw new ArgumentNullException(nameof(cfg));

            if (!File.Exists(_cfg.DetectionCfg) || !File.Exists(_cfg.DetectionWeights))
            {
                throw new FileNotFoundException("Detection model files not found.", _cfg.DetectionCfg);
            }

            if (!File.Exists(_cfg.RecognitionCfg) || !File.Exists(_cfg.RecognitionWeights))
            {
                throw new FileNotFoundException("Recognition model files not found.", _cfg.RecognitionCfg);
            }

            _detectionNet = LoadNet(_cfg.DetectionCfg, _cfg.DetectionWeights);
            try
            {
                _recognitionNet = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
            }
            catch
            {
                // Do not leak the first network when the second one fails to load.
                _detectionNet.Dispose();
                throw;
            }

            // Let OpenCV use multiple threads internally (use number of logical processors)
            try
            {
                Cv2.SetNumThreads(Environment.ProcessorCount);
            }
            catch (Exception)
            {
                // Best effort: ignore if not supported by the OpenCvSharp build.
            }
        }

        /// <summary>
        /// Releases the two networks owned by this instance. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (_disposed)
            {
                return;
            }

            _detectionNet?.Dispose();
            _recognitionNet?.Dispose();
            _disposed = true;
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Runs the detection network owned by this instance on <paramref name="image"/>.
        /// </summary>
        /// <param name="image">Image to search for text regions.</param>
        /// <returns>The regions kept by non-maximum suppression; empty when nothing passes the confidence threshold.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
        {
            if (image is null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            return DetectTextRegions(_detectionNet, image);
        }

        /// <summary>
        /// Runs the recognition network owned by this instance on a cropped region.
        /// </summary>
        /// <param name="croppedImage">Crop expected to contain digits.</param>
        /// <returns>The labels of the kept detections concatenated left to right; empty when nothing is recognized.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="croppedImage"/> is null.</exception>
        public string RecognizeDigits(Mat croppedImage)
        {
            if (croppedImage is null)
            {
                throw new ArgumentNullException(nameof(croppedImage));
            }

            return RecognizeDigits(croppedImage, _recognitionNet);
        }

        /// <summary>
        /// Detects text regions in one image file and recognizes the digits in each region.
        /// </summary>
        /// <param name="filePath">Path of the image to process.</param>
        /// <returns>A result whose text is the non-empty recognized strings joined with ",".</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="InvalidDataException">The file exists but could not be decoded as an image.</exception>
        public ImageResult ProcessImage(string filePath)
        {
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("Image not found", filePath);
            }

            return RecognizeFile(filePath, Path.GetFileName(filePath), _detectionNet, _recognitionNet);
        }

        /// <summary>
        /// Synchronous wrapper over <see cref="ProcessDirectoryAsync"/>; blocks the calling thread until it completes.
        /// </summary>
        /// <param name="directoryPath">Directory to scan (top level only).</param>
        /// <param name="skipTextNegative">When true, files whose name starts with "tn_" are skipped.</param>
        /// <returns>One result per processed file, ordered by file name.</returns>
        public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
        {
            // Simple wrapper over async implementation
            return ProcessDirectoryAsync(directoryPath, skipTextNegative).GetAwaiter().GetResult();
        }

        /// <summary>
        /// Processes every .jpg/.jpeg file of a directory in parallel, one pair of networks per worker thread.
        /// </summary>
        /// <param name="directoryPath">Directory to scan.</param>
        /// <param name="skipTextNegative">When true, files whose name starts with "tn_" are excluded (and not counted in the progress total).</param>
        /// <param name="recursive">When true, sub-directories are scanned as well.</param>
        /// <param name="progress">Optional sink receiving (total, processed, images per second) after each file.</param>
        /// <param name="resultProgress">Optional sink receiving each successfully produced result.</param>
        /// <param name="cancellationToken">Cancels the run; results gathered so far are returned.</param>
        /// <returns>The gathered results ordered by file name. Files that failed yield a result with empty text.</returns>
        /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
        public async Task<IEnumerable<ImageResult>> ProcessDirectoryAsync(
            string directoryPath,
            bool skipTextNegative = false,
            bool recursive = false,
            IProgress<ProcessingStats>? progress = null,
            IProgress<ImageResult>? resultProgress = null,
            CancellationToken cancellationToken = default)
        {
            if (!Directory.Exists(directoryPath))
            {
                throw new DirectoryNotFoundException(directoryPath);
            }

            var searchOption = recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;

            // Skipped files are removed up front so that the reported total can actually be reached.
            var files = Directory.EnumerateFiles(directoryPath, "*.*", searchOption)
                .Where(IsJpeg)
                .Where(f => !skipTextNegative || !IsTextNegative(f))
                .ToArray();

            var bag = new ConcurrentBag<ImageResult>();
            var dop = Environment.ProcessorCount;
            var total = files.Length;
            var processed = 0;
            var sw = System.Diagnostics.Stopwatch.StartNew();

            // One pair of nets per worker thread: avoids reloading for every file while still
            // avoiding concurrent use of the same Net. trackAllValues lets this method dispose them all.
            using var threadLocalNets = new ThreadLocal<(Net detNet, Net recNet)>(LoadNetPair, trackAllValues: true);

            try
            {
                // The token is deliberately not passed to Task.Run: an already-cancelled token would
                // otherwise surface as TaskCanceledException instead of the partial (empty) result.
                await Task.Run(() =>
                {
                    var options = new ParallelOptions
                    {
                        MaxDegreeOfParallelism = dop,
                        CancellationToken = cancellationToken
                    };

                    try
                    {
                        Parallel.ForEach(files, options, f =>
                        {
                            cancellationToken.ThrowIfCancellationRequested();
                            var filename = Path.GetFileName(f);

                            try
                            {
                                var nets = threadLocalNets.Value;
                                var imgRes = RecognizeFile(f, filename, nets.detNet, nets.recNet);
                                bag.Add(imgRes);
                                resultProgress?.Report(imgRes);
                            }
                            catch (Exception)
                            {
                                // Deliberate best effort: swallow per-file errors and report an empty result.
                                bag.Add(new ImageResult(filename, string.Empty, f));
                            }
                            finally
                            {
                                var proc = Interlocked.Increment(ref processed);
                                if (progress != null)
                                {
                                    // At least 1 ms so the rate never divides by zero.
                                    var elapsed = Math.Max(1, sw.ElapsedMilliseconds);
                                    var ips = proc * 1000.0 / elapsed;
                                    progress.Report(new ProcessingStats(total, proc, ips));
                                }
                            }
                        });
                    }
                    catch (OperationCanceledException)
                    {
                        // Cancellation requested - exit gracefully and return partial results
                    }
                }).ConfigureAwait(false);
            }
            finally
            {
                // Dispose every net created by a worker, even when the run failed.
                foreach (var pair in threadLocalNets.Values)
                {
                    DisposeQuietly(pair.detNet);
                    DisposeQuietly(pair.recNet);
                }
            }

            return bag.OrderBy(b => b.FileName).ToList();
        }

        // Holds the parallel lists produced by decoding the raw network outputs.
        private sealed class Candidates
        {
            public readonly List<Rect> Boxes = new List<Rect>();
            public readonly List<float> Confidences = new List<float>();
            public readonly List<int> ClassIds = new List<int>();
            public readonly List<float> CenterXs = new List<float>();
        }

        private static string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames();

        // Loads a Darknet network and pins it to the OpenCV backend on the CPU target.
        private static Net LoadNet(string cfgPath, string weightsPath)
        {
            var net = CvDnn.ReadNetFromDarknet(cfgPath, weightsPath)
                ?? throw new InvalidOperationException($"Failed to load Darknet model '{cfgPath}'.");
            try
            {
                net.SetPreferableBackend(Backend.OPENCV);
                net.SetPreferableTarget(Target.CPU);
            }
            catch
            {
                net.Dispose();
                throw;
            }

            return net;
        }

        // Loads a fresh detection/recognition pair for one worker thread.
        private (Net detNet, Net recNet) LoadNetPair()
        {
            var det = LoadNet(_cfg.DetectionCfg, _cfg.DetectionWeights);
            try
            {
                var rec = LoadNet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
                return (det, rec);
            }
            catch
            {
                det.Dispose();
                throw;
            }
        }

        // Disposal during cleanup must never mask the original outcome of the run.
        private static void DisposeQuietly(IDisposable resource)
        {
            try
            {
                resource?.Dispose();
            }
            catch (Exception)
            {
                // Best effort.
            }
        }

        private static bool IsJpeg(string path) =>
            path.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) ||
            path.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase);

        private static bool IsTextNegative(string path) =>
            Path.GetFileName(path).StartsWith("tn_", StringComparison.OrdinalIgnoreCase);

        // Reads one image, detects its text regions and recognizes each region with the given nets.
        private ImageResult RecognizeFile(string filePath, string fileName, Net detectionNet, Net recognitionNet)
        {
            using var image = Cv2.ImRead(filePath);
            if (image.Empty())
            {
                // ImRead signals failure with an empty Mat instead of throwing.
                throw new InvalidDataException($"Image could not be decoded: {filePath}");
            }

            var texts = new List<string>();
            foreach (var region in DetectTextRegions(detectionNet, image))
            {
                using var crop = new Mat(image, region.BoundingBox);
                var text = RecognizeDigits(crop, recognitionNet);
                if (!string.IsNullOrEmpty(text))
                {
                    texts.Add(text);
                }
            }

            return new ImageResult(fileName, string.Join(",", texts), filePath);
        }

        // Feeds the blob through the network and decodes every output row whose best class
        // score exceeds the configured confidence threshold. Boxes are clipped to the image.
        private Candidates CollectCandidates(Net net, Mat blob, int imgW, int imgH)
        {
            net.SetInput(blob);
            var outNames = GetOutputLayerNames(net);

            // Forward writes into the Mats it is given, so one must exist per output layer;
            // passing an empty collection yields no outputs at all.
            var outs = outNames.Select(_ => new Mat()).ToArray();
            try
            {
                net.Forward(outs, outNames);

                var found = new Candidates();
                foreach (var outMat in outs)
                {
                    for (int row = 0; row < outMat.Rows; row++)
                    {
                        float maxScore = 0f;
                        int bestClass = -1;
                        for (int col = FirstClassColumn; col < outMat.Cols; col++)
                        {
                            float score = outMat.At<float>(row, col);
                            if (score > maxScore)
                            {
                                maxScore = score;
                                bestClass = col - FirstClassColumn;
                            }
                        }

                        // bestClass stays -1 when no score is positive; such rows carry no usable label.
                        if (bestClass < 0 || !(maxScore > _cfg.ConfidenceThreshold))
                        {
                            continue;
                        }

                        float cx = outMat.At<float>(row, 0) * imgW;
                        float cy = outMat.At<float>(row, 1) * imgH;
                        float w = outMat.At<float>(row, 2) * imgW;
                        float h = outMat.At<float>(row, 3) * imgH;

                        int left = (int)Math.Round(cx - w / 2);
                        int top = (int)Math.Round(cy - h / 2);
                        int right = left + (int)Math.Round(w);
                        int bottom = top + (int)Math.Round(h);

                        // Clip on all four sides so the box is always a valid ROI of the image.
                        left = Math.Max(0, left);
                        top = Math.Max(0, top);
                        right = Math.Min(imgW, right);
                        bottom = Math.Min(imgH, bottom);
                        if (right <= left || bottom <= top)
                        {
                            continue;
                        }

                        found.Boxes.Add(new Rect(left, top, right - left, bottom - top));
                        found.Confidences.Add(maxScore);
                        found.ClassIds.Add(bestClass);
                        found.CenterXs.Add(cx);
                    }
                }

                return found;
            }
            finally
            {
                foreach (var outMat in outs)
                {
                    outMat.Dispose();
                }
            }
        }

        // Internal variant that accepts a Net instance so it can be used from parallel workers
        private IEnumerable<DetectedRegion> DetectTextRegions(Net detectionNet, Mat image)
        {
            Candidates found;
            using (var blob = CvDnn.BlobFromImage(image, BlobScaleFactor, _cfg.DetectionInputSize, new Scalar(0, 0, 0), true, false))
            {
                found = CollectCandidates(detectionNet, blob, image.Width, image.Height);
            }

            if (found.Boxes.Count == 0)
            {
                return Enumerable.Empty<DetectedRegion>();
            }

            CvDnn.NMSBoxes(found.Boxes, found.Confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] indices);

            var results = new List<DetectedRegion>(indices.Length);
            foreach (var idx in indices)
            {
                var box = found.Boxes[idx];
                double centerX = box.X + box.Width / 2.0;
                results.Add(new DetectedRegion(box, found.Confidences[idx], found.ClassIds[idx], centerX));
            }

            return results;
        }

        // Overload RecognizeDigits that accepts a Net for worker threads
        private string RecognizeDigits(Mat croppedImage, Net recognitionNet)
        {
            if (croppedImage is null)
            {
                throw new ArgumentNullException(nameof(croppedImage));
            }

            Candidates found;
            using (var blob = CvDnn.BlobFromImage(croppedImage, BlobScaleFactor, _cfg.RecognitionInputSize, new Scalar(0, 0, 0), true, false))
            {
                found = CollectCandidates(recognitionNet, blob, croppedImage.Width, croppedImage.Height);
            }

            if (found.ClassIds.Count == 0)
            {
                return string.Empty;
            }

            CvDnn.NMSBoxes(found.Boxes, found.Confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] keep);

            // Read the surviving detections left to right by their (unclipped) centre x.
            var ordered = keep
                .Select(i => new { Cx = found.CenterXs[i], ClassId = found.ClassIds[i] })
                .OrderBy(x => x.Cx)
                .Select(x => _cfg.NumberClasses[x.ClassId]);

            return string.Concat(ordered);
        }
    }
}