AIFotoOnlus/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs

740 lines
34 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Diagnostics;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace AIFotoONLUS.Core
{
/// <summary>
/// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO)
/// models through OpenCvSharp's DNN API and exposes simple synchronous and
/// asynchronous methods to detect numeric text regions in images and recognize
/// the digits contained within those regions.
///
/// Overview
/// - Loads two Darknet networks: a detection network (finds text regions)
/// and a recognition network (recognizes digits inside a cropped region).
/// - Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and
/// perform non-maximum suppression (NMS) on detection candidates.
/// - Provides single-image and directory-level processing APIs. Directory
/// processing supports parallel workers where each worker uses its own
/// per-thread Net instances to allow concurrent forward calls.
///
/// Threading and performance notes
/// - The class constructs and owns two shared Net instances used by the
/// simple (single-threaded) APIs. When doing parallel processing the
/// implementation creates per-thread Net instances to avoid concurrent
/// calls into the same Net object. A small fallback path exists that will
/// call into the shared nets under a lock when needed.
/// - OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported.
///
/// Diagnostics
/// - When enabled via the configuration, crops may be saved to disk for
/// debugging. The <see cref="ModelConfiguration"/> contains thresholds and
/// paths used by the engine.
/// </summary>
using Microsoft.Extensions.Logging;
public class NumberRecognitionEngine : IDisposable
{
// Shared detection network used by the single-image APIs; released in Dispose().
private readonly Net _detectionNet;
// Shared recognition network used by the single-image APIs; released in Dispose().
private readonly Net _recognitionNet;
// Taken by ProcessDirectoryAsync while its fallback path runs the shared detection network.
private readonly object _detectionLock = new();
// Taken by ProcessDirectoryAsync while its fallback path runs the shared recognition network.
private readonly object _recognitionLock = new();
// Caller-supplied model paths, input sizes, thresholds and diagnostic switches.
private readonly ModelConfiguration _cfg;
// Optional diagnostics sink; may be null, so every use is null-conditional.
private readonly ILogger? _logger;
// Set by Dispose() after the shared networks are released so repeated calls are no-ops.
private bool _disposed;
/// <summary>
/// Initializes a <see cref="NumberRecognitionEngine"/> without a logger.
/// Equivalent to calling the two-argument constructor with a <c>null</c>
/// logger: the detection and recognition Darknet models named in
/// <paramref name="cfg"/> are loaded and configured for inference.
/// </summary>
/// <param name="cfg">Model configuration holding file paths, thresholds
/// and other options. Must not be <c>null</c>.</param>
/// <remarks>
/// A <see cref="FileNotFoundException"/> is raised when a model file is
/// missing. Use the overload that takes an <see cref="ILogger"/> to
/// receive diagnostic messages.
/// </remarks>
public NumberRecognitionEngine(ModelConfiguration cfg)
    : this(cfg, null)
{
}
/// <summary>
/// Create a new instance of <see cref="NumberRecognitionEngine"/> with an
/// optional <see cref="ILogger"/>. Loads the detection and recognition
/// Darknet models named in <paramref name="cfg"/> and selects the CUDA or
/// CPU backend according to the configuration's <c>UseGpu</c> flag.
/// </summary>
/// <param name="cfg">Model configuration containing file paths and
/// runtime thresholds.</param>
/// <param name="logger">Optional logger for diagnostic messages.
/// May be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="cfg"/>
/// is <c>null</c>.</exception>
/// <exception cref="FileNotFoundException">Thrown when one of the model
/// files referenced by <paramref name="cfg"/> does not exist. The
/// exception's <c>FileName</c> is the path that is actually missing.</exception>
public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger)
{
    _logger = logger;
    _cfg = cfg ?? throw new ArgumentNullException(nameof(cfg));

    // Check every file on its own so the exception names the path that is really missing
    // (previously a missing weights file was reported with the .cfg path).
    if (!File.Exists(_cfg.DetectionCfg))
    {
        throw new FileNotFoundException("Detection model files not found.", _cfg.DetectionCfg);
    }
    if (!File.Exists(_cfg.DetectionWeights))
    {
        throw new FileNotFoundException("Detection model files not found.", _cfg.DetectionWeights);
    }
    if (!File.Exists(_cfg.RecognitionCfg))
    {
        throw new FileNotFoundException("Recognition model files not found.", _cfg.RecognitionCfg);
    }
    if (!File.Exists(_cfg.RecognitionWeights))
    {
        throw new FileNotFoundException("Recognition model files not found.", _cfg.RecognitionWeights);
    }

    _detectionNet = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
    try
    {
        _recognitionNet = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
        ConfigureNetRuntime(_detectionNet, _cfg.UseGpu);
        ConfigureNetRuntime(_recognitionNet, _cfg.UseGpu);
    }
    catch
    {
        // The object is never handed to the caller when construction fails, so Dispose()
        // cannot be called on it: release whatever native nets were already created.
        _recognitionNet?.Dispose();
        _detectionNet?.Dispose();
        throw;
    }

    // Let OpenCV use multiple threads internally (use number of logical processors)
    try
    {
        Cv2.SetNumThreads(Environment.ProcessorCount);
    }
    catch (Exception ex)
    {
        // Not fatal: some OpenCvSharp builds do not support it; keep going with the default.
        _logger?.LogDebug(ex, "Cv2.SetNumThreads is not supported by this OpenCvSharp build");
    }
}
/// <summary>
/// Releases the shared detection and recognition networks. Only the first
/// call does any work; later calls return immediately.
/// </summary>
public void Dispose()
{
    if (!_disposed)
    {
        _detectionNet?.Dispose();
        _recognitionNet?.Dispose();
        _disposed = true;
        GC.SuppressFinalize(this);
    }
}
// Returns a copy of 'name' in which every character reported by
// Path.GetInvalidFileNameChars() has been replaced with an underscore.
private static string SanitizeFileName(string name)
{
    char[] forbidden = Path.GetInvalidFileNameChars();
    char[] buffer = name.ToCharArray();
    for (int pos = 0; pos < buffer.Length; pos++)
    {
        if (Array.IndexOf(forbidden, buffer[pos]) >= 0)
        {
            buffer[pos] = '_';
        }
    }
    return new string(buffer);
}
// Names of the network's unconnected output layers, as reported by the Net itself.
private string[] GetOutputLayerNames(Net net)
{
    return net.GetUnconnectedOutLayersNames();
}
// Selects the inference backend/target pair for a network:
// CUDA/CUDA when useGpu is true, otherwise OPENCV/CPU.
private static void ConfigureNetRuntime(Net net, bool useGpu)
{
    var backend = useGpu ? Backend.CUDA : Backend.OPENCV;
    var target = useGpu ? Target.CUDA : Target.CPU;
    net.SetPreferableBackend(backend);
    net.SetPreferableTarget(target);
}
/// <summary>
/// Finds text regions in an image by running the engine's shared detection
/// network.
/// </summary>
/// <param name="image">Image to analyse, as an OpenCvSharp <see cref="Mat"/>.
/// Must not be <c>null</c>.</param>
/// <returns>The detected regions (bounding box, confidence, class id and
/// horizontal centre) that pass the configured confidence and NMS
/// thresholds.</returns>
/// <exception cref="ArgumentNullException">Thrown when
/// <paramref name="image"/> is <c>null</c>.</exception>
public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
{
    return DetectTextRegions(
        _detectionNet,
        image ?? throw new ArgumentNullException(nameof(image)));
}
/// <summary>
/// Internal variant of <see cref="DetectTextRegions(Mat)"/> that accepts the
/// <see cref="Net"/> to run, so parallel workers can pass their own instance.
/// Rows of the network output are read as
/// <c>[cx, cy, w, h, objectness, class1, class2, ...]</c> with coordinates
/// relative to the image size; candidates above the configured confidence
/// threshold go through non-maximum suppression.
/// </summary>
/// <param name="detectionNet">Detection network to execute.</param>
/// <param name="image">Image to analyse.</param>
/// <returns>Detected regions whose bounding boxes are clipped to the image,
/// so they can be used directly as a crop rectangle. Empty when nothing
/// passes the thresholds.</returns>
private IEnumerable<DetectedRegion> DetectTextRegions(Net detectionNet, Mat image)
{
    using var blob = CvDnn.BlobFromImage(image, 0.00392, _cfg.DetectionInputSize, new Scalar(0, 0, 0), true, false);
    detectionNet.SetInput(blob);
    var outNames = GetOutputLayerNames(detectionNet);

    // Every Mat produced below wraps native memory and is released in the finally block.
    var outs = new List<Mat>();
    try
    {
        // Net.Forward(outputs, names) fills the Mats it is handed; it cannot add to an empty
        // collection. Pre-allocating one Mat per output layer lets a single forward pass
        // deliver every output.
        for (int k = 0; k < outNames.Length; k++)
        {
            outs.Add(new Mat());
        }
        detectionNet.Forward(outs, outNames);

        if (outs.All(o => o.Rows <= 0))
        {
            // Safety net: nothing was filled in, so query each output layer individually.
            foreach (var unfilled in outs)
            {
                unfilled.Dispose();
            }
            outs.Clear();
            foreach (var layerName in outNames)
            {
                try
                {
                    outs.Add(detectionNet.Forward(layerName));
                }
                catch (Exception ex)
                {
                    _logger?.LogError(ex, "Fallback Forward failed for {name}", layerName);
                }
            }
        }

        var boxes = new List<Rect>();
        var confidences = new List<float>();
        var classIds = new List<int>();
        int imgW = image.Width;
        int imgH = image.Height;
        foreach (var outMat in outs)
        {
            // A row needs at least cx, cy, w, h and objectness to be readable.
            if (outMat.Cols < 5)
            {
                continue;
            }
            for (int i = 0; i < outMat.Rows; i++)
            {
                float cx = outMat.At<float>(i, 0) * imgW;
                float cy = outMat.At<float>(i, 1) * imgH;
                float w = outMat.At<float>(i, 2) * imgW;
                float h = outMat.At<float>(i, 3) * imgH;
                // YOLO output layout: [cx, cy, w, h, objectness, class1, class2, ...]
                float objectness = outMat.At<float>(i, 4);
                float maxScore = 0f;
                int bestClass = -1;
                for (int c = 5; c < outMat.Cols; c++)
                {
                    float classProb = outMat.At<float>(i, c);
                    float score = objectness * classProb; // combine objectness and class probability
                    if (score > maxScore)
                    {
                        maxScore = score;
                        bestClass = c - 5;
                    }
                }
                if (maxScore > _cfg.ConfidenceThreshold)
                {
                    int x = (int)Math.Max(0, Math.Round(cx - w / 2));
                    int y = (int)Math.Max(0, Math.Round(cy - h / 2));
                    boxes.Add(new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)));
                    confidences.Add(maxScore);
                    classIds.Add(bestClass);
                }
            }
        }
        if (boxes.Count == 0) return Enumerable.Empty<DetectedRegion>();

        CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] indices);
        var results = new List<DetectedRegion>();
        foreach (var idx in indices)
        {
            var b = boxes[idx];
            // Predicted boxes may extend past the right/bottom edge of the image. Callers crop
            // with 'new Mat(image, box)', which throws for such rectangles, so clip here.
            int clippedWidth = Math.Min(imgW, b.X + b.Width) - b.X;
            int clippedHeight = Math.Min(imgH, b.Y + b.Height) - b.Y;
            if (clippedWidth <= 0 || clippedHeight <= 0)
            {
                // Nothing of the box lies inside the image.
                continue;
            }
            var clipped = new Rect(b.X, b.Y, clippedWidth, clippedHeight);
            double centerX = clipped.X + clipped.Width / 2.0;
            results.Add(new DetectedRegion(clipped, confidences[idx], classIds[idx], centerX));
        }
        return results;
    }
    finally
    {
        foreach (var produced in outs)
        {
            produced.Dispose();
        }
    }
}
/// <summary>
/// Recognize digits inside a cropped image region using the engine's shared
/// recognition network. Returns the concatenated sequence of recognized
/// digit labels ordered left-to-right.
/// </summary>
/// <param name="croppedImage">Cropped image containing digits as
/// <see cref="Mat"/>. Must not be <c>null</c>.</param>
/// <param name="context">Optional context string used for diagnostics
/// (e.g. when saving crop image files).</param>
/// <returns>A string containing recognized digits in left-to-right order.
/// Returns an empty string when no digits are recognized above the
/// configured confidence threshold.</returns>
/// <exception cref="ArgumentNullException">Thrown when
/// <paramref name="croppedImage"/> is <c>null</c>.</exception>
public string RecognizeDigits(Mat croppedImage, string? context = null)
{
    if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));

    // The decoding pipeline lives in the Net-accepting overload; delegating keeps the
    // shared-net path and the per-worker path from drifting apart.
    return RecognizeDigits(croppedImage, _recognitionNet, context);
}
/// <summary>
/// Small DTO that describes the name and shape of one detection network
/// forward output. Produced by <see cref="ProcessFileWithDiagnostics"/>
/// for diagnostics.
/// </summary>
/// <param name="Name">Layer/output name.</param>
/// <param name="Rows">Number of rows in the output Mat.</param>
/// <param name="Cols">Number of columns in the output Mat.</param>
public record DetectionOutput(string Name, int Rows, int Cols);
/// <summary>
/// Result returned by <see cref="ProcessFileWithDiagnostics"/>. It pairs
/// the recognized text result with an array describing the detection
/// network's forward outputs (names and shapes), which is useful for
/// debugging model output layout mismatches.
/// </summary>
/// <param name="Result">Recognition result for the processed image.</param>
/// <param name="DetectionOutputs">Array describing detection net outputs.</param>
public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs);
/// <summary>
/// Process a single image file and return the recognition result together
/// with the detection network's forward output shapes for diagnostics. The
/// method reads the image from disk, runs the shared detection network to
/// capture the name, row count and column count of each raw output, and
/// then calls <see cref="ProcessImage"/> to obtain the recognized text.
/// </summary>
/// <param name="filePath">Path to an image file on disk.</param>
/// <returns>The recognition result plus one <see cref="DetectionOutput"/>
/// per captured detection output.</returns>
/// <exception cref="FileNotFoundException">Thrown when
/// <paramref name="filePath"/> does not exist.</exception>
public DiagnosticResult ProcessFileWithDiagnostics(string filePath)
{
    if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath);

    DetectionOutput[] outputs;
    using (var image = Cv2.ImRead(filePath))
    using (var blob = CvDnn.BlobFromImage(image, 0.00392, _cfg.DetectionInputSize, new Scalar(0, 0, 0), true, false))
    {
        _detectionNet.SetInput(blob);
        var outNames = GetOutputLayerNames(_detectionNet);

        // Names are tracked next to their Mats so a failed fallback call cannot shift the pairing.
        var capturedNames = new List<string>();
        var capturedMats = new List<Mat>();
        try
        {
            // Net.Forward(outputs, names) fills the Mats it is given, so allocate one per layer.
            for (int k = 0; k < outNames.Length; k++)
            {
                capturedNames.Add(outNames[k] ?? $"out{k}");
                capturedMats.Add(new Mat());
            }
            _detectionNet.Forward(capturedMats, outNames);

            if (capturedMats.All(o => o.Rows <= 0))
            {
                // fallback: if no mats produced, try per-name Forward
                foreach (var unfilled in capturedMats)
                {
                    unfilled.Dispose();
                }
                capturedMats.Clear();
                capturedNames.Clear();
                for (int k = 0; k < outNames.Length; k++)
                {
                    try
                    {
                        capturedMats.Add(_detectionNet.Forward(outNames[k]));
                        capturedNames.Add(outNames[k] ?? $"out{k}");
                    }
                    catch (Exception ex)
                    {
                        _logger?.LogError(ex, "Diagnostic fallback Forward failed for {name}", outNames[k]);
                    }
                }
            }

            outputs = capturedMats
                .Select((mat, pos) => new DetectionOutput(capturedNames[pos], mat.Rows, mat.Cols))
                .ToArray();
        }
        finally
        {
            // Only the shapes are kept; the native buffers are no longer needed.
            foreach (var produced in capturedMats)
            {
                produced.Dispose();
            }
        }
    }

    // run the normal processing to get recognized text
    var imgRes = ProcessImage(filePath);
    return new DiagnosticResult(imgRes, outputs);
}
/// <summary>
/// Runs the full pipeline on one image file: detect candidate text regions,
/// recognize the digits of every region, and join the non-empty digit
/// sequences with commas into <see cref="ImageResult.Text"/>.
/// </summary>
/// <param name="filePath">Path to an image file on disk. Supported
/// formats depend on OpenCV (typically JPEG, PNG, ...).</param>
/// <returns>An <see cref="ImageResult"/> holding the file name, the
/// recognized text (possibly empty) and the original path.</returns>
/// <exception cref="FileNotFoundException">Thrown when
/// <paramref name="filePath"/> does not exist.</exception>
public ImageResult ProcessImage(string filePath)
{
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException("Image not found", filePath);
    }

    using var image = Cv2.ImRead(filePath);
    var recognized = new List<string>();
    foreach (var region in DetectTextRegions(image).ToArray())
    {
        var box = region.BoundingBox;
        using var crop = new Mat(image, box);
        // The context string only labels the optional diagnostic crop file.
        var label = $"{Path.GetFileName(filePath)}_{box.X}_{box.Y}_{box.Width}x{box.Height}";
        var digits = RecognizeDigits(crop, label);
        if (string.IsNullOrEmpty(digits))
        {
            continue;
        }
        recognized.Add(digits);
    }

    var result = new ImageResult(Path.GetFileName(filePath), string.Join(",", recognized), filePath);
    if (string.IsNullOrEmpty(result.Text))
    {
        _logger?.LogDebug("Processed image {file} -> (no text)", result.FileName);
    }
    else
    {
        _logger?.LogInformation("Processed image {file} -> {text}", result.FileName, result.Text);
    }
    return result;
}
/// <summary>
/// Synchronous counterpart of <see cref="ProcessDirectoryAsync"/>: processes
/// the JPEG images of a directory and blocks until all results are available.
/// </summary>
/// <param name="directoryPath">Path to a directory containing images.</param>
/// <param name="skipTextNegative">If true, files whose names start with
/// "tn_" will be skipped (convention used to mark text-negative images).</param>
/// <returns>Collection of <see cref="ImageResult"/> ordered by file name.</returns>
public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
{
    // Start the asynchronous pipeline, then wait for it on the calling thread.
    Task<IEnumerable<ImageResult>> pending = ProcessDirectoryAsync(directoryPath, skipTextNegative);
    return pending.GetAwaiter().GetResult();
}
/// <summary>
/// Process all JPEG images (<c>.jpg</c> / <c>.jpeg</c>) of a directory in
/// parallel. Each worker thread loads its own pair of detection and
/// recognition networks; when an image yields no text the shared networks are
/// tried once more under a lock.
/// </summary>
/// <param name="directoryPath">Directory that contains the images.</param>
/// <param name="skipTextNegative">If true, files whose names start with
/// "tn_" are left out entirely (they are neither processed nor counted).</param>
/// <param name="recursive">If true, sub-directories are searched as well.</param>
/// <param name="progress">Optional sink that receives total/processed counts
/// and the current images-per-second rate after every file.</param>
/// <param name="resultProgress">Optional sink that receives each successfully
/// processed <see cref="ImageResult"/> as soon as it is available.</param>
/// <param name="cancellationToken">Stops the run early. Cancellation does not
/// throw: the results gathered so far are returned.</param>
/// <returns>The collected results ordered by file name. Images that failed
/// are included with an empty text.</returns>
/// <exception cref="DirectoryNotFoundException">Thrown when
/// <paramref name="directoryPath"/> does not exist.</exception>
public async Task<IEnumerable<ImageResult>> ProcessDirectoryAsync(string directoryPath, bool skipTextNegative = false, bool recursive = false, IProgress<ProcessingStats>? progress = null, IProgress<ImageResult>? resultProgress = null, CancellationToken cancellationToken = default)
{
    if (!Directory.Exists(directoryPath)) throw new DirectoryNotFoundException(directoryPath);
    var searchOption = recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;

    // Text-negative files are filtered out up front so 'total' only counts files that are
    // really processed; otherwise the reported progress could never reach the total.
    var files = Directory.EnumerateFiles(directoryPath, "*.*", searchOption)
        .Where(f => f.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) || f.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase))
        .Where(f => !(skipTextNegative && Path.GetFileName(f).StartsWith("tn_", StringComparison.OrdinalIgnoreCase)))
        .ToArray();

    var bag = new ConcurrentBag<ImageResult>();
    var total = files.Length;
    var processed = 0;
    var sw = System.Diagnostics.Stopwatch.StartNew();

    // Per-thread nets (each worker gets its own pair) to allow parallel forward calls
    var netsBag = new ConcurrentBag<(Net detNet, Net recNet)>();
    var threadLocalNets = new ThreadLocal<(Net detNet, Net recNet)>(() =>
    {
        var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
        var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
        ConfigureNetRuntime(det, _cfg.UseGpu);
        ConfigureNetRuntime(rec, _cfg.UseGpu);
        netsBag.Add((det, rec));
        return (det, rec);
    });

    try
    {
        // The token is deliberately NOT passed to Task.Run: a token that is already cancelled
        // would otherwise fault the await, while cancelling a moment later returns partial
        // results. Parallel.ForEach observes the token either way.
        await Task.Run(() =>
        {
            try
            {
                var options = new ParallelOptions
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount,
                    CancellationToken = cancellationToken
                };
                Parallel.ForEach(files, options, f =>
                {
                    cancellationToken.ThrowIfCancellationRequested();
                    var filename = Path.GetFileName(f);
                    try
                    {
                        var nets = threadLocalNets.Value;
                        using var image = Cv2.ImRead(f);
                        var regions = DetectTextRegions(nets.detNet, image).ToArray();
                        var texts = new List<string>();
                        foreach (var r in regions)
                        {
                            using var crop = new Mat(image, r.BoundingBox);
                            var ctx = $"{filename}_{r.BoundingBox.X}_{r.BoundingBox.Y}_{r.BoundingBox.Width}x{r.BoundingBox.Height}";
                            var txt = RecognizeDigits(crop, nets.recNet, ctx);
                            // Fallback: if empty, try a fresh net (diagnostic)
                            // NOTE(review): this reloads the model from disk for every empty crop,
                            // which is expensive — confirm it is still needed.
                            if (string.IsNullOrEmpty(txt))
                            {
                                try
                                {
                                    using var tempRec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
                                    ConfigureNetRuntime(tempRec, _cfg.UseGpu);
                                    var alt = RecognizeDigits(crop, tempRec, ctx);
                                    if (!string.IsNullOrEmpty(alt)) txt = alt;
                                }
                                catch (Exception ex)
                                {
                                    // Best effort only: keep the empty result, but leave a trace.
                                    _logger?.LogWarning(ex, "Fresh-net recognition fallback failed for {context}", ctx);
                                }
                            }
                            if (!string.IsNullOrEmpty(txt)) texts.Add(txt);
                        }

                        // If no text was recognized with per-thread nets, try one more time using the shared nets under a lock
                        if (texts.Count == 0)
                        {
                            try
                            {
                                DetectedRegion[] sharedRegions;
                                lock (_detectionLock)
                                {
                                    sharedRegions = DetectTextRegions(image).ToArray();
                                }
                                var sharedTexts = new List<string>();
                                foreach (var r2 in sharedRegions)
                                {
                                    using var crop2 = new Mat(image, r2.BoundingBox);
                                    var ctx2 = $"{filename}_{r2.BoundingBox.X}_{r2.BoundingBox.Y}_{r2.BoundingBox.Width}x{r2.BoundingBox.Height}";
                                    string txt2;
                                    lock (_recognitionLock)
                                    {
                                        txt2 = RecognizeDigits(crop2, ctx2);
                                    }
                                    if (!string.IsNullOrEmpty(txt2))
                                    {
                                        sharedTexts.Add(txt2);
                                    }
                                }
                                if (sharedTexts.Count > 0)
                                {
                                    texts = sharedTexts;
                                }
                            }
                            catch (Exception ex)
                            {
                                // Fallback errors do not fail the image, but they are no longer silent.
                                _logger?.LogWarning(ex, "Shared-net fallback failed for {file}", filename);
                            }
                        }

                        var imgRes = new ImageResult(filename, string.Join(",", texts), f);
                        if (!string.IsNullOrEmpty(imgRes.Text))
                            _logger?.LogInformation("[{file}] Result: {text}", imgRes.FileName, imgRes.Text);
                        bag.Add(imgRes);
                        resultProgress?.Report(imgRes);
                    }
                    catch (Exception ex)
                    {
                        _logger?.LogError(ex, "Error processing image {file}", filename);
                        bag.Add(new ImageResult(filename, string.Empty, f));
                    }
                    finally
                    {
                        var proc = Interlocked.Increment(ref processed);
                        if (progress != null)
                        {
                            // Clamp to 1 ms so the rate is never divided by zero.
                            var elapsed = Math.Max(1, sw.ElapsedMilliseconds);
                            var ips = proc * 1000.0 / elapsed;
                            progress.Report(new ProcessingStats(total, proc, ips));
                        }
                    }
                });
            }
            catch (OperationCanceledException)
            {
                // Cancellation requested — exit gracefully and return partial results
            }
        }).ConfigureAwait(false);
    }
    finally
    {
        // Release the per-thread nets even when the parallel loop throws.
        while (netsBag.TryTake(out var pair))
        {
            try { pair.detNet.Dispose(); } catch { }
            try { pair.recNet.Dispose(); } catch { }
        }
        threadLocalNets.Dispose();
    }

    return bag.OrderBy(b => b.FileName).ToList();
}
/// <summary>
/// Worker overload of <see cref="RecognizeDigits(Mat,string?)"/> that
/// accepts a <see cref="Net"/> instance. This is used by the parallel
/// processing pipeline where each worker owns its own Net instance.
/// Rows of the network output are read as
/// <c>[cx, cy, w, h, objectness, class1, class2, ...]</c>; candidates above
/// the configured confidence threshold go through non-maximum suppression
/// and the survivors are mapped to their labels in left-to-right order.
/// </summary>
/// <param name="croppedImage">Cropped region to recognize.</param>
/// <param name="recognitionNet">Recognition <see cref="Net"/> to execute
/// the forward pass with.</param>
/// <param name="context">Optional context string for diagnostics.</param>
/// <returns>Recognized digit sequence or empty string.</returns>
/// <exception cref="ArgumentNullException">Thrown when
/// <paramref name="croppedImage"/> is <c>null</c>.</exception>
private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null)
{
    if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));

    // Optionally save crop image for diagnostics when enabled in configuration
    if (_cfg.EnableCropSaving)
    {
        try
        {
            var cropsDir = Path.Combine("logs", "crops");
            Directory.CreateDirectory(cropsDir);
            var fname = $"{(string.IsNullOrEmpty(context) ? "crop" : SanitizeFileName(context))}_{DateTime.UtcNow:yyyyMMdd_HHmmss_fff}_{Guid.NewGuid():N}.jpg";
            var full = Path.Combine(cropsDir, fname);
            Cv2.ImWrite(full, croppedImage);
        }
        catch (Exception ex)
        {
            _logger?.LogError(ex, "Failed saving crop for diagnostics");
        }
    }

    using var blob = CvDnn.BlobFromImage(croppedImage, 0.00392, _cfg.RecognitionInputSize, new Scalar(0, 0, 0), true, false);
    recognitionNet.SetInput(blob);
    var outNames = GetOutputLayerNames(recognitionNet);

    // Every Mat produced below wraps native memory and is released in the finally block.
    var outs = new List<Mat>();
    try
    {
        // Net.Forward(outputs, names) fills the Mats it is handed; it cannot add to an empty
        // collection. Pre-allocating one Mat per output layer lets a single forward pass
        // deliver every output.
        for (int k = 0; k < outNames.Length; k++)
        {
            outs.Add(new Mat());
        }
        recognitionNet.Forward(outs, outNames);

        if (outs.All(o => o.Rows <= 0))
        {
            // Safety net: nothing was filled in, so query each output layer individually.
            foreach (var unfilled in outs)
            {
                unfilled.Dispose();
            }
            outs.Clear();
            foreach (var layerName in outNames)
            {
                try
                {
                    outs.Add(recognitionNet.Forward(layerName));
                }
                catch (Exception ex)
                {
                    _logger?.LogError(ex, "Recognition fallback forward failed for {name}", layerName);
                }
            }
        }

        var boxes = new List<Rect>();
        var confidences = new List<float>();
        var classIds = new List<int>();
        var centerXList = new List<double>();
        int imgW = croppedImage.Width;
        int imgH = croppedImage.Height;
        foreach (var outMat in outs)
        {
            // A row needs at least cx, cy, w, h and objectness to be readable.
            if (outMat.Cols < 5)
            {
                continue;
            }
            for (int i = 0; i < outMat.Rows; i++)
            {
                float cx = outMat.At<float>(i, 0) * imgW;
                float cy = outMat.At<float>(i, 1) * imgH;
                float w = outMat.At<float>(i, 2) * imgW;
                float h = outMat.At<float>(i, 3) * imgH;
                float objectness = outMat.At<float>(i, 4);
                float maxScore = 0f;
                int bestClass = -1;
                for (int c = 5; c < outMat.Cols; c++)
                {
                    float classProb = outMat.At<float>(i, c);
                    float score = objectness * classProb;
                    if (score > maxScore)
                    {
                        maxScore = score;
                        bestClass = c - 5;
                    }
                }
                // bestClass stays -1 when no class scored above zero; such a row must never
                // be used to index the label table, even with a non-positive threshold.
                if (bestClass >= 0 && maxScore > _cfg.ConfidenceThreshold)
                {
                    int x = (int)Math.Max(0, Math.Round(cx - w / 2));
                    int y = (int)Math.Max(0, Math.Round(cy - h / 2));
                    boxes.Add(new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)));
                    confidences.Add(maxScore);
                    classIds.Add(bestClass);
                    centerXList.Add(cx);
                }
            }
        }
        if (classIds.Count == 0) return string.Empty;

        CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] keep);
        // Sort the surviving digits by horizontal centre so they read left-to-right.
        var ordered = keep.Select(kept => new { Idx = kept, Cx = centerXList[kept], ClassId = classIds[kept] })
            .OrderBy(d => d.Cx)
            .Select(d => _cfg.NumberClasses[d.ClassId]);
        return string.Concat(ordered);
    }
    finally
    {
        foreach (var produced in outs)
        {
            produced.Dispose();
        }
    }
}
}
}