Initial .NET scaffold: Core, Console, WPF projects
Introduced solution structure for AIFotoONLUS migration to .NET. Added Core library with YOLO-based detection/recognition engine using OpenCvSharp, Console batch runner, and WPF demo frontend with MVVM. Implemented model loading, directory processing, progress reporting, and preferences. Added README with build/run instructions.
This commit is contained in:
parent
314761bf9e
commit
769afc08fb
18 changed files with 976 additions and 0 deletions
369
src/AIFotoONLUS.Core/NumberRecognitionEngine.cs
Normal file
369
src/AIFotoONLUS.Core/NumberRecognitionEngine.cs
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
using OpenCvSharp;
|
||||
using OpenCvSharp.Dnn;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace AIFotoONLUS.Core
|
||||
{
|
||||
/// <summary>
|
||||
/// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and
|
||||
/// provides methods to detect text regions and recognize digits.
|
||||
/// </summary>
|
||||
public class NumberRecognitionEngine : IDisposable
|
||||
{
|
||||
private readonly Net _detectionNet;
|
||||
private readonly Net _recognitionNet;
|
||||
private readonly ModelConfiguration _cfg;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>
/// Creates the engine and loads the detection and recognition Darknet networks
/// referenced by <paramref name="cfg"/>. Both networks are set to the OpenCV backend on the CPU target.
/// </summary>
/// <param name="cfg">Configuration holding the cfg/weights paths of both networks.</param>
/// <exception cref="ArgumentNullException"><paramref name="cfg"/> is null.</exception>
/// <exception cref="FileNotFoundException">
/// One of the four model files does not exist; the exception's file name is the path that is missing.
/// </exception>
/// <exception cref="InvalidOperationException">A network could not be created from its files.</exception>
public NumberRecognitionEngine(ModelConfiguration cfg)
{
    _cfg = cfg ?? throw new ArgumentNullException(nameof(cfg));

    // Check each file on its own so the exception names the path that is actually missing
    // (previously the cfg path was reported even when only the weights file was absent).
    if (!File.Exists(_cfg.DetectionCfg))
        throw new FileNotFoundException("Detection model files not found.", _cfg.DetectionCfg);
    if (!File.Exists(_cfg.DetectionWeights))
        throw new FileNotFoundException("Detection model files not found.", _cfg.DetectionWeights);
    if (!File.Exists(_cfg.RecognitionCfg))
        throw new FileNotFoundException("Recognition model files not found.", _cfg.RecognitionCfg);
    if (!File.Exists(_cfg.RecognitionWeights))
        throw new FileNotFoundException("Recognition model files not found.", _cfg.RecognitionWeights);

    Net? detection = null;
    Net? recognition = null;
    try
    {
        detection = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights)
            ?? throw new InvalidOperationException("Detection network could not be loaded.");
        recognition = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights)
            ?? throw new InvalidOperationException("Recognition network could not be loaded.");

        detection.SetPreferableBackend(Backend.OPENCV);
        detection.SetPreferableTarget(Target.CPU);
        recognition.SetPreferableBackend(Backend.OPENCV);
        recognition.SetPreferableTarget(Target.CPU);
    }
    catch
    {
        // The constructor is about to fail, so Dispose() will never run on this instance:
        // release whatever was already loaded before propagating the error.
        recognition?.Dispose();
        detection?.Dispose();
        throw;
    }

    _detectionNet = detection;
    _recognitionNet = recognition;

    // Let OpenCV use multiple threads internally (use number of logical processors)
    try
    {
        Cv2.SetNumThreads(Environment.ProcessorCount);
    }
    catch
    {
        // Deliberate best effort: ignore if not supported by the OpenCvSharp build.
    }
}
|
||||
|
||||
/// <summary>
/// Disposes both networks held by this engine. Calls after the first one do nothing.
/// </summary>
public void Dispose()
{
    if (!_disposed)
    {
        _detectionNet?.Dispose();
        _recognitionNet?.Dispose();

        _disposed = true;
        GC.SuppressFinalize(this);
    }
}
|
||||
|
||||
// Returns the names of the network's unconnected output layers, as reported by the network itself.
private string[] GetOutputLayerNames(Net net)
{
    return net.GetUnconnectedOutLayersNames();
}
|
||||
|
||||
/// <summary>
/// Detects text regions in <paramref name="image"/> using the engine's own detection network.
/// </summary>
/// <param name="image">Image to analyse.</param>
/// <returns>The regions produced by the internal detection routine.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
{
    _ = image ?? throw new ArgumentNullException(nameof(image));

    var regions = DetectTextRegions(_detectionNet, image);
    return regions;
}
|
||||
|
||||
// Internal variant that accepts a Net instance so it can be used from parallel workers.
// Runs one forward pass of detectionNet over the image, keeps rows whose best class score
// exceeds the configured confidence threshold, applies non-maximum suppression and returns
// the surviving boxes. Returned boxes always lie inside the image so callers can crop them.
private IEnumerable<DetectedRegion> DetectTextRegions(Net detectionNet, Mat image)
{
    using var blob = CvDnn.BlobFromImage(image, 0.00392, _cfg.DetectionInputSize, new Scalar(0, 0, 0), true, false);
    detectionNet.SetInput(blob);

    var outNames = GetOutputLayerNames(detectionNet);

    // Net.Forward fills the Mats it is handed; it does not append to the collection.
    // One Mat per output layer therefore has to exist before the call, otherwise no
    // output is ever received.
    Mat[] outs = outNames.Select(_ => new Mat()).ToArray();

    var boxes = new List<Rect>();
    var confidences = new List<float>();
    var classIds = new List<int>();

    try
    {
        detectionNet.Forward(outs, outNames);

        int imgW = image.Width;
        int imgH = image.Height;

        foreach (var outMat in outs)
        {
            for (int i = 0; i < outMat.Rows; i++)
            {
                // Columns 0..3 are treated as centre x, centre y, width and height relative to the image size.
                float cx = outMat.At<float>(i, 0) * imgW;
                float cy = outMat.At<float>(i, 1) * imgH;
                float w = outMat.At<float>(i, 2) * imgW;
                float h = outMat.At<float>(i, 3) * imgH;

                // Columns 5.. are per-class scores; keep the best one.
                float maxScore = 0f;
                int bestClass = -1;
                for (int c = 5; c < outMat.Cols; c++)
                {
                    float score = outMat.At<float>(i, c);
                    if (score > maxScore)
                    {
                        maxScore = score;
                        bestClass = c - 5;
                    }
                }

                if (maxScore > _cfg.ConfidenceThreshold)
                {
                    // Clamp all four edges to the image. A box reaching past the right or bottom
                    // border would otherwise make the ROI crop performed by callers throw.
                    int left = (int)Math.Min(imgW, Math.Max(0, Math.Round(cx - w / 2)));
                    int top = (int)Math.Min(imgH, Math.Max(0, Math.Round(cy - h / 2)));
                    int right = (int)Math.Min(imgW, Math.Max(0, Math.Round(cx + w / 2)));
                    int bottom = (int)Math.Min(imgH, Math.Max(0, Math.Round(cy + h / 2)));

                    // Nothing left inside the image: not a usable region.
                    if (right <= left || bottom <= top) continue;

                    boxes.Add(new Rect(left, top, right - left, bottom - top));
                    confidences.Add(maxScore);
                    classIds.Add(bestClass);
                }
            }
        }
    }
    finally
    {
        // The output Mats wrap native memory; release them on every path.
        foreach (var outMat in outs)
        {
            outMat.Dispose();
        }
    }

    if (boxes.Count == 0) return Enumerable.Empty<DetectedRegion>();

    CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] indices);

    var results = new List<DetectedRegion>();
    foreach (var idx in indices)
    {
        var b = boxes[idx];
        double centerX = b.X + b.Width / 2.0;
        results.Add(new DetectedRegion(b, confidences[idx], classIds[idx], centerX));
    }
    return results;
}
|
||||
|
||||
/// <summary>
/// Recognizes the digits in <paramref name="croppedImage"/> using the engine's own recognition network.
/// </summary>
/// <param name="croppedImage">Image region expected to contain the number.</param>
/// <returns>The recognized characters ordered left to right, or an empty string when nothing passes the confidence threshold.</returns>
/// <exception cref="ArgumentNullException"><paramref name="croppedImage"/> is null.</exception>
public string RecognizeDigits(Mat croppedImage)
{
    if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));

    // Same approach as DetectTextRegions(Mat): delegate to the Net-taking variant instead of
    // keeping a second copy of the decoding logic that has to be maintained in lockstep.
    return RecognizeDigits(croppedImage, _recognitionNet);
}
|
||||
|
||||
/// <summary>
/// Reads one image file, detects its text regions and recognizes the digits in each region.
/// </summary>
/// <param name="filePath">Path of the image to process.</param>
/// <returns>
/// A result carrying the file name, the non-empty recognized texts joined with commas, and the full path.
/// </returns>
/// <exception cref="FileNotFoundException"><paramref name="filePath"/> does not exist.</exception>
public ImageResult ProcessImage(string filePath)
{
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException("Image not found", filePath);
    }

    using var source = Cv2.ImRead(filePath);

    var recognized = new List<string>();
    foreach (var region in DetectTextRegions(source).ToArray())
    {
        // Crop the detected region and run digit recognition on the crop only.
        using var roi = new Mat(source, region.BoundingBox);
        var digits = RecognizeDigits(roi);
        if (string.IsNullOrEmpty(digits))
        {
            continue;
        }
        recognized.Add(digits);
    }

    var joined = string.Join(",", recognized);
    return new ImageResult(Path.GetFileName(filePath), joined, filePath);
}
|
||||
|
||||
/// <summary>
/// Synchronous counterpart of <see cref="ProcessDirectoryAsync"/>: blocks the calling thread until
/// the directory has been processed. All other options of the asynchronous method keep their defaults.
/// </summary>
/// <param name="directoryPath">Directory containing the images.</param>
/// <param name="skipTextNegative">Forwarded unchanged to the asynchronous implementation.</param>
/// <returns>The results produced by the asynchronous implementation.</returns>
public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
{
    var pending = ProcessDirectoryAsync(directoryPath, skipTextNegative);
    return pending.GetAwaiter().GetResult();
}
|
||||
|
||||
/// <summary>
/// Processes every .jpg/.jpeg file of a directory in parallel on a background task, using one
/// detection/recognition network pair per worker thread.
/// </summary>
/// <param name="directoryPath">Directory containing the images.</param>
/// <param name="skipTextNegative">When true, files whose name starts with "tn_" (case-insensitive) are left out.</param>
/// <param name="recursive">When true, sub-directories are searched as well.</param>
/// <param name="progress">Optional sink receiving total, processed count and images per second after each file.</param>
/// <param name="resultProgress">Optional sink receiving each successfully produced result.</param>
/// <param name="cancellationToken">Cancels the run; when cancellation is observed by the parallel loop, the results collected so far are returned.</param>
/// <returns>The collected results ordered by file name. A file whose processing failed yields a result with empty text.</returns>
/// <exception cref="DirectoryNotFoundException"><paramref name="directoryPath"/> does not exist.</exception>
public async Task<IEnumerable<ImageResult>> ProcessDirectoryAsync(string directoryPath, bool skipTextNegative = false, bool recursive = false, IProgress<ProcessingStats>? progress = null, IProgress<ImageResult>? resultProgress = null, CancellationToken cancellationToken = default)
{
    if (!Directory.Exists(directoryPath)) throw new DirectoryNotFoundException(directoryPath);
    var searchOption = recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
    var files = Directory.EnumerateFiles(directoryPath, "*.*", searchOption)
        .Where(f => f.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) || f.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase))
        // Drop skipped files before counting: they used to be part of the total but were never
        // counted as processed, so the reported progress could not reach the total.
        .Where(f => !skipTextNegative || !Path.GetFileName(f).StartsWith("tn_", StringComparison.OrdinalIgnoreCase))
        .ToArray();

    var bag = new ConcurrentBag<ImageResult>();

    var dop = Environment.ProcessorCount;

    // Create a ThreadLocal pair of nets to avoid reloading for every file while still avoiding concurrent use of the same Net
    // Also keep a ConcurrentBag of created nets so we can dispose them safely from this thread
    var netsBag = new ConcurrentBag<(Net detNet, Net recNet)>();
    var threadLocalNets = new ThreadLocal<(Net detNet, Net recNet)>(() =>
    {
        var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
        try
        {
            var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
            det.SetPreferableBackend(Backend.OPENCV);
            det.SetPreferableTarget(Target.CPU);
            rec.SetPreferableBackend(Backend.OPENCV);
            rec.SetPreferableTarget(Target.CPU);
            netsBag.Add((det, rec));
            return (det, rec);
        }
        catch
        {
            // The pair never reached netsBag, so nobody else would release the detection net.
            det.Dispose();
            throw;
        }
    });

    var total = files.Length;
    var processed = 0;
    var sw = System.Diagnostics.Stopwatch.StartNew();

    try
    {
        await Task.Run(() =>
        {
            try
            {
                Parallel.ForEach(files, new ParallelOptions { MaxDegreeOfParallelism = dop, CancellationToken = cancellationToken }, f =>
                {
                    // Stop picking up new files once cancellation is requested; the resulting
                    // OperationCanceledException is handled around the loop below.
                    cancellationToken.ThrowIfCancellationRequested();
                    var filename = Path.GetFileName(f);

                    try
                    {
                        var nets = threadLocalNets.Value;
                        using var image = Cv2.ImRead(f);

                        var regions = DetectTextRegions(nets.detNet, image).ToArray();
                        var texts = new List<string>();
                        foreach (var r in regions)
                        {
                            using var crop = new Mat(image, r.BoundingBox);
                            var txt = RecognizeDigits(crop, nets.recNet);
                            if (!string.IsNullOrEmpty(txt)) texts.Add(txt);
                        }
                        var imgRes = new ImageResult(filename, string.Join(",", texts), f);
                        bag.Add(imgRes);
                        resultProgress?.Report(imgRes);
                    }
                    catch
                    {
                        // swallow per-file errors and record an empty result for the file
                        bag.Add(new ImageResult(filename, string.Empty, f));
                    }
                    finally
                    {
                        var proc = Interlocked.Increment(ref processed);
                        if (progress != null)
                        {
                            // Floor at 1 ms so the rate never divides by zero.
                            var elapsed = Math.Max(1, sw.ElapsedMilliseconds);
                            var ips = proc * 1000.0 / elapsed;
                            progress.Report(new ProcessingStats(total, proc, ips));
                        }
                    }
                });
            }
            catch (OperationCanceledException)
            {
                // Cancellation requested — exit gracefully and return partial results
            }
        }, cancellationToken).ConfigureAwait(false);
    }
    finally
    {
        // Dispose created nets on every path, including when the awaited work throws.
        // The parallel loop has finished by the time the await completes, so no worker still uses them.
        while (netsBag.TryTake(out var pair))
        {
            try { pair.detNet.Dispose(); } catch { }
            try { pair.recNet.Dispose(); } catch { }
        }
        threadLocalNets.Dispose();
    }

    return bag.OrderBy(b => b.FileName).ToList();
}
|
||||
|
||||
// Overload of RecognizeDigits that accepts a Net so worker threads can use their own network.
// Runs one forward pass of recognitionNet over the crop, keeps rows whose best class score
// exceeds the configured confidence threshold, applies non-maximum suppression, then maps the
// surviving class ids through _cfg.NumberClasses in left-to-right order (by box centre x).
// Returns an empty string when no row passes the threshold.
private string RecognizeDigits(Mat croppedImage, Net recognitionNet)
{
    if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));

    using var blob = CvDnn.BlobFromImage(croppedImage, 0.00392, _cfg.RecognitionInputSize, new Scalar(0, 0, 0), true, false);
    recognitionNet.SetInput(blob);

    var outNames = GetOutputLayerNames(recognitionNet);

    // Net.Forward fills the Mats it is handed; it does not append to the collection.
    // One Mat per output layer therefore has to exist before the call, otherwise no
    // output is ever received.
    Mat[] outs = outNames.Select(_ => new Mat()).ToArray();

    var boxes = new List<Rect>();
    var confidences = new List<float>();
    var classIds = new List<int>();
    var centerXList = new List<double>();

    try
    {
        recognitionNet.Forward(outs, outNames);

        int imgW = croppedImage.Width;
        int imgH = croppedImage.Height;

        foreach (var outMat in outs)
        {
            for (int i = 0; i < outMat.Rows; i++)
            {
                // Columns 0..3 are treated as centre x, centre y, width and height relative to the crop size.
                float cx = outMat.At<float>(i, 0) * imgW;
                float cy = outMat.At<float>(i, 1) * imgH;
                float w = outMat.At<float>(i, 2) * imgW;
                float h = outMat.At<float>(i, 3) * imgH;

                // Columns 5.. are per-class scores; keep the best one.
                float maxScore = 0f;
                int bestClass = -1;
                for (int c = 5; c < outMat.Cols; c++)
                {
                    float score = outMat.At<float>(i, c);
                    if (score > maxScore)
                    {
                        maxScore = score;
                        bestClass = c - 5;
                    }
                }

                if (maxScore > _cfg.ConfidenceThreshold)
                {
                    // These boxes are only used for suppression and ordering, never for cropping.
                    int x = (int)Math.Max(0, Math.Round(cx - w / 2));
                    int y = (int)Math.Max(0, Math.Round(cy - h / 2));
                    boxes.Add(new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)));
                    confidences.Add(maxScore);
                    classIds.Add(bestClass);
                    centerXList.Add(cx);
                }
            }
        }
    }
    finally
    {
        // The output Mats wrap native memory; release them on every path.
        foreach (var outMat in outs)
        {
            outMat.Dispose();
        }
    }

    if (classIds.Count == 0) return string.Empty;

    CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] keep);

    // Read the characters left to right by sorting the kept boxes on their centre x.
    var ordered = keep.Select(i => (Cx: centerXList[i], ClassId: classIds[i]))
        .OrderBy(d => d.Cx)
        .Select(d => _cfg.NumberClasses[d.ClassId]);
    return string.Concat(ordered);
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue