From dc4ebdaf42f1b92a2f403a41ed2df6392791ac81 Mon Sep 17 00:00:00 2001 From: MaddoScientisto Date: Sun, 15 Feb 2026 23:37:08 +0100 Subject: [PATCH] Add full XML docs and NuGet IntelliSense support Comprehensive XML documentation added to all public types, methods, and properties in AIFotoONLUS.Core. Project updated to generate and pack XML docs for NuGet consumers. README rewritten for clarity. Improves developer experience with rich IntelliSense and API docs. --- README.md | 65 ++++ src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj | 13 + src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml | 325 ++++++++++++++++++ src/AIFotoONLUS.Core/DetectedRegion.cs | 22 ++ src/AIFotoONLUS.Core/ModelConfiguration.cs | 44 ++- .../NumberRecognitionEngine.cs | 126 ++++++- src/AIFotoONLUS.Core/ProcessingStats.cs | 6 + 7 files changed, 596 insertions(+), 5 deletions(-) create mode 100644 README.md create mode 100644 src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml diff --git a/README.md b/README.md new file mode 100644 index 0000000..0bbddec --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# AIFotoONLUS Number Recognition Library + +This library provides a small, focused engine to detect and recognize numeric +text (digits) in images using Darknet (YOLO) models via OpenCvSharp's DNN API. +It is suitable for batch processing folders of images or individual files. + +Features +- Detection network (Darknet/Yolo) to find candidate text regions. +- Recognition network (Darknet/Yolo) to identify digits inside detected crops. +- Single-file and directory-level processing APIs. +- Parallel processing with per-thread network instances for throughput. +- Diagnostic helpers to dump network output shapes and optionally save crop images. + +Basic usage +1. Create a `ModelConfiguration` instance that points to your Darknet `.cfg` +and `.weights` files for both detection and recognition networks, configure +confidence and NMS thresholds and provide a list of number class labels. + +2. 
Create an instance of `NumberRecognitionEngine`: + +```csharp +using var engine = new NumberRecognitionEngine(modelConfig, logger: null); +``` + +3. Process a single image: + +```csharp +var result = engine.ProcessImage("/path/to/image.jpg"); +Console.WriteLine(result.Text); +``` + +4. Process a directory (parallelized): + +```csharp +var results = await engine.ProcessDirectoryAsync("/path/to/images", recursive: false); +foreach (var r in results) Console.WriteLine($"{r.FileName}: {r.Text}"); +``` + +Configuration notes +- `ModelConfiguration` controls model file paths, input sizes, thresholds and + whether to save cropped images for diagnostics. Make sure the paths are + accessible to the process and the model files match the expected network + architectures. + +- The engine expects detection network outputs in the YOLO-style layout: + `[cx, cy, w, h, objectness, class1, class2, ...]`. + +Threading & diagnostics +- For directory/batch processing the engine creates per-thread Net instances + so OpenCV forward calls can run concurrently. It also contains fallback + logic that will perform processing with shared nets under a lock if needed. + +- When `EnableCropSaving` is enabled in configuration, each recognized crop is + saved to `logs/crops` with a timestamp and optional context label to aid + debugging false positives/negatives. + +Troubleshooting +- If the engine returns no detections, verify the model files are correct and + compatible with the expected output layout. Use + `ProcessFileWithDiagnostics` to inspect output layer shapes. + +License & Notes +This project is provided as-is. See repository for licensing information and +for the model files distribution terms (models are usually not redistributed +with code and must be obtained separately). 
diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj index c5f3f96..792a268 100644 --- a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj +++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj @@ -3,7 +3,20 @@ net10.0 enable enable + + true + + $(OutputPath)$(AssemblyName).xml + + + + true + lib\$(TargetFramework)\ + + AIFotoONLUS.Core diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml new file mode 100644 index 0000000..39fc26a --- /dev/null +++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml @@ -0,0 +1,325 @@ + + + + AIFotoONLUS.Core + + + + + Represents a detected text region produced by the detection network. + + Bounding rectangle of the detection in image coordinates. + Combined confidence score for the detection (objectness * class probability). + Class index predicted by the network (index into ). + Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. + + + + Represents a detected text region produced by the detection network. + + Bounding rectangle of the detection in image coordinates. + Combined confidence score for the detection (objectness * class probability). + Class index predicted by the network (index into ). + Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. + + + Bounding rectangle of the detection in image coordinates. + + + Combined confidence score for the detection (objectness * class probability). + + + Class index predicted by the network (index into ). + + + Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. + + + + Represents the result of recognizing a single region: recognized text, + its bounding box and confidence. + + Recognized text for the region (usually a sequence of digits). + Bounding rectangle of the recognition result. + Confidence score associated with the recognition. 
+ + + + Represents the result of recognizing a single region: recognized text, + its bounding box and confidence. + + Recognized text for the region (usually a sequence of digits). + Bounding rectangle of the recognition result. + Confidence score associated with the recognition. + + + Recognized text for the region (usually a sequence of digits). + + + Bounding rectangle of the recognition result. + + + Confidence score associated with the recognition. + + + + Aggregated result for a processed image. + + Name of the image file. + Comma-separated recognized texts found in the image (may be empty). + Full path to the processed image file. + + + + Aggregated result for a processed image. + + Name of the image file. + Comma-separated recognized texts found in the image (may be empty). + Full path to the processed image file. + + + Name of the image file. + + + Comma-separated recognized texts found in the image (may be empty). + + + Full path to the processed image file. + + + + Configuration options that control model file locations, input sizes + and runtime thresholds used by . + + + + + Path to the Darknet configuration (.cfg) file for the detection network. + + + + + Path to the Darknet weights (.weights) file for the detection network. + + + + + Path to the Darknet configuration (.cfg) file for the recognition network. + + + + + Path to the Darknet weights (.weights) file for the recognition network. + + + + + Confidence threshold used to filter out low-probability detections. + + + + + Non-maximum suppression (NMS) IoU threshold used to remove overlapping + detection boxes. + + + + + Input size used when preparing the blob for the detection network. + + + + + Input size used when preparing the blob for the recognition network. + + + + + Labels representing digit classes in the recognition model. The order + must match the class ordering used by the trained recognition network. 
+ + + + + When enabled, recognition crops will be saved to disk under + "logs/crops" for diagnostic inspection. Disabled by default. + + + + + Create a new instance of using the + provided . The constructor loads the + detection and recognition Darknet model files and prepares the OpenCV + DNN nets for CPU inference. + + Model configuration containing file paths, thresholds + and other options. Must not be null. + + This constructor will throw when + any of the expected model files are missing. For logging purposes an + overload accepting an is available. + + + + + Create a new instance of with an + optional . The logger will receive diagnostic + messages and errors produced by the engine during processing. + + Model configuration containing file paths and + runtime thresholds. + Optional logger for diagnostic messages. + May be null. + Thrown when + is null. + Thrown when one of the model + files referenced by does not exist. + + + + Detect text regions in the supplied image using the detection network. + + Input image as an OpenCvSharp . + Must not be null. + An enumerable of containing the + bounding boxes, confidence and class information for each detected + region. The results are already filtered with the configured + confidence and NMS thresholds. + + + + Recognize digits inside a cropped image region using the recognition + network. The method runs the recognition network and returns the + concatenated sequence of recognized digit labels ordered left-to-right. + + Cropped image containing digits as + . Must not be null. + Optional context string used for diagnostics + (e.g. when saving crop image files). + A string containing recognized digits in left-to-right order. + Returns an empty string when no digits are recognized above the + configured confidence threshold. + + + + Small DTO that describes the name and shape of a detection network + forward output used for diagnostics. + + Layer/output name. + Number of rows in the output Mat. 
+ Number of columns in the output Mat. + + + + Small DTO that describes the name and shape of a detection network + forward output used for diagnostics. + + Layer/output name. + Number of rows in the output Mat. + Number of columns in the output Mat. + + + Layer/output name. + + + Number of rows in the output Mat. + + + Number of columns in the output Mat. + + + + Result returned by , contains + the recognized text result and an array describing detection network + forward outputs (shapes and names) which are useful for debugging + model output layout mismatches. + + Recognition result for the processed image. + Array describing detection net outputs. + + + + Result returned by , contains + the recognized text result and an array describing detection network + forward outputs (shapes and names) which are useful for debugging + model output layout mismatches. + + Recognition result for the processed image. + Array describing detection net outputs. + + + Recognition result for the processed image. + + + Array describing detection net outputs. + + + + Process a single image file and return the recognition result together + with detection network forward output shapes for diagnostics. This + method reads the image from disk, runs a forward pass over the + detection network to capture the raw output Mat shapes and then calls + the normal processing pipeline to return the recognized text. + + + + + Process a single image file and return the recognized text as an + . The method detects candidate text regions + and runs recognition on each crop. Multiple recognized digit sequences + are joined with a comma in the returned . + + Path to an image file on disk. Supported + formats depend on OpenCV (typically JPEG, PNG, ...). + An containing the file name and + recognized text (possibly empty). + + + + Process all JPEG images in a directory and return the recognition + results. This is a blocking wrapper over . + + Path to a directory containing images. 
+ If true, files whose names start with + "tn_" will be skipped (convention used to mark text-negative images). + Collection of ordered by file name. + + + + Worker overload of that + accepts a instance. This is used by the parallel + processing pipeline where each worker owns its own Net instance. + + Cropped region to recognize. + Recognition to execute + the forward pass with. + Optional context string for diagnostics. + Recognized digit sequence or empty string. + + + + Progress statistics reported during directory processing. + + Total number of image files to process. + Number of files processed so far. + Current processing throughput in images/second. + + + + Progress statistics reported during directory processing. + + Total number of image files to process. + Number of files processed so far. + Current processing throughput in images/second. + + + Total number of image files to process. + + + Number of files processed so far. + + + Current processing throughput in images/second. + + + diff --git a/src/AIFotoONLUS.Core/DetectedRegion.cs b/src/AIFotoONLUS.Core/DetectedRegion.cs index 38fb2b7..7d90b6b 100644 --- a/src/AIFotoONLUS.Core/DetectedRegion.cs +++ b/src/AIFotoONLUS.Core/DetectedRegion.cs @@ -2,7 +2,29 @@ using OpenCvSharp; namespace AIFotoONLUS.Core { + /// + /// Represents a detected text region produced by the detection network. + /// + /// Bounding rectangle of the detection in image coordinates. + /// Combined confidence score for the detection (objectness * class probability). + /// Class index predicted by the network (index into ). + /// Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. public record DetectedRegion(Rect BoundingBox, float Confidence, int ClassId, double CenterX); + + /// + /// Represents the result of recognizing a single region: recognized text, + /// its bounding box and confidence. + /// + /// Recognized text for the region (usually a sequence of digits). 
+ /// Bounding rectangle of the recognition result. + /// Confidence score associated with the recognition. public record RecognitionResult(string Text, Rect BoundingBox, double Confidence); + + /// + /// Aggregated result for a processed image. + /// + /// Name of the image file. + /// Comma-separated recognized texts found in the image (may be empty). + /// Full path to the processed image file. public record ImageResult(string FileName, string Text, string FilePath); } \ No newline at end of file diff --git a/src/AIFotoONLUS.Core/ModelConfiguration.cs b/src/AIFotoONLUS.Core/ModelConfiguration.cs index 4c14e7d..0b0b553 100644 --- a/src/AIFotoONLUS.Core/ModelConfiguration.cs +++ b/src/AIFotoONLUS.Core/ModelConfiguration.cs @@ -2,21 +2,63 @@ using OpenCvSharp; namespace AIFotoONLUS.Core { + /// + /// Configuration options that control model file locations, input sizes + /// and runtime thresholds used by NumberRecognitionEngine. + /// public class ModelConfiguration { + /// + /// Path to the Darknet configuration (.cfg) file for the detection network. + /// public string DetectionCfg { get; set; } = "models/detection.cfg"; + + /// + /// Path to the Darknet weights (.weights) file for the detection network. + /// public string DetectionWeights { get; set; } = "models/detection.weights"; + + /// + /// Path to the Darknet configuration (.cfg) file for the recognition network. + /// public string RecognitionCfg { get; set; } = "models/recognition.cfg"; + + /// + /// Path to the Darknet weights (.weights) file for the recognition network. + /// public string RecognitionWeights { get; set; } = "models/recognition.weights"; + /// + /// Confidence threshold used to filter out low-probability detections. + /// public double ConfidenceThreshold { get; set; } = 0.5; + + /// + /// Non-maximum suppression (NMS) IoU threshold used to remove overlapping + /// detection boxes. 
+ /// public double NmsThreshold { get; set; } = 0.4; + /// + /// Input size used when preparing the blob for the detection network. + /// public Size DetectionInputSize { get; set; } = new Size(416, 416); + + /// + /// Input size used when preparing the blob for the recognition network. + /// public Size RecognitionInputSize { get; set; } = new Size(140, 120); + /// + /// Labels representing digit classes in the recognition model. The order + /// must match the class ordering used by the trained recognition network. + /// public string[] NumberClasses { get; set; } = new[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }; - // When true, recognition crops will be saved to disk for diagnostics. Disabled by default. + + /// + /// When enabled, recognition crops will be saved to disk under + /// "logs/crops" for diagnostic inspection. Disabled by default. + /// public bool EnableCropSaving { get; set; } = false; } } \ No newline at end of file diff --git a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs index 1c1a6ca..b9b9284 100644 --- a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs +++ b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs @@ -12,8 +12,32 @@ using System.Threading.Tasks; namespace AIFotoONLUS.Core { /// - /// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and - /// provides methods to detect text regions and recognize digits. + /// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO) + /// models through OpenCvSharp's DNN API and exposes simple synchronous and + /// asynchronous methods to detect numeric text regions in images and recognize + /// the digits contained within those regions. + /// + /// Overview + /// - Loads two Darknet networks: a detection network (finds text regions) + /// and a recognition network (recognizes digits inside a cropped region). 
+ /// - Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and + /// perform non-maximum suppression (NMS) on detection candidates. + /// - Provides single-image and directory-level processing APIs. Directory + /// processing supports parallel workers where each worker uses its own + /// per-thread Net instances to allow concurrent forward calls. + /// + /// Threading and performance notes + /// - The class constructs and owns two shared Net instances used by the + /// simple (single-threaded) APIs. When doing parallel processing the + /// implementation creates per-thread Net instances to avoid concurrent + /// calls into the same Net object. A small fallback path exists that will + /// call into the shared nets under a lock when needed. + /// - OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported. + /// + /// Diagnostics + /// - When enabled via the configuration, crops may be saved to disk for + /// debugging. The ModelConfiguration contains thresholds and + /// paths used by the engine. /// using Microsoft.Extensions.Logging; @@ -27,11 +51,37 @@ namespace AIFotoONLUS.Core private readonly ILogger? _logger; private bool _disposed; + /// + /// Create a new instance of NumberRecognitionEngine using the + /// provided ModelConfiguration. The constructor loads the + /// detection and recognition Darknet model files and prepares the OpenCV + /// DNN nets for CPU inference. + /// + /// Model configuration containing file paths, thresholds + /// and other options. Must not be null. + /// + /// This constructor will throw an exception when + /// any of the expected model files are missing. For logging purposes an + /// overload accepting an ILogger is available. + /// public NumberRecognitionEngine(ModelConfiguration cfg) : this(cfg, logger: null) { } + /// + /// Create a new instance of NumberRecognitionEngine with an + /// optional ILogger. The logger will receive diagnostic + /// messages and errors produced by the engine during processing. + /// + /// Model configuration containing file paths and + /// runtime thresholds. 
+ /// Optional logger for diagnostic messages. + /// May be null. + /// Thrown when cfg + /// is null. + /// Thrown when one of the model + /// files referenced by cfg does not exist. public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger) { _logger = logger; @@ -77,6 +127,15 @@ namespace AIFotoONLUS.Core private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames(); + /// + /// Detect text regions in the supplied image using the detection network. + /// + /// Input image as an OpenCvSharp Mat. + /// Must not be null. + /// An enumerable of DetectedRegion containing the + /// bounding boxes, confidence and class information for each detected + /// region. The results are already filtered with the configured + /// confidence and NMS thresholds. public IEnumerable DetectTextRegions(Mat image) { if (image is null) throw new ArgumentNullException(nameof(image)); @@ -190,6 +249,18 @@ namespace AIFotoONLUS.Core return results; } + /// + /// Recognize digits inside a cropped image region using the recognition + /// network. The method runs the recognition network and returns the + /// concatenated sequence of recognized digit labels ordered left-to-right. + /// + /// Cropped image containing digits as an + /// OpenCvSharp Mat. Must not be null. + /// Optional context string used for diagnostics + /// (e.g. when saving crop image files). + /// A string containing recognized digits in left-to-right order. + /// Returns an empty string when no digits are recognized above the + /// configured confidence threshold. public string RecognizeDigits(Mat croppedImage, string? context = null) { if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage)); @@ -287,12 +358,31 @@ namespace AIFotoONLUS.Core return string.Concat(ordered); } + /// + /// Small DTO that describes the name and shape of a detection network + /// forward output used for diagnostics. + /// + /// Layer/output name. + /// Number of rows in the output Mat. + /// Number of columns in the output Mat. 
public record DetectionOutput(string Name, int Rows, int Cols); + + /// + /// Result returned by ProcessFileWithDiagnostics; contains + /// the recognized text result and an array describing detection network + /// forward outputs (shapes and names) which are useful for debugging + /// model output layout mismatches. + /// + /// Recognition result for the processed image. + /// Array describing detection net outputs. public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs); /// - /// Process a single image file and return the recognition result together with - /// detection network forward output shapes for diagnostics. + /// Process a single image file and return the recognition result together + /// with detection network forward output shapes for diagnostics. This + /// method reads the image from disk, runs a forward pass over the + /// detection network to capture the raw output Mat shapes and then calls + /// the normal processing pipeline to return the recognized text. /// public DiagnosticResult ProcessFileWithDiagnostics(string filePath) { @@ -330,6 +420,16 @@ namespace AIFotoONLUS.Core return new DiagnosticResult(imgRes, outputs); } + /// + /// Process a single image file and return the recognized text as an + /// ImageResult. The method detects candidate text regions + /// and runs recognition on each crop. Multiple recognized digit sequences + /// are joined with a comma in the returned ImageResult.Text. + /// + /// Path to an image file on disk. Supported + /// formats depend on OpenCV (typically JPEG, PNG, ...). + /// An ImageResult containing the file name and + /// recognized text (possibly empty). public ImageResult ProcessImage(string filePath) { if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath); @@ -351,6 +451,14 @@ namespace AIFotoONLUS.Core return result; } + /// + /// Process all JPEG images in a directory and return the recognition + /// results. This is a blocking wrapper over ProcessDirectoryAsync. + /// + /// Path to a directory containing images. 
+ /// If true, files whose names start with + /// "tn_" will be skipped (convention used to mark text-negative images). + /// Collection of ImageResult ordered by file name. public IEnumerable ProcessDirectory(string directoryPath, bool skipTextNegative = false) { // Simple wrapper over async implementation @@ -504,6 +612,16 @@ namespace AIFotoONLUS.Core } // Overload RecognizeDigits that accepts a Net for worker threads + /// + /// Worker overload of RecognizeDigits that + /// accepts a Net instance. This is used by the parallel + /// processing pipeline where each worker owns its own Net instance. + /// + /// Cropped region to recognize. + /// Recognition Net to execute + /// the forward pass with. + /// Optional context string for diagnostics. + /// Recognized digit sequence or empty string. private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null) { if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage)); diff --git a/src/AIFotoONLUS.Core/ProcessingStats.cs b/src/AIFotoONLUS.Core/ProcessingStats.cs index fe5aba3..9d0de10 100644 --- a/src/AIFotoONLUS.Core/ProcessingStats.cs +++ b/src/AIFotoONLUS.Core/ProcessingStats.cs @@ -1,4 +1,10 @@ namespace AIFotoONLUS.Core { + /// + /// Progress statistics reported during directory processing. + /// + /// Total number of image files to process. + /// Number of files processed so far. + /// Current processing throughput in images/second. public record ProcessingStats(int TotalFiles, int ProcessedFiles, double ImagesPerSecond); }