diff --git a/.forgejo/workflows/publish-aifotoonlus-core.yml b/.forgejo/workflows/publish-aifotoonlus-core.yml new file mode 100644 index 0000000..04a940d --- /dev/null +++ b/.forgejo/workflows/publish-aifotoonlus-core.yml @@ -0,0 +1,130 @@ +name: Build And Publish AIFotoONLUS.Core + +on: + push: + branches: + - master + - develop + tags: + - '*' + workflow_dispatch: + +env: + DOTNET_VERSION: 10.0.x + PROJECT_PATH: src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj + PACKAGE_OUTPUT_DIR: artifacts/nuget + PACKAGE_ARTIFACT_NAME: aifotoonlus-core-nuget + NUGET_SOURCE_NAME: forgejo-aifotoonlus + NUGET_SOURCE_URL: ${{ vars.AIFOTOONLUS_NUGET_SOURCE_URL || format('{0}/api/packages/{1}/nuget/index.json', github.server_url, vars.AIFOTOONLUS_PACKAGE_OWNER || github.repository_owner) }} + +jobs: + build: + runs-on: docker + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Restore + run: dotnet restore "${{ env.PROJECT_PATH }}" + + - name: Build + run: dotnet build "${{ env.PROJECT_PATH }}" --configuration Release --no-restore /p:GeneratePackageOnBuild=false + + - name: Pack + shell: bash + run: | + set -eu + mkdir -p "${{ env.PACKAGE_OUTPUT_DIR }}" + + if [[ "${GITHUB_REF}" == refs/tags/* ]]; then + package_version="${GITHUB_REF_NAME#v}" + echo "Packing tag version ${package_version}" + dotnet pack "${{ env.PROJECT_PATH }}" \ + --configuration Release \ + --output "${{ env.PACKAGE_OUTPUT_DIR }}" \ + --no-build \ + /p:PackageVersion="${package_version}" + else + echo "Packing with project version or MinVer-derived version" + dotnet pack "${{ env.PROJECT_PATH }}" \ + --configuration Release \ + --output "${{ env.PACKAGE_OUTPUT_DIR }}" \ + --no-build + fi + + - name: Upload package artifact + uses: actions/upload-artifact@v3 + with: + name: ${{ env.PACKAGE_ARTIFACT_NAME }} + path: ${{ env.PACKAGE_OUTPUT_DIR }}/*.nupkg + 
if-no-files-found: error + + publish: + if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' + needs: build + runs-on: docker + env: + FORGEJO_PACKAGE_USERNAME: ${{ secrets.FORGEJO_PACKAGE_USERNAME }} + FORGEJO_PACKAGE_TOKEN: ${{ secrets.FORGEJO_PACKAGE_TOKEN }} + + steps: + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Download package artifact + uses: actions/download-artifact@v3 + with: + name: ${{ env.PACKAGE_ARTIFACT_NAME }} + path: ${{ env.PACKAGE_OUTPUT_DIR }} + + - name: Validate publish secrets + shell: bash + run: | + set -eu + if [ -z "${FORGEJO_PACKAGE_USERNAME}" ]; then + echo "secrets.FORGEJO_PACKAGE_USERNAME is required" + exit 1 + fi + if [ -z "${FORGEJO_PACKAGE_TOKEN}" ]; then + echo "secrets.FORGEJO_PACKAGE_TOKEN is required" + exit 1 + fi + + - name: Configure Forgejo NuGet source + shell: bash + run: | + set -eu + # NuGet encrypts stored credentials only on Windows; other runners need clear-text storage. + dotnet nuget add source "${{ env.NUGET_SOURCE_URL }}" \ + --name "${{ env.NUGET_SOURCE_NAME }}" \ + --username "${FORGEJO_PACKAGE_USERNAME}" \ + --password "${FORGEJO_PACKAGE_TOKEN}" \ + --store-password-in-clear-text + + - name: Publish package to Forgejo NuGet + shell: bash + run: | + set -eu + shopt -s nullglob + packages=("${{ env.PACKAGE_OUTPUT_DIR }}"/*.nupkg) + if [ "${#packages[@]}" -eq 0 ]; then + echo "No NuGet packages found in ${{ env.PACKAGE_OUTPUT_DIR }}" + exit 1 + fi + + # dotnet nuget push takes a single package path, so push each collected file on its own. + for package in "${packages[@]}"; do + dotnet nuget push "${package}" \ + --source "${{ env.NUGET_SOURCE_NAME }}" \ + --skip-duplicate + done \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0bbddec --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# AIFotoONLUS Number Recognition Library + +This library provides a small, focused engine to detect and recognize numeric +text (digits) in images using Darknet (YOLO) models via OpenCvSharp's DNN API. +It is suitable for batch processing folders of images or individual files. 
+ +Features +- Detection network (Darknet/YOLO) to find candidate text regions. +- Recognition network (Darknet/YOLO) to identify digits inside detected crops. +- Single-file and directory-level processing APIs. +- Parallel processing with per-thread network instances for throughput. +- Diagnostic helpers to dump network output shapes and optionally save crop images. + +Basic usage +1. Create a `ModelConfiguration` instance that points to your Darknet `.cfg` +and `.weights` files for both detection and recognition networks, configure +confidence and NMS thresholds and provide a list of number class labels. + +2. Create an instance of `NumberRecognitionEngine`: + +```csharp +using var engine = new NumberRecognitionEngine(modelConfig, logger: null); +``` + +3. Process a single image: + +```csharp +var result = engine.ProcessImage("/path/to/image.jpg"); +Console.WriteLine(result.Text); +``` + +4. Process a directory (parallelized): + +```csharp +var results = await engine.ProcessDirectoryAsync("/path/to/images", recursive: false); +foreach (var r in results) Console.WriteLine($"{r.FileName}: {r.Text}"); +``` + +Configuration notes +- `ModelConfiguration` controls model file paths, input sizes, thresholds, + optional CUDA inference (`UseGpu`) and whether to save cropped images for + diagnostics. Make sure the paths are accessible to the process and the + model files match the expected network architectures. + +- The engine expects detection network outputs in the YOLO-style layout: + `[cx, cy, w, h, objectness, class1, class2, ...]`. + +Threading & diagnostics +- For directory/batch processing the engine creates per-thread Net instances + so OpenCV forward calls can run concurrently. It also contains fallback + logic that will perform processing with shared nets under a lock if needed. + +- When `EnableCropSaving` is enabled in configuration, each recognized crop is + saved to `logs/crops` with a timestamp and optional context label to aid + debugging false positives/negatives. 
+ +Troubleshooting +- If the engine returns no detections, verify the model files are correct and + compatible with the expected output layout. Use + `ProcessFileWithDiagnostics` to inspect output layer shapes. + +License & Notes +This project is provided as-is. See repository for licensing information and +for the model files distribution terms (models are usually not redistributed +with code and must be obtained separately). diff --git a/gitversion.json b/gitversion.json new file mode 100644 index 0000000..6a77551 --- /dev/null +++ b/gitversion.json @@ -0,0 +1,27 @@ +{ + "AssemblySemFileVer": "0.1.0.0", + "AssemblySemVer": "0.1.0.0", + "BranchName": "master", + "BuildMetaData": null, + "CommitDate": "2026-02-15", + "CommitsSinceVersionSource": 11, + "EscapedBranchName": "master", + "FullBuildMetaData": "Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a", + "FullSemVer": "0.1.0-{BranchName}.11", + "InformationalVersion": "0.1.0-{BranchName}.11+Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a", + "Major": 0, + "MajorMinorPatch": "0.1.0", + "Minor": 1, + "Patch": 0, + "PreReleaseLabel": "{BranchName}", + "PreReleaseLabelWithDash": "-{BranchName}", + "PreReleaseNumber": 11, + "PreReleaseTag": "{BranchName}.11", + "PreReleaseTagWithDash": "-{BranchName}.11", + "SemVer": "0.1.0-{BranchName}.11", + "Sha": "a90da31e531332a4cf0bafe604f89d0e14f3395a", + "ShortSha": "a90da31", + "UncommittedChanges": 7, + "VersionSourceSha": "", + "WeightedPreReleaseNumber": 11 +} diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj index c5f3f96..dab45a2 100644 --- a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj +++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj @@ -3,6 +3,10 @@ net10.0 enable enable + + true + + $(OutputPath)$(AssemblyName).xml @@ -10,7 +14,7 @@ Maddo Maddo Core library for AIFotoONLUS image processing and recognition. 
- https://gitlab.com/MaddoScientisto/aifotoonlus + https://forgejo.maddoscientisto.net/maddo/AIFotoONLUS 0.1.0 diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml new file mode 100644 index 0000000..4b6c8f2 --- /dev/null +++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml @@ -0,0 +1,331 @@ + + + + AIFotoONLUS.Core + + + + + Represents a detected text region produced by the detection network. + + Bounding rectangle of the detection in image coordinates. + Combined confidence score for the detection (objectness * class probability). + Class index predicted by the network (index into ). + Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. + + + + Represents a detected text region produced by the detection network. + + Bounding rectangle of the detection in image coordinates. + Combined confidence score for the detection (objectness * class probability). + Class index predicted by the network (index into ). + Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. + + + Bounding rectangle of the detection in image coordinates. + + + Combined confidence score for the detection (objectness * class probability). + + + Class index predicted by the network (index into ). + + + Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. + + + + Represents the result of recognizing a single region: recognized text, + its bounding box and confidence. + + Recognized text for the region (usually a sequence of digits). + Bounding rectangle of the recognition result. + Confidence score associated with the recognition. + + + + Represents the result of recognizing a single region: recognized text, + its bounding box and confidence. + + Recognized text for the region (usually a sequence of digits). + Bounding rectangle of the recognition result. + Confidence score associated with the recognition. 
+ + + Recognized text for the region (usually a sequence of digits). + + + Bounding rectangle of the recognition result. + + + Confidence score associated with the recognition. + + + + Aggregated result for a processed image. + + Name of the image file. + Comma-separated recognized texts found in the image (may be empty). + Full path to the processed image file. + + + + Aggregated result for a processed image. + + Name of the image file. + Comma-separated recognized texts found in the image (may be empty). + Full path to the processed image file. + + + Name of the image file. + + + Comma-separated recognized texts found in the image (may be empty). + + + Full path to the processed image file. + + + + Configuration options that control model file locations, input sizes + and runtime thresholds used by . + + + + + Path to the Darknet configuration (.cfg) file for the detection network. + + + + + Path to the Darknet weights (.weights) file for the detection network. + + + + + Path to the Darknet configuration (.cfg) file for the recognition network. + + + + + Path to the Darknet weights (.weights) file for the recognition network. + + + + + Confidence threshold used to filter out low-probability detections. + + + + + Non-maximum suppression (NMS) IoU threshold used to remove overlapping + detection boxes. + + + + + Input size used when preparing the blob for the detection network. + + + + + Input size used when preparing the blob for the recognition network. + + + + + Labels representing digit classes in the recognition model. The order + must match the class ordering used by the trained recognition network. + + + + + When enabled, request OpenCV DNN CUDA backend/target for inference. + The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail. + + + + + When enabled, recognition crops will be saved to disk under + "logs/crops" for diagnostic inspection. Disabled by default. + + + + + Create a new instance of using the + provided . 
The constructor loads the + detection and recognition Darknet model files and prepares the OpenCV + DNN nets for CPU inference. + + Model configuration containing file paths, thresholds + and other options. Must not be null. + + This constructor will throw when + any of the expected model files are missing. For logging purposes an + overload accepting an is available. + + + + + Create a new instance of with an + optional . The logger will receive diagnostic + messages and errors produced by the engine during processing. + + Model configuration containing file paths and + runtime thresholds. + Optional logger for diagnostic messages. + May be null. + Thrown when + is null. + Thrown when one of the model + files referenced by does not exist. + + + + Detect text regions in the supplied image using the detection network. + + Input image as an OpenCvSharp . + Must not be null. + An enumerable of containing the + bounding boxes, confidence and class information for each detected + region. The results are already filtered with the configured + confidence and NMS thresholds. + + + + Recognize digits inside a cropped image region using the recognition + network. The method runs the recognition network and returns the + concatenated sequence of recognized digit labels ordered left-to-right. + + Cropped image containing digits as + . Must not be null. + Optional context string used for diagnostics + (e.g. when saving crop image files). + A string containing recognized digits in left-to-right order. + Returns an empty string when no digits are recognized above the + configured confidence threshold. + + + + Small DTO that describes the name and shape of a detection network + forward output used for diagnostics. + + Layer/output name. + Number of rows in the output Mat. + Number of columns in the output Mat. + + + + Small DTO that describes the name and shape of a detection network + forward output used for diagnostics. + + Layer/output name. + Number of rows in the output Mat. 
+ Number of columns in the output Mat. + + + Layer/output name. + + + Number of rows in the output Mat. + + + Number of columns in the output Mat. + + + + Result returned by , contains + the recognized text result and an array describing detection network + forward outputs (shapes and names) which are useful for debugging + model output layout mismatches. + + Recognition result for the processed image. + Array describing detection net outputs. + + + + Result returned by , contains + the recognized text result and an array describing detection network + forward outputs (shapes and names) which are useful for debugging + model output layout mismatches. + + Recognition result for the processed image. + Array describing detection net outputs. + + + Recognition result for the processed image. + + + Array describing detection net outputs. + + + + Process a single image file and return the recognition result together + with detection network forward output shapes for diagnostics. This + method reads the image from disk, runs a forward pass over the + detection network to capture the raw output Mat shapes and then calls + the normal processing pipeline to return the recognized text. + + + + + Process a single image file and return the recognized text as an + . The method detects candidate text regions + and runs recognition on each crop. Multiple recognized digit sequences + are joined with a comma in the returned . + + Path to an image file on disk. Supported + formats depend on OpenCV (typically JPEG, PNG, ...). + An containing the file name and + recognized text (possibly empty). + + + + Process all JPEG images in a directory and return the recognition + results. This is a blocking wrapper over . + + Path to a directory containing images. + If true, files whose names start with + "tn_" will be skipped (convention used to mark text-negative images). + Collection of ordered by file name. + + + + Worker overload of that + accepts a instance. 
This is used by the parallel + processing pipeline where each worker owns its own Net instance. + + Cropped region to recognize. + Recognition to execute + the forward pass with. + Optional context string for diagnostics. + Recognized digit sequence or empty string. + + + + Progress statistics reported during directory processing. + + Total number of image files to process. + Number of files processed so far. + Current processing throughput in images/second. + + + + Progress statistics reported during directory processing. + + Total number of image files to process. + Number of files processed so far. + Current processing throughput in images/second. + + + Total number of image files to process. + + + Number of files processed so far. + + + Current processing throughput in images/second. + + + diff --git a/src/AIFotoONLUS.Core/DetectedRegion.cs b/src/AIFotoONLUS.Core/DetectedRegion.cs index 38fb2b7..7d90b6b 100644 --- a/src/AIFotoONLUS.Core/DetectedRegion.cs +++ b/src/AIFotoONLUS.Core/DetectedRegion.cs @@ -2,7 +2,29 @@ using OpenCvSharp; namespace AIFotoONLUS.Core { + /// + /// Represents a detected text region produced by the detection network. + /// + /// Bounding rectangle of the detection in image coordinates. + /// Combined confidence score for the detection (objectness * class probability). + /// Class index predicted by the network (index into ). + /// Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. public record DetectedRegion(Rect BoundingBox, float Confidence, int ClassId, double CenterX); + + /// + /// Represents the result of recognizing a single region: recognized text, + /// its bounding box and confidence. + /// + /// Recognized text for the region (usually a sequence of digits). + /// Bounding rectangle of the recognition result. + /// Confidence score associated with the recognition. 
public record RecognitionResult(string Text, Rect BoundingBox, double Confidence); + + /// + /// Aggregated result for a processed image. + /// + /// Name of the image file. + /// Comma-separated recognized texts found in the image (may be empty). + /// Full path to the processed image file. public record ImageResult(string FileName, string Text, string FilePath); } \ No newline at end of file diff --git a/src/AIFotoONLUS.Core/ModelConfiguration.cs b/src/AIFotoONLUS.Core/ModelConfiguration.cs index 4c14e7d..51d7ac7 100644 --- a/src/AIFotoONLUS.Core/ModelConfiguration.cs +++ b/src/AIFotoONLUS.Core/ModelConfiguration.cs @@ -2,21 +2,69 @@ using OpenCvSharp; namespace AIFotoONLUS.Core { + /// + /// Configuration options that control model file locations, input sizes + /// and runtime thresholds used by . + /// public class ModelConfiguration { + /// + /// Path to the Darknet configuration (.cfg) file for the detection network. + /// public string DetectionCfg { get; set; } = "models/detection.cfg"; + + /// + /// Path to the Darknet weights (.weights) file for the detection network. + /// public string DetectionWeights { get; set; } = "models/detection.weights"; + + /// + /// Path to the Darknet configuration (.cfg) file for the recognition network. + /// public string RecognitionCfg { get; set; } = "models/recognition.cfg"; + + /// + /// Path to the Darknet weights (.weights) file for the recognition network. + /// public string RecognitionWeights { get; set; } = "models/recognition.weights"; + /// + /// Confidence threshold used to filter out low-probability detections. + /// public double ConfidenceThreshold { get; set; } = 0.5; + + /// + /// Non-maximum suppression (NMS) IoU threshold used to remove overlapping + /// detection boxes. + /// public double NmsThreshold { get; set; } = 0.4; + /// + /// Input size used when preparing the blob for the detection network. 
+ /// public Size DetectionInputSize { get; set; } = new Size(416, 416); + + /// + /// Input size used when preparing the blob for the recognition network. + /// public Size RecognitionInputSize { get; set; } = new Size(140, 120); + /// + /// Labels representing digit classes in the recognition model. The order + /// must match the class ordering used by the trained recognition network. + /// public string[] NumberClasses { get; set; } = new[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }; - // When true, recognition crops will be saved to disk for diagnostics. Disabled by default. + + /// + /// When enabled, request OpenCV DNN CUDA backend/target for inference. + /// The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail. + /// + public bool UseGpu { get; set; } = false; + + /// + /// When enabled, recognition crops will be saved to disk under + /// "logs/crops" for diagnostic inspection. Disabled by default. + /// public bool EnableCropSaving { get; set; } = false; } } \ No newline at end of file diff --git a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs index 1c1a6ca..d6b2d25 100644 --- a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs +++ b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs @@ -12,8 +12,32 @@ using System.Threading.Tasks; namespace AIFotoONLUS.Core { + using Microsoft.Extensions.Logging; /// - /// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and - /// provides methods to detect text regions and recognize digits. + /// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO) + /// models through OpenCvSharp's DNN API and exposes simple synchronous and + /// asynchronous methods to detect numeric text regions in images and recognize + /// the digits contained within those regions. + /// + /// Overview + /// - Loads two Darknet networks: a detection network (finds text regions) + /// and a recognition network (recognizes digits inside a cropped region). 
+ /// - Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and + /// perform non‑maximum suppression (NMS) on detection candidates. + /// - Provides single-image and directory-level processing APIs. Directory + /// processing supports parallel workers where each worker uses its own + /// per-thread Net instances to allow concurrent forward calls. + /// + /// Threading and performance notes + /// - The class constructs and owns two shared Net instances used by the + /// simple (single-threaded) APIs. When doing parallel processing the + /// implementation creates per-thread Net instances to avoid concurrent + /// calls into the same Net object. A small fallback path exists that will + /// call into the shared nets under a lock when needed. + /// - OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported. + /// + /// Diagnostics + /// - When enabled via the configuration, crops may be saved to disk for + /// debugging. The contains thresholds and + /// paths used by the engine. /// - using Microsoft.Extensions.Logging; @@ -27,11 +51,37 @@ namespace AIFotoONLUS.Core private readonly ILogger? _logger; private bool _disposed; + /// + /// Create a new instance of using the + /// provided . The constructor loads the + /// detection and recognition Darknet model files and prepares the OpenCV + /// DNN nets for CPU inference, or for CUDA inference when UseGpu is enabled. + /// + /// Model configuration containing file paths, thresholds + /// and other options. Must not be null. + /// + /// This constructor will throw when + /// any of the expected model files are missing. For logging purposes an + /// overload accepting an is available. + /// public NumberRecognitionEngine(ModelConfiguration cfg) : this(cfg, logger: null) { } + /// + /// Create a new instance of with an + /// optional . The logger will receive diagnostic + /// messages and errors produced by the engine during processing. + /// + /// Model configuration containing file paths and + /// runtime thresholds. 
+ /// Optional logger for diagnostic messages. + /// May be null. + /// Thrown when + /// is null. + /// Thrown when one of the model + /// files referenced by does not exist. public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger) { _logger = logger; @@ -45,10 +95,8 @@ namespace AIFotoONLUS.Core _detectionNet = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights); _recognitionNet = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights); - _detectionNet.SetPreferableBackend(Backend.OPENCV); - _detectionNet.SetPreferableTarget(Target.CPU); - _recognitionNet.SetPreferableBackend(Backend.OPENCV); - _recognitionNet.SetPreferableTarget(Target.CPU); + ConfigureNetRuntime(_detectionNet, _cfg.UseGpu); + ConfigureNetRuntime(_recognitionNet, _cfg.UseGpu); // Let OpenCV use multiple threads internally (use number of logical processors) try { @@ -58,6 +106,22 @@ namespace AIFotoONLUS.Core { // Ignore if not supported by OpenCvSharp build } + + if (_cfg.UseGpu) + { + try + { + ValidateGpuRuntime(); + } + catch + { + // The engine is never handed to the caller when validation fails, so nobody + // could dispose it; release both native nets here before rethrowing. + _detectionNet.Dispose(); + _recognitionNet.Dispose(); + throw; + } + } } public void Dispose() @@ -69,6 +133,47 @@ namespace AIFotoONLUS.Core GC.SuppressFinalize(this); } + /// <summary> + /// Probes whether GPU inference is usable by constructing (and immediately disposing) a temporary engine from a copy of <paramref name="cfg"/> with <c>UseGpu</c> forced to true. + /// </summary> + /// <param name="cfg">Configuration to copy for the probe; the instance passed in is not modified.</param> + /// <param name="logger">Optional logger handed to the temporary engine. May be null.</param> + /// <param name="failureMessage">Null on success; otherwise the message of the innermost exception thrown while constructing the temporary engine.</param> + /// <returns>True when the temporary engine was constructed without throwing; false for any exception, including ones unrelated to CUDA.</returns> + /// <exception cref="ArgumentNullException">Thrown when <paramref name="cfg"/> is null.</exception> + /// <remarks>NOTE(review): OpenCV builds without CUDA may log a warning and fall back to CPU rather than throw, which would make this probe report success - confirm against the deployed runtime.</remarks> + public static bool TryValidateGpuRuntime(ModelConfiguration cfg, ILogger? logger, out string? 
failureMessage) + { + if (cfg is null) throw new ArgumentNullException(nameof(cfg)); + + var probeConfiguration = new ModelConfiguration + { + DetectionCfg = cfg.DetectionCfg, + DetectionWeights = cfg.DetectionWeights, + RecognitionCfg = cfg.RecognitionCfg, + RecognitionWeights = cfg.RecognitionWeights, + ConfidenceThreshold = cfg.ConfidenceThreshold, + NmsThreshold = cfg.NmsThreshold, + DetectionInputSize = cfg.DetectionInputSize, + RecognitionInputSize = cfg.RecognitionInputSize, + NumberClasses = cfg.NumberClasses, + EnableCropSaving = cfg.EnableCropSaving, + UseGpu = true + }; + + try + { + using var engine = new NumberRecognitionEngine(probeConfiguration, logger); + failureMessage = null; + return true; + } + catch (Exception ex) + { + failureMessage = ex.GetBaseException().Message; + return false; + } + } + private static string SanitizeFileName(string name) { foreach (var c in Path.GetInvalidFileNameChars()) name = name.Replace(c, '_'); @@ -77,6 +162,48 @@ namespace AIFotoONLUS.Core private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames(); + private static void ConfigureNetRuntime(Net net, bool useGpu) + { + if (useGpu) + { + net.SetPreferableBackend(Backend.CUDA); + net.SetPreferableTarget(Target.CUDA); + return; + } + + net.SetPreferableBackend(Backend.OPENCV); + net.SetPreferableTarget(Target.CPU); + } + + private void ValidateGpuRuntime() + { + try + { + using var detectionProbe = new Mat(_cfg.DetectionInputSize.Height, _cfg.DetectionInputSize.Width, MatType.CV_8UC3, Scalar.All(0)); + _ = DetectTextRegions(_detectionNet, detectionProbe).Take(1).ToArray(); + + using var recognitionProbe = new Mat(_cfg.RecognitionInputSize.Height, _cfg.RecognitionInputSize.Width, MatType.CV_8UC3, Scalar.All(0)); + using var blob = CvDnn.BlobFromImage(recognitionProbe, 0.00392, _cfg.RecognitionInputSize, new Scalar(0, 0, 0), true, false); + _recognitionNet.SetInput(blob); + using var output = _recognitionNet.Forward(); + } + catch (Exception ex) + 
{ + throw new InvalidOperationException( + "OpenCV DNN CUDA runtime validation failed. Disable number AI GPU mode or use an OpenCV runtime built with CUDA DNN support.", + ex); + } + } + + /// + /// Detect text regions in the supplied image using the detection network. + /// + /// Input image as an OpenCvSharp . + /// Must not be null. + /// An enumerable of containing the + /// bounding boxes, confidence and class information for each detected + /// region. The results are already filtered with the configured + /// confidence and NMS thresholds. public IEnumerable DetectTextRegions(Mat image) { if (image is null) throw new ArgumentNullException(nameof(image)); @@ -93,7 +220,7 @@ namespace AIFotoONLUS.Core var outNames = GetOutputLayerNames(detectionNet); var outsList = new List(); detectionNet.Forward(outsList, outNames); - + Mat[] outs = outsList.ToArray(); if (outs.Length == 0) { @@ -103,15 +230,15 @@ namespace AIFotoONLUS.Core var fallback = new List(); for (int on = 0; on < outNames.Length; on++) { - try - { - var single = detectionNet.Forward(outNames[on]); - fallback.Add(single); - } - catch (Exception ex) - { - _logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]); - } + try + { + var single = detectionNet.Forward(outNames[on]); + fallback.Add(single); + } + catch (Exception ex) + { + _logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]); + } } if (fallback.Count > 0) { @@ -162,21 +289,21 @@ namespace AIFotoONLUS.Core } if (maxScore > _cfg.ConfidenceThreshold) - { - int x = (int)Math.Max(0, Math.Round(cx - w / 2)); - int y = (int)Math.Max(0, Math.Round(cy - h / 2)); - var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)); - boxes.Add(rect); - confidences.Add(maxScore); - classIds.Add(bestClass); - centerXList.Add(cx); - } + { + int x = (int)Math.Max(0, Math.Round(cx - w / 2)); + int y = (int)Math.Max(0, Math.Round(cy - h / 2)); + var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)); + 
boxes.Add(rect); + confidences.Add(maxScore); + classIds.Add(bestClass); + centerXList.Add(cx); + } } } if (boxes.Count == 0) return Enumerable.Empty(); - + CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] indices); @@ -190,6 +317,18 @@ namespace AIFotoONLUS.Core return results; } + /// + /// Recognize digits inside a cropped image region using the recognition + /// network. The method runs the recognition network and returns the + /// concatenated sequence of recognized digit labels ordered left-to-right. + /// + /// Cropped image containing digits as + /// . Must not be null. + /// Optional context string used for diagnostics + /// (e.g. when saving crop image files). + /// A string containing recognized digits in left-to-right order. + /// Returns an empty string when no digits are recognized above the + /// configured confidence threshold. public string RecognizeDigits(Mat croppedImage, string? context = null) { if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage)); @@ -287,12 +426,31 @@ namespace AIFotoONLUS.Core return string.Concat(ordered); } + /// + /// Small DTO that describes the name and shape of a detection network + /// forward output used for diagnostics. + /// + /// Layer/output name. + /// Number of rows in the output Mat. + /// Number of columns in the output Mat. public record DetectionOutput(string Name, int Rows, int Cols); + + /// + /// Result returned by , contains + /// the recognized text result and an array describing detection network + /// forward outputs (shapes and names) which are useful for debugging + /// model output layout mismatches. + /// + /// Recognition result for the processed image. + /// Array describing detection net outputs. 
public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs); /// - /// Process a single image file and return the recognition result together with - /// detection network forward output shapes for diagnostics. + /// Process a single image file and return the recognition result together + /// with detection network forward output shapes for diagnostics. This + /// method reads the image from disk, runs a forward pass over the + /// detection network to capture the raw output Mat shapes and then calls + /// the normal processing pipeline to return the recognized text. /// public DiagnosticResult ProcessFileWithDiagnostics(string filePath) { @@ -330,6 +488,16 @@ namespace AIFotoONLUS.Core return new DiagnosticResult(imgRes, outputs); } + /// + /// Process a single image file and return the recognized text as an + /// . The method detects candidate text regions + /// and runs recognition on each crop. Multiple recognized digit sequences + /// are joined with a comma in the returned . + /// + /// Path to an image file on disk. Supported + /// formats depend on OpenCV (typically JPEG, PNG, ...). + /// An containing the file name and + /// recognized text (possibly empty). public ImageResult ProcessImage(string filePath) { if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath); @@ -351,6 +519,14 @@ namespace AIFotoONLUS.Core return result; } + /// + /// Process all JPEG images in a directory and return the recognition + /// results. This is a blocking wrapper over . + /// + /// Path to a directory containing images. + /// If true, files whose names start with + /// "tn_" will be skipped (convention used to mark text-negative images). + /// Collection of ordered by file name. 
public IEnumerable ProcessDirectory(string directoryPath, bool skipTextNegative = false) { // Simple wrapper over async implementation @@ -378,10 +554,8 @@ namespace AIFotoONLUS.Core { var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights); var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights); - det.SetPreferableBackend(Backend.OPENCV); - det.SetPreferableTarget(Target.CPU); - rec.SetPreferableBackend(Backend.OPENCV); - rec.SetPreferableTarget(Target.CPU); + ConfigureNetRuntime(det, _cfg.UseGpu); + ConfigureNetRuntime(rec, _cfg.UseGpu); netsBag.Add((det, rec)); return (det, rec); }); @@ -417,8 +591,7 @@ namespace AIFotoONLUS.Core try { using var tempRec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights); - tempRec.SetPreferableBackend(Backend.OPENCV); - tempRec.SetPreferableTarget(Target.CPU); + ConfigureNetRuntime(tempRec, _cfg.UseGpu); var alt = RecognizeDigits(crop, tempRec, ctx); if (!string.IsNullOrEmpty(alt)) txt = alt; } @@ -504,6 +677,16 @@ namespace AIFotoONLUS.Core } // Overload RecognizeDigits that accepts a Net for worker threads + /// + /// Worker overload of that + /// accepts a instance. This is used by the parallel + /// processing pipeline where each worker owns its own Net instance. + /// + /// Cropped region to recognize. + /// Recognition to execute + /// the forward pass with. + /// Optional context string for diagnostics. + /// Recognized digit sequence or empty string. private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null) { if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage)); diff --git a/src/AIFotoONLUS.Core/ProcessingStats.cs b/src/AIFotoONLUS.Core/ProcessingStats.cs index fe5aba3..9d0de10 100644 --- a/src/AIFotoONLUS.Core/ProcessingStats.cs +++ b/src/AIFotoONLUS.Core/ProcessingStats.cs @@ -1,4 +1,10 @@ namespace AIFotoONLUS.Core { + /// + /// Progress statistics reported during directory processing. 
+ /// + /// Total number of image files to process. + /// Number of files processed so far. + /// Current processing throughput in images/second. public record ProcessingStats(int TotalFiles, int ProcessedFiles, double ImagesPerSecond); }