diff --git a/.forgejo/workflows/publish-aifotoonlus-core.yml b/.forgejo/workflows/publish-aifotoonlus-core.yml
new file mode 100644
index 0000000..04a940d
--- /dev/null
+++ b/.forgejo/workflows/publish-aifotoonlus-core.yml
@@ -0,0 +1,149 @@
+# Builds AIFotoONLUS.Core on every push to master/develop and on every tag,
+# packs it as a NuGet package, and publishes the package to the Forgejo NuGet
+# registry for tag builds and manual (workflow_dispatch) runs.
+name: Build And Publish AIFotoONLUS.Core
+
+on:
+  push:
+    branches:
+      - master
+      - develop
+    tags:
+      - '*'
+  workflow_dispatch:
+
+# Workflow-level values are exported to every step's environment, so shell
+# scripts read them as ordinary variables instead of splicing `${{ }}`
+# expressions into the script text.
+env:
+  DOTNET_VERSION: '10.0.x'
+  PROJECT_PATH: src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj
+  PACKAGE_OUTPUT_DIR: artifacts/nuget
+  PACKAGE_ARTIFACT_NAME: aifotoonlus-core-nuget
+  NUGET_SOURCE_NAME: forgejo-aifotoonlus
+  # Explicit override first; otherwise derive the registry URL from the server
+  # and the package owner (falling back to the repository owner).
+  NUGET_SOURCE_URL: ${{ vars.AIFOTOONLUS_NUGET_SOURCE_URL || format('{0}/api/packages/{1}/nuget/index.json', github.server_url, vars.AIFOTOONLUS_PACKAGE_OWNER || github.repository_owner) }}
+
+jobs:
+  build:
+    runs-on: docker
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Full history, needed if the version is derived from git tags (e.g. MinVer).
+          fetch-depth: 0
+
+      - name: Setup .NET
+        uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: ${{ env.DOTNET_VERSION }}
+
+      - name: Restore
+        run: dotnet restore "${PROJECT_PATH}"
+
+      - name: Build
+        run: dotnet build "${PROJECT_PATH}" --configuration Release --no-restore /p:GeneratePackageOnBuild=false
+
+      - name: Pack
+        shell: bash
+        run: |
+          set -euo pipefail
+          mkdir -p "${PACKAGE_OUTPUT_DIR}"
+
+          if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
+            # Tag builds take the package version from the tag, minus an optional "v" prefix.
+            package_version="${GITHUB_REF_NAME#v}"
+            version_pattern='^[0-9]+(\.[0-9]+){0,3}(-[0-9A-Za-z.-]+)?(\+[0-9A-Za-z.-]+)?$'
+            if [[ ! "${package_version}" =~ ${version_pattern} ]]; then
+              echo "Tag '${GITHUB_REF_NAME}' does not look like a NuGet package version" >&2
+              exit 1
+            fi
+            echo "Packing tag version ${package_version}"
+            dotnet pack "${PROJECT_PATH}" \
+              --configuration Release \
+              --output "${PACKAGE_OUTPUT_DIR}" \
+              --no-build \
+              /p:PackageVersion="${package_version}"
+          else
+            echo "Packing with project version or MinVer-derived version"
+            dotnet pack "${PROJECT_PATH}" \
+              --configuration Release \
+              --output "${PACKAGE_OUTPUT_DIR}" \
+              --no-build
+          fi
+
+      - name: Upload package artifact
+        # Stay on v3: the upstream v4 artifact actions target GitHub's newer artifact API.
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ env.PACKAGE_ARTIFACT_NAME }}
+          path: ${{ env.PACKAGE_OUTPUT_DIR }}/*.nupkg
+          if-no-files-found: error
+
+  publish:
+    # Publish only for tags or manual runs; branch pushes stop after the build job.
+    if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
+    needs: build
+    runs-on: docker
+    env:
+      FORGEJO_PACKAGE_USERNAME: ${{ secrets.FORGEJO_PACKAGE_USERNAME }}
+      FORGEJO_PACKAGE_TOKEN: ${{ secrets.FORGEJO_PACKAGE_TOKEN }}
+
+    steps:
+      - name: Setup .NET
+        uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: ${{ env.DOTNET_VERSION }}
+
+      - name: Download package artifact
+        uses: actions/download-artifact@v3
+        with:
+          name: ${{ env.PACKAGE_ARTIFACT_NAME }}
+          path: ${{ env.PACKAGE_OUTPUT_DIR }}
+
+      - name: Validate publish secrets
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [ -z "${FORGEJO_PACKAGE_USERNAME:-}" ]; then
+            echo "secrets.FORGEJO_PACKAGE_USERNAME is required"
+            exit 1
+          fi
+          if [ -z "${FORGEJO_PACKAGE_TOKEN:-}" ]; then
+            echo "secrets.FORGEJO_PACKAGE_TOKEN is required"
+            exit 1
+          fi
+
+      - name: Configure Forgejo NuGet source
+        shell: bash
+        run: |
+          set -euo pipefail
+          # Clear-text storage is required because encrypted credentials are
+          # not supported by the .NET CLI on non-Windows runners.
+          dotnet nuget add source "${NUGET_SOURCE_URL}" \
+            --name "${NUGET_SOURCE_NAME}" \
+            --username "${FORGEJO_PACKAGE_USERNAME}" \
+            --password "${FORGEJO_PACKAGE_TOKEN}" \
+            --store-password-in-clear-text
+
+      - name: Publish package to Forgejo NuGet
+        shell: bash
+        run: |
+          set -euo pipefail
+          shopt -s nullglob
+          packages=("${PACKAGE_OUTPUT_DIR}"/*.nupkg)
+          if [ "${#packages[@]}" -eq 0 ]; then
+            echo "No NuGet packages found in ${PACKAGE_OUTPUT_DIR}"
+            exit 1
+          fi
+
+          # Push one file per invocation: `dotnet nuget push` takes a single
+          # package path, so a shell-expanded glob breaks with several packages.
+          for package in "${packages[@]}"; do
+            dotnet nuget push "${package}" \
+              --source "${NUGET_SOURCE_NAME}" \
+              --skip-duplicate
+          done
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0bbddec
--- /dev/null
+++ b/README.md
@@ -0,0 +1,65 @@
+# AIFotoONLUS Number Recognition Library
+
+This library provides a small, focused engine to detect and recognize numeric
+text (digits) in images using Darknet (YOLO) models via OpenCvSharp's DNN API.
+It is suitable for batch processing folders of images or individual files.
+
+## Features
+- Detection network (Darknet/Yolo) to find candidate text regions.
+- Recognition network (Darknet/Yolo) to identify digits inside detected crops.
+- Single-file and directory-level processing APIs.
+- Parallel processing with per-thread network instances for throughput.
+- Diagnostic helpers to dump network output shapes and optionally save crop images.
+
+## Basic usage
+1. Create a `ModelConfiguration` instance that points to your Darknet `.cfg`
+and `.weights` files for both detection and recognition networks, configure
+confidence and NMS thresholds and provide a list of number class labels.
+
+2. Create an instance of `NumberRecognitionEngine`:
+
+```csharp
+using var engine = new NumberRecognitionEngine(modelConfig, logger: null);
+```
+
+3. Process a single image:
+
+```csharp
+var result = engine.ProcessImage("/path/to/image.jpg");
+Console.WriteLine(result.Text);
+```
+
+4. Process a directory (parallelized):
+
+```csharp
+var results = await engine.ProcessDirectoryAsync("/path/to/images", recursive: false);
+foreach (var r in results) Console.WriteLine($"{r.FileName}: {r.Text}");
+```
+
+## Configuration notes
+- `ModelConfiguration` controls model file paths, input sizes, thresholds and
+ whether to save cropped images for diagnostics. Make sure the paths are
+ accessible to the process and the model files match the expected network
+ architectures.
+
+- The engine expects detection network outputs in the YOLO-style layout:
+ `[cx, cy, w, h, objectness, class1, class2, ...]`.
+
+## Threading & diagnostics
+- For directory/batch processing the engine creates per-thread Net instances
+ so OpenCV forward calls can run concurrently. It also contains fallback
+ logic that will perform processing with shared nets under a lock if needed.
+
+- When `EnableCropSaving` is enabled in configuration, each recognized crop is
+ saved to `logs/crops` with a timestamp and optional context label to aid
+ debugging false positives/negatives.
+
+## Troubleshooting
+- If the engine returns no detections, verify the model files are correct and
+ compatible with the expected output layout. Use
+ `ProcessFileWithDiagnostics` to inspect output layer shapes.
+
+## License & Notes
+This project is provided as-is. See the repository for licensing information
+and for the distribution terms of the model files (models are usually not
+redistributed with the code and must be obtained separately).
diff --git a/gitversion.json b/gitversion.json
new file mode 100644
index 0000000..6a77551
--- /dev/null
+++ b/gitversion.json
@@ -0,0 +1,27 @@
+{
+ "AssemblySemFileVer": "0.1.0.0",
+ "AssemblySemVer": "0.1.0.0",
+ "BranchName": "master",
+ "BuildMetaData": null,
+ "CommitDate": "2026-02-15",
+ "CommitsSinceVersionSource": 11,
+ "EscapedBranchName": "master",
+ "FullBuildMetaData": "Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a",
+ "FullSemVer": "0.1.0-{BranchName}.11",
+ "InformationalVersion": "0.1.0-{BranchName}.11+Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a",
+ "Major": 0,
+ "MajorMinorPatch": "0.1.0",
+ "Minor": 1,
+ "Patch": 0,
+ "PreReleaseLabel": "{BranchName}",
+ "PreReleaseLabelWithDash": "-{BranchName}",
+ "PreReleaseNumber": 11,
+ "PreReleaseTag": "{BranchName}.11",
+ "PreReleaseTagWithDash": "-{BranchName}.11",
+ "SemVer": "0.1.0-{BranchName}.11",
+ "Sha": "a90da31e531332a4cf0bafe604f89d0e14f3395a",
+ "ShortSha": "a90da31",
+ "UncommittedChanges": 7,
+ "VersionSourceSha": "",
+ "WeightedPreReleaseNumber": 11
+}
diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj
index c5f3f96..dab45a2 100644
--- a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj
+++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj
@@ -3,6 +3,10 @@
net10.0
enable
enable
+
+ true
+
+ $(OutputPath)$(AssemblyName).xml
@@ -10,7 +14,7 @@
Maddo
Maddo
Core library for AIFotoONLUS image processing and recognition.
- https://gitlab.com/MaddoScientisto/aifotoonlus
+ https://forgejo.maddoscientisto.net/maddo/AIFotoONLUS
0.1.0
diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml
new file mode 100644
index 0000000..4b6c8f2
--- /dev/null
+++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml
@@ -0,0 +1,331 @@
+
+
+
+ AIFotoONLUS.Core
+
+
+
+
+ Represents a detected text region produced by the detection network.
+
+ Bounding rectangle of the detection in image coordinates.
+ Combined confidence score for the detection (objectness * class probability).
+ Class index predicted by the network (index into ).
+ Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.
+
+
+
+ Represents a detected text region produced by the detection network.
+
+ Bounding rectangle of the detection in image coordinates.
+ Combined confidence score for the detection (objectness * class probability).
+ Class index predicted by the network (index into ).
+ Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.
+
+
+ Bounding rectangle of the detection in image coordinates.
+
+
+ Combined confidence score for the detection (objectness * class probability).
+
+
+ Class index predicted by the network (index into ).
+
+
+ Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.
+
+
+
+ Represents the result of recognizing a single region: recognized text,
+ its bounding box and confidence.
+
+ Recognized text for the region (usually a sequence of digits).
+ Bounding rectangle of the recognition result.
+ Confidence score associated with the recognition.
+
+
+
+ Represents the result of recognizing a single region: recognized text,
+ its bounding box and confidence.
+
+ Recognized text for the region (usually a sequence of digits).
+ Bounding rectangle of the recognition result.
+ Confidence score associated with the recognition.
+
+
+ Recognized text for the region (usually a sequence of digits).
+
+
+ Bounding rectangle of the recognition result.
+
+
+ Confidence score associated with the recognition.
+
+
+
+ Aggregated result for a processed image.
+
+ Name of the image file.
+ Comma-separated recognized texts found in the image (may be empty).
+ Full path to the processed image file.
+
+
+
+ Aggregated result for a processed image.
+
+ Name of the image file.
+ Comma-separated recognized texts found in the image (may be empty).
+ Full path to the processed image file.
+
+
+ Name of the image file.
+
+
+ Comma-separated recognized texts found in the image (may be empty).
+
+
+ Full path to the processed image file.
+
+
+
+ Configuration options that control model file locations, input sizes
+ and runtime thresholds used by .
+
+
+
+
+ Path to the Darknet configuration (.cfg) file for the detection network.
+
+
+
+
+ Path to the Darknet weights (.weights) file for the detection network.
+
+
+
+
+ Path to the Darknet configuration (.cfg) file for the recognition network.
+
+
+
+
+ Path to the Darknet weights (.weights) file for the recognition network.
+
+
+
+
+ Confidence threshold used to filter out low-probability detections.
+
+
+
+
+ Non-maximum suppression (NMS) IoU threshold used to remove overlapping
+ detection boxes.
+
+
+
+
+ Input size used when preparing the blob for the detection network.
+
+
+
+
+ Input size used when preparing the blob for the recognition network.
+
+
+
+
+ Labels representing digit classes in the recognition model. The order
+ must match the class ordering used by the trained recognition network.
+
+
+
+
+ When enabled, request OpenCV DNN CUDA backend/target for inference.
+ The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail.
+
+
+
+
+ When enabled, recognition crops will be saved to disk under
+ "logs/crops" for diagnostic inspection. Disabled by default.
+
+
+
+
+ Create a new instance of using the
+ provided . The constructor loads the
+ detection and recognition Darknet model files and prepares the OpenCV
+ DNN nets for CPU inference.
+
+ Model configuration containing file paths, thresholds
+ and other options. Must not be null.
+
+ This constructor will throw when
+ any of the expected model files are missing. For logging purposes an
+ overload accepting an is available.
+
+
+
+
+ Create a new instance of with an
+ optional . The logger will receive diagnostic
+ messages and errors produced by the engine during processing.
+
+ Model configuration containing file paths and
+ runtime thresholds.
+ Optional logger for diagnostic messages.
+ May be null.
+ Thrown when
+ is null.
+ Thrown when one of the model
+ files referenced by does not exist.
+
+
+
+ Detect text regions in the supplied image using the detection network.
+
+ Input image as an OpenCvSharp .
+ Must not be null.
+ An enumerable of containing the
+ bounding boxes, confidence and class information for each detected
+ region. The results are already filtered with the configured
+ confidence and NMS thresholds.
+
+
+
+ Recognize digits inside a cropped image region using the recognition
+ network. The method runs the recognition network and returns the
+ concatenated sequence of recognized digit labels ordered left-to-right.
+
+ Cropped image containing digits as
+ . Must not be null.
+ Optional context string used for diagnostics
+ (e.g. when saving crop image files).
+ A string containing recognized digits in left-to-right order.
+ Returns an empty string when no digits are recognized above the
+ configured confidence threshold.
+
+
+
+ Small DTO that describes the name and shape of a detection network
+ forward output used for diagnostics.
+
+ Layer/output name.
+ Number of rows in the output Mat.
+ Number of columns in the output Mat.
+
+
+
+ Small DTO that describes the name and shape of a detection network
+ forward output used for diagnostics.
+
+ Layer/output name.
+ Number of rows in the output Mat.
+ Number of columns in the output Mat.
+
+
+ Layer/output name.
+
+
+ Number of rows in the output Mat.
+
+
+ Number of columns in the output Mat.
+
+
+
+ Result returned by , contains
+ the recognized text result and an array describing detection network
+ forward outputs (shapes and names) which are useful for debugging
+ model output layout mismatches.
+
+ Recognition result for the processed image.
+ Array describing detection net outputs.
+
+
+
+ Result returned by , contains
+ the recognized text result and an array describing detection network
+ forward outputs (shapes and names) which are useful for debugging
+ model output layout mismatches.
+
+ Recognition result for the processed image.
+ Array describing detection net outputs.
+
+
+ Recognition result for the processed image.
+
+
+ Array describing detection net outputs.
+
+
+
+ Process a single image file and return the recognition result together
+ with detection network forward output shapes for diagnostics. This
+ method reads the image from disk, runs a forward pass over the
+ detection network to capture the raw output Mat shapes and then calls
+ the normal processing pipeline to return the recognized text.
+
+
+
+
+ Process a single image file and return the recognized text as an
+ . The method detects candidate text regions
+ and runs recognition on each crop. Multiple recognized digit sequences
+ are joined with a comma in the returned .
+
+ Path to an image file on disk. Supported
+ formats depend on OpenCV (typically JPEG, PNG, ...).
+ An containing the file name and
+ recognized text (possibly empty).
+
+
+
+ Process all JPEG images in a directory and return the recognition
+ results. This is a blocking wrapper over .
+
+ Path to a directory containing images.
+ If true, files whose names start with
+ "tn_" will be skipped (convention used to mark text-negative images).
+ Collection of ordered by file name.
+
+
+
+ Worker overload of that
+ accepts a instance. This is used by the parallel
+ processing pipeline where each worker owns its own Net instance.
+
+ Cropped region to recognize.
+ Recognition to execute
+ the forward pass with.
+ Optional context string for diagnostics.
+ Recognized digit sequence or empty string.
+
+
+
+ Progress statistics reported during directory processing.
+
+ Total number of image files to process.
+ Number of files processed so far.
+ Current processing throughput in images/second.
+
+
+
+ Progress statistics reported during directory processing.
+
+ Total number of image files to process.
+ Number of files processed so far.
+ Current processing throughput in images/second.
+
+
+ Total number of image files to process.
+
+
+ Number of files processed so far.
+
+
+ Current processing throughput in images/second.
+
+
+
diff --git a/src/AIFotoONLUS.Core/DetectedRegion.cs b/src/AIFotoONLUS.Core/DetectedRegion.cs
index 38fb2b7..7d90b6b 100644
--- a/src/AIFotoONLUS.Core/DetectedRegion.cs
+++ b/src/AIFotoONLUS.Core/DetectedRegion.cs
@@ -2,7 +2,29 @@ using OpenCvSharp;
namespace AIFotoONLUS.Core
{
+    /// <summary>
+    /// Represents a detected text region produced by the detection network.
+    /// </summary>
+    /// <param name="BoundingBox">Bounding rectangle of the detection in image coordinates.</param>
+    /// <param name="Confidence">Combined confidence score for the detection (objectness * class probability).</param>
+    /// <param name="ClassId">Class index predicted by the network (index into the network's class list).</param>
+    /// <param name="CenterX">Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.</param>
     public record DetectedRegion(Rect BoundingBox, float Confidence, int ClassId, double CenterX);
+
+    /// <summary>
+    /// Represents the result of recognizing a single region: recognized text,
+    /// its bounding box and confidence.
+    /// </summary>
+    /// <param name="Text">Recognized text for the region (usually a sequence of digits).</param>
+    /// <param name="BoundingBox">Bounding rectangle of the recognition result.</param>
+    /// <param name="Confidence">Confidence score associated with the recognition.</param>
     public record RecognitionResult(string Text, Rect BoundingBox, double Confidence);
+
+    /// <summary>
+    /// Aggregated result for a processed image.
+    /// </summary>
+    /// <param name="FileName">Name of the image file.</param>
+    /// <param name="Text">Comma-separated recognized texts found in the image (may be empty).</param>
+    /// <param name="FilePath">Full path to the processed image file.</param>
     public record ImageResult(string FileName, string Text, string FilePath);
}
\ No newline at end of file
diff --git a/src/AIFotoONLUS.Core/ModelConfiguration.cs b/src/AIFotoONLUS.Core/ModelConfiguration.cs
index 4c14e7d..51d7ac7 100644
--- a/src/AIFotoONLUS.Core/ModelConfiguration.cs
+++ b/src/AIFotoONLUS.Core/ModelConfiguration.cs
@@ -2,21 +2,69 @@ using OpenCvSharp;
namespace AIFotoONLUS.Core
{
+    /// <summary>
+    /// Configuration options that control model file locations, input sizes
+    /// and runtime thresholds used by <see cref="NumberRecognitionEngine"/>.
+    /// </summary>
     public class ModelConfiguration
     {
+        /// <summary>
+        /// Path to the Darknet configuration (.cfg) file for the detection network.
+        /// </summary>
         public string DetectionCfg { get; set; } = "models/detection.cfg";
+
+        /// <summary>
+        /// Path to the Darknet weights (.weights) file for the detection network.
+        /// </summary>
         public string DetectionWeights { get; set; } = "models/detection.weights";
+
+        /// <summary>
+        /// Path to the Darknet configuration (.cfg) file for the recognition network.
+        /// </summary>
         public string RecognitionCfg { get; set; } = "models/recognition.cfg";
+
+        /// <summary>
+        /// Path to the Darknet weights (.weights) file for the recognition network.
+        /// </summary>
         public string RecognitionWeights { get; set; } = "models/recognition.weights";
+        /// <summary>
+        /// Confidence threshold used to filter out low-probability detections.
+        /// </summary>
         public double ConfidenceThreshold { get; set; } = 0.5;
+
+        /// <summary>
+        /// Non-maximum suppression (NMS) IoU threshold used to remove overlapping
+        /// detection boxes.
+        /// </summary>
         public double NmsThreshold { get; set; } = 0.4;
+        /// <summary>
+        /// Input size used when preparing the blob for the detection network.
+        /// </summary>
         public Size DetectionInputSize { get; set; } = new Size(416, 416);
+
+        /// <summary>
+        /// Input size used when preparing the blob for the recognition network.
+        /// </summary>
         public Size RecognitionInputSize { get; set; } = new Size(140, 120);
+        /// <summary>
+        /// Labels representing digit classes in the recognition model. The order
+        /// must match the class ordering used by the trained recognition network.
+        /// </summary>
         public string[] NumberClasses { get; set; } = new[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };
-        // When true, recognition crops will be saved to disk for diagnostics. Disabled by default.
+
+        /// <summary>
+        /// When enabled, request OpenCV DNN CUDA backend/target for inference.
+        /// The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail.
+        /// </summary>
+        public bool UseGpu { get; set; } = false;
+
+        /// <summary>
+        /// When enabled, recognition crops will be saved to disk under
+        /// "logs/crops" for diagnostic inspection. Disabled by default.
+        /// </summary>
         public bool EnableCropSaving { get; set; } = false;
     }
}
\ No newline at end of file
diff --git a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs
index 1c1a6ca..d6b2d25 100644
--- a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs
+++ b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs
@@ -12,8 +12,32 @@ using System.Threading.Tasks;
namespace AIFotoONLUS.Core
{
///
- /// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and
- /// provides methods to detect text regions and recognize digits.
+ /// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO)
+ /// models through OpenCvSharp's DNN API and exposes simple synchronous and
+ /// asynchronous methods to detect numeric text regions in images and recognize
+ /// the digits contained within those regions.
+ ///
+ /// Overview
+ /// - Loads two Darknet networks: a detection network (finds text regions)
+ /// and a recognition network (recognizes digits inside a cropped region).
+ /// - Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and
+ /// perform non‑maximum suppression (NMS) on detection candidates.
+ /// - Provides single-image and directory-level processing APIs. Directory
+ /// processing supports parallel workers where each worker uses its own
+ /// per-thread Net instances to allow concurrent forward calls.
+ ///
+ /// Threading and performance notes
+ /// - The class constructs and owns two shared Net instances used by the
+ /// simple (single-threaded) APIs. When doing parallel processing the
+ /// implementation creates per-thread Net instances to avoid concurrent
+ /// calls into the same Net object. A small fallback path exists that will
+ /// call into the shared nets under a lock when needed.
+ /// - OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported.
+ ///
+ /// Diagnostics
+ /// - When enabled via the configuration, crops may be saved to disk for
+ /// debugging. The contains thresholds and
+ /// paths used by the engine.
///
using Microsoft.Extensions.Logging;
@@ -27,11 +51,37 @@ namespace AIFotoONLUS.Core
private readonly ILogger? _logger;
private bool _disposed;
+        /// <summary>
+        /// Create a new instance of <see cref="NumberRecognitionEngine"/> using the
+        /// provided <see cref="ModelConfiguration"/>. The constructor loads the
+        /// detection and recognition Darknet model files and prepares the OpenCV
+        /// DNN nets for CPU inference, or CUDA inference when <c>UseGpu</c> is set.
+        /// </summary>
+        /// <param name="cfg">Model configuration containing file paths, thresholds
+        /// and other options. Must not be null.</param>
+        /// <remarks>
+        /// This constructor throws when
+        /// any of the expected model files are missing. For logging purposes an
+        /// overload accepting an <see cref="ILogger"/> is available.
+        /// </remarks>
         public NumberRecognitionEngine(ModelConfiguration cfg)
             : this(cfg, logger: null)
         {
         }
+ ///
+ /// Create a new instance of with an
+ /// optional . The logger will receive diagnostic
+ /// messages and errors produced by the engine during processing.
+ ///
+ /// Model configuration containing file paths and
+ /// runtime thresholds.
+ /// Optional logger for diagnostic messages.
+ /// May be null.
+ /// Thrown when
+ /// is null.
+ /// Thrown when one of the model
+ /// files referenced by does not exist.
public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger)
{
_logger = logger;
@@ -45,10 +95,8 @@ namespace AIFotoONLUS.Core
_detectionNet = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
_recognitionNet = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
- _detectionNet.SetPreferableBackend(Backend.OPENCV);
- _detectionNet.SetPreferableTarget(Target.CPU);
- _recognitionNet.SetPreferableBackend(Backend.OPENCV);
- _recognitionNet.SetPreferableTarget(Target.CPU);
+ ConfigureNetRuntime(_detectionNet, _cfg.UseGpu);
+ ConfigureNetRuntime(_recognitionNet, _cfg.UseGpu);
// Let OpenCV use multiple threads internally (use number of logical processors)
try
{
@@ -58,6 +106,11 @@ namespace AIFotoONLUS.Core
{
// Ignore if not supported by OpenCvSharp build
}
+
+ if (_cfg.UseGpu)
+ {
+ ValidateGpuRuntime();
+ }
}
public void Dispose()
@@ -69,6 +122,38 @@ namespace AIFotoONLUS.Core
GC.SuppressFinalize(this);
}
+        /// <summary>
+        /// Checks whether an engine can be constructed with GPU inference enabled
+        /// for the supplied configuration.
+        /// </summary>
+        /// <remarks>
+        /// A copy of <paramref name="cfg"/> with <c>UseGpu</c> forced to <c>true</c>
+        /// is used to build a throw-away engine; the caller's configuration object
+        /// is not modified. Any exception raised while constructing (or disposing)
+        /// that engine is caught and reported through <paramref name="failureMessage"/>.
+        /// </remarks>
+        /// <param name="cfg">Configuration whose settings are probed.</param>
+        /// <param name="logger">Optional logger passed to the probe engine.</param>
+        /// <param name="failureMessage">Set to <c>null</c> on success; otherwise the
+        /// message of the innermost exception that caused the failure.</param>
+        /// <returns><c>true</c> when the probe engine was created successfully,
+        /// otherwise <c>false</c>.</returns>
+        /// <exception cref="ArgumentNullException">Thrown when
+        /// <paramref name="cfg"/> is null.</exception>
+        public static bool TryValidateGpuRuntime(ModelConfiguration cfg, ILogger? logger, out string? failureMessage)
+        {
+            if (cfg is null)
+            {
+                throw new ArgumentNullException(nameof(cfg));
+            }
+
+            // Mirror every caller setting onto a fresh instance; only UseGpu is overridden.
+            var gpuProbeConfig = new ModelConfiguration();
+            gpuProbeConfig.DetectionCfg = cfg.DetectionCfg;
+            gpuProbeConfig.DetectionWeights = cfg.DetectionWeights;
+            gpuProbeConfig.RecognitionCfg = cfg.RecognitionCfg;
+            gpuProbeConfig.RecognitionWeights = cfg.RecognitionWeights;
+            gpuProbeConfig.ConfidenceThreshold = cfg.ConfidenceThreshold;
+            gpuProbeConfig.NmsThreshold = cfg.NmsThreshold;
+            gpuProbeConfig.DetectionInputSize = cfg.DetectionInputSize;
+            gpuProbeConfig.RecognitionInputSize = cfg.RecognitionInputSize;
+            gpuProbeConfig.NumberClasses = cfg.NumberClasses;
+            gpuProbeConfig.EnableCropSaving = cfg.EnableCropSaving;
+            gpuProbeConfig.UseGpu = true;
+
+            try
+            {
+                // Constructing the engine is the probe; it is disposed before returning.
+                using (new NumberRecognitionEngine(gpuProbeConfig, logger))
+                {
+                    failureMessage = null;
+                    return true;
+                }
+            }
+            catch (Exception error)
+            {
+                failureMessage = error.GetBaseException().Message;
+                return false;
+            }
+        }
+
private static string SanitizeFileName(string name)
{
foreach (var c in Path.GetInvalidFileNameChars()) name = name.Replace(c, '_');
@@ -77,6 +162,48 @@ namespace AIFotoONLUS.Core
private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames();
+        /// <summary>
+        /// Selects the OpenCV DNN backend and target for a network: CUDA for both
+        /// when <paramref name="useGpu"/> is true, otherwise the OpenCV backend on
+        /// the CPU target.
+        /// </summary>
+        /// <param name="net">Network to configure.</param>
+        /// <param name="useGpu">Whether to request the CUDA backend and target.</param>
+        private static void ConfigureNetRuntime(Net net, bool useGpu)
+        {
+            var backend = useGpu ? Backend.CUDA : Backend.OPENCV;
+            var target = useGpu ? Target.CUDA : Target.CPU;
+
+            net.SetPreferableBackend(backend);
+            net.SetPreferableTarget(target);
+        }
+
+        /// <summary>
+        /// Runs the shared detection and recognition networks once on all-zero
+        /// probe images sized to the configured input sizes. Any exception raised
+        /// along the way is wrapped in an <see cref="InvalidOperationException"/>.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">Thrown when either probe
+        /// pass fails; the original exception is kept as the inner exception.</exception>
+        private void ValidateGpuRuntime()
+        {
+            const string failureText =
+                "OpenCV DNN CUDA runtime validation failed. Disable number AI GPU mode or use an OpenCV runtime built with CUDA DNN support.";
+
+            try
+            {
+                // Detection network: push a blank frame through the regular detection path.
+                var detectionSize = _cfg.DetectionInputSize;
+                using var blankDetectionImage = new Mat(detectionSize.Height, detectionSize.Width, MatType.CV_8UC3, Scalar.All(0));
+                _ = DetectTextRegions(_detectionNet, blankDetectionImage).Take(1).ToArray();
+
+                // Recognition network: build the blob by hand and run a single forward pass.
+                var recognitionSize = _cfg.RecognitionInputSize;
+                using var blankRecognitionImage = new Mat(recognitionSize.Height, recognitionSize.Width, MatType.CV_8UC3, Scalar.All(0));
+                using var recognitionBlob = CvDnn.BlobFromImage(blankRecognitionImage, 0.00392, recognitionSize, new Scalar(0, 0, 0), true, false);
+                _recognitionNet.SetInput(recognitionBlob);
+                using var forwardResult = _recognitionNet.Forward();
+            }
+            catch (Exception cause)
+            {
+                throw new InvalidOperationException(failureText, cause);
+            }
+        }
+
+ ///
+ /// Detect text regions in the supplied image using the detection network.
+ ///
+ /// Input image as an OpenCvSharp .
+ /// Must not be null.
+ /// An enumerable of containing the
+ /// bounding boxes, confidence and class information for each detected
+ /// region. The results are already filtered with the configured
+ /// confidence and NMS thresholds.
public IEnumerable DetectTextRegions(Mat image)
{
if (image is null) throw new ArgumentNullException(nameof(image));
@@ -93,7 +220,7 @@ namespace AIFotoONLUS.Core
var outNames = GetOutputLayerNames(detectionNet);
var outsList = new List();
detectionNet.Forward(outsList, outNames);
-
+
Mat[] outs = outsList.ToArray();
if (outs.Length == 0)
{
@@ -103,15 +230,15 @@ namespace AIFotoONLUS.Core
var fallback = new List();
for (int on = 0; on < outNames.Length; on++)
{
- try
- {
- var single = detectionNet.Forward(outNames[on]);
- fallback.Add(single);
- }
- catch (Exception ex)
- {
- _logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]);
- }
+ try
+ {
+ var single = detectionNet.Forward(outNames[on]);
+ fallback.Add(single);
+ }
+ catch (Exception ex)
+ {
+ _logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]);
+ }
}
if (fallback.Count > 0)
{
@@ -162,21 +289,21 @@ namespace AIFotoONLUS.Core
}
if (maxScore > _cfg.ConfidenceThreshold)
- {
- int x = (int)Math.Max(0, Math.Round(cx - w / 2));
- int y = (int)Math.Max(0, Math.Round(cy - h / 2));
- var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h));
- boxes.Add(rect);
- confidences.Add(maxScore);
- classIds.Add(bestClass);
- centerXList.Add(cx);
- }
+ {
+ int x = (int)Math.Max(0, Math.Round(cx - w / 2));
+ int y = (int)Math.Max(0, Math.Round(cy - h / 2));
+ var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h));
+ boxes.Add(rect);
+ confidences.Add(maxScore);
+ classIds.Add(bestClass);
+ centerXList.Add(cx);
+ }
}
}
if (boxes.Count == 0) return Enumerable.Empty();
-
+
CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] indices);
@@ -190,6 +317,18 @@ namespace AIFotoONLUS.Core
return results;
}
+ ///
+ /// Recognize digits inside a cropped image region using the recognition
+ /// network. The method runs the recognition network and returns the
+ /// concatenated sequence of recognized digit labels ordered left-to-right.
+ ///
+ /// Cropped image containing digits as
+ /// . Must not be null.
+ /// Optional context string used for diagnostics
+ /// (e.g. when saving crop image files).
+ /// A string containing recognized digits in left-to-right order.
+ /// Returns an empty string when no digits are recognized above the
+ /// configured confidence threshold.
public string RecognizeDigits(Mat croppedImage, string? context = null)
{
if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));
@@ -287,12 +426,31 @@ namespace AIFotoONLUS.Core
return string.Concat(ordered);
}
+        /// <summary>
+        /// Small DTO that describes the name and shape of a detection network
+        /// forward output used for diagnostics.
+        /// </summary>
+        /// <param name="Name">Layer/output name.</param>
+        /// <param name="Rows">Number of rows in the output Mat.</param>
+        /// <param name="Cols">Number of columns in the output Mat.</param>
         public record DetectionOutput(string Name, int Rows, int Cols);
+
+        /// <summary>
+        /// Result returned by <see cref="ProcessFileWithDiagnostics"/>, contains
+        /// the recognized text result and an array describing detection network
+        /// forward outputs (shapes and names) which are useful for debugging
+        /// model output layout mismatches.
+        /// </summary>
+        /// <param name="Result">Recognition result for the processed image.</param>
+        /// <param name="DetectionOutputs">Array describing detection net outputs.</param>
         public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs);
///
- /// Process a single image file and return the recognition result together with
- /// detection network forward output shapes for diagnostics.
+ /// Process a single image file and return the recognition result together
+ /// with detection network forward output shapes for diagnostics. This
+ /// method reads the image from disk, runs a forward pass over the
+ /// detection network to capture the raw output Mat shapes and then calls
+ /// the normal processing pipeline to return the recognized text.
///
public DiagnosticResult ProcessFileWithDiagnostics(string filePath)
{
@@ -330,6 +488,16 @@ namespace AIFotoONLUS.Core
return new DiagnosticResult(imgRes, outputs);
}
+ ///
+ /// Process a single image file and return the recognized text as an
+ /// . The method detects candidate text regions
+ /// and runs recognition on each crop. Multiple recognized digit sequences
+ /// are joined with a comma in the returned .
+ ///
+ /// Path to an image file on disk. Supported
+ /// formats depend on OpenCV (typically JPEG, PNG, ...).
+ /// An containing the file name and
+ /// recognized text (possibly empty).
public ImageResult ProcessImage(string filePath)
{
if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath);
@@ -351,6 +519,14 @@ namespace AIFotoONLUS.Core
return result;
}
+ ///
+ /// Process all JPEG images in a directory and return the recognition
+ /// results. This is a blocking wrapper over .
+ ///
+ /// Path to a directory containing images.
+ /// If true, files whose names start with
+ /// "tn_" will be skipped (convention used to mark text-negative images).
+ /// Collection of ordered by file name.
public IEnumerable ProcessDirectory(string directoryPath, bool skipTextNegative = false)
{
// Simple wrapper over async implementation
@@ -378,10 +554,8 @@ namespace AIFotoONLUS.Core
{
var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
- det.SetPreferableBackend(Backend.OPENCV);
- det.SetPreferableTarget(Target.CPU);
- rec.SetPreferableBackend(Backend.OPENCV);
- rec.SetPreferableTarget(Target.CPU);
+ ConfigureNetRuntime(det, _cfg.UseGpu);
+ ConfigureNetRuntime(rec, _cfg.UseGpu);
netsBag.Add((det, rec));
return (det, rec);
});
@@ -417,8 +591,7 @@ namespace AIFotoONLUS.Core
try
{
using var tempRec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
- tempRec.SetPreferableBackend(Backend.OPENCV);
- tempRec.SetPreferableTarget(Target.CPU);
+ ConfigureNetRuntime(tempRec, _cfg.UseGpu);
var alt = RecognizeDigits(crop, tempRec, ctx);
if (!string.IsNullOrEmpty(alt)) txt = alt;
}
@@ -504,6 +677,16 @@ namespace AIFotoONLUS.Core
}
// Overload RecognizeDigits that accepts a Net for worker threads
+ ///
+ /// Worker overload of that
+ /// accepts a instance. This is used by the parallel
+ /// processing pipeline where each worker owns its own Net instance.
+ ///
+ /// Cropped region to recognize.
+ /// Recognition to execute
+ /// the forward pass with.
+ /// Optional context string for diagnostics.
+ /// Recognized digit sequence or empty string.
private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null)
{
if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));
diff --git a/src/AIFotoONLUS.Core/ProcessingStats.cs b/src/AIFotoONLUS.Core/ProcessingStats.cs
index fe5aba3..9d0de10 100644
--- a/src/AIFotoONLUS.Core/ProcessingStats.cs
+++ b/src/AIFotoONLUS.Core/ProcessingStats.cs
@@ -1,4 +1,10 @@
namespace AIFotoONLUS.Core
{
+    /// <summary>
+    /// Progress statistics reported during directory processing.
+    /// </summary>
+    /// <param name="TotalFiles">Total number of image files to process.</param>
+    /// <param name="ProcessedFiles">Number of files processed so far.</param>
+    /// <param name="ImagesPerSecond">Current processing throughput in images/second.</param>
     public record ProcessingStats(int TotalFiles, int ProcessedFiles, double ImagesPerSecond);
}