diff --git a/.forgejo/workflows/publish-aifotoonlus-core.yml b/.forgejo/workflows/publish-aifotoonlus-core.yml deleted file mode 100644 index 04a940d..0000000 --- a/.forgejo/workflows/publish-aifotoonlus-core.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: Build And Publish AIFotoONLUS.Core - -on: - push: - branches: - - master - - develop - tags: - - '*' - workflow_dispatch: - -env: - DOTNET_VERSION: 10.0.x - PROJECT_PATH: src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj - PACKAGE_OUTPUT_DIR: artifacts/nuget - PACKAGE_ARTIFACT_NAME: aifotoonlus-core-nuget - NUGET_SOURCE_NAME: forgejo-aifotoonlus - NUGET_SOURCE_URL: ${{ vars.AIFOTOONLUS_NUGET_SOURCE_URL || format('{0}/api/packages/{1}/nuget/index.json', github.server_url, vars.AIFOTOONLUS_PACKAGE_OWNER || github.repository_owner) }} - -jobs: - build: - runs-on: docker - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup .NET - uses: actions/setup-dotnet@v4 - with: - dotnet-version: ${{ env.DOTNET_VERSION }} - - - name: Restore - run: dotnet restore "${{ env.PROJECT_PATH }}" - - - name: Build - run: dotnet build "${{ env.PROJECT_PATH }}" --configuration Release --no-restore /p:GeneratePackageOnBuild=false - - - name: Pack - shell: bash - run: | - set -eu - mkdir -p "${{ env.PACKAGE_OUTPUT_DIR }}" - - if [[ "${GITHUB_REF}" == refs/tags/* ]]; then - package_version="${GITHUB_REF_NAME#v}" - echo "Packing tag version ${package_version}" - dotnet pack "${{ env.PROJECT_PATH }}" \ - --configuration Release \ - --output "${{ env.PACKAGE_OUTPUT_DIR }}" \ - --no-build \ - /p:PackageVersion="${package_version}" - else - echo "Packing with project version or MinVer-derived version" - dotnet pack "${{ env.PROJECT_PATH }}" \ - --configuration Release \ - --output "${{ env.PACKAGE_OUTPUT_DIR }}" \ - --no-build - fi - - - name: Upload package artifact - uses: actions/upload-artifact@v3 - with: - name: ${{ env.PACKAGE_ARTIFACT_NAME }} - path: ${{ env.PACKAGE_OUTPUT_DIR }}/*.nupkg - if-no-files-found: error - - publish: - if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' - needs: build - runs-on: docker - env: - FORGEJO_PACKAGE_USERNAME: ${{ secrets.FORGEJO_PACKAGE_USERNAME }} - FORGEJO_PACKAGE_TOKEN: ${{ secrets.FORGEJO_PACKAGE_TOKEN }} - - steps: - - name: Setup .NET - uses: actions/setup-dotnet@v4 - with: - dotnet-version: ${{ env.DOTNET_VERSION }} - - - name: Download package artifact - uses: actions/download-artifact@v3 - with: - name: ${{ env.PACKAGE_ARTIFACT_NAME }} - path: ${{ env.PACKAGE_OUTPUT_DIR }} - - - name: Validate publish secrets - shell: bash - run: | - set -eu - if [ -z "${FORGEJO_PACKAGE_USERNAME}" ]; then - echo "secrets.FORGEJO_PACKAGE_USERNAME is required" - exit 1 - fi - if [ -z "${FORGEJO_PACKAGE_TOKEN}" ]; then - echo "secrets.FORGEJO_PACKAGE_TOKEN is required" - exit 1 - fi - - - name: Configure Forgejo NuGet source - run: | - dotnet nuget add source "${{ env.NUGET_SOURCE_URL }}" \ - --name "${{ env.NUGET_SOURCE_NAME }}" \ - --username "${FORGEJO_PACKAGE_USERNAME}" \ - --password "${FORGEJO_PACKAGE_TOKEN}" \ - --store-password-in-clear-text - - - name: Publish package to Forgejo NuGet - shell: bash - run: | - set -eu - shopt -s nullglob - packages=("${{ env.PACKAGE_OUTPUT_DIR }}"/*.nupkg) - if [ "${#packages[@]}" -eq 0 ]; then - echo "No NuGet packages found in ${{ env.PACKAGE_OUTPUT_DIR }}" - exit 1 - fi - - dotnet nuget push "${{ env.PACKAGE_OUTPUT_DIR }}"/*.nupkg \ - --source "${{ env.NUGET_SOURCE_NAME }}" \ - --skip-duplicate \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 0bbddec..0000000 --- a/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# AIFotoONLUS Number Recognition Library - -This library provides a small, focused engine to detect and recognize numeric -text (digits) in images using Darknet (YOLO) models via OpenCvSharp's DNN API. -It is suitable for batch processing folders of images or individual files. - -Features -- Detection network (Darknet/Yolo) to find candidate text regions. -- Recognition network (Darknet/Yolo) to identify digits inside detected crops. -- Single-file and directory-level processing APIs. -- Parallel processing with per-thread network instances for throughput. -- Diagnostic helpers to dump network output shapes and optionally save crop images. - -Basic usage -1. Create a `ModelConfiguration` instance that points to your Darknet `.cfg` -and `.weights` files for both detection and recognition networks, configure -confidence and NMS thresholds and provide a list of number class labels. - -2. Create an instance of `NumberRecognitionEngine`: - -```csharp -using var engine = new NumberRecognitionEngine(modelConfig, logger: null); -``` - -3. Process a single image: - -```csharp -var result = engine.ProcessImage("/path/to/image.jpg"); -Console.WriteLine(result.Text); -``` - -4. Process a directory (parallelized): - -```csharp -var results = await engine.ProcessDirectoryAsync("/path/to/images", recursive: false); -foreach (var r in results) Console.WriteLine($"{r.FileName}: {r.Text}"); -``` - -Configuration notes -- `ModelConfiguration` controls model file paths, input sizes, thresholds and - whether to save cropped images for diagnostics. Make sure the paths are - accessible to the process and the model files match the expected network - architectures. - -- The engine expects detection network outputs in the YOLO-style layout: - `[cx, cy, w, h, objectness, class1, class2, ...]`. - -Threading & diagnostics -- For directory/batch processing the engine creates per-thread Net instances - so OpenCV forward calls can run concurrently. It also contains fallback - logic that will perform processing with shared nets under a lock if needed. - -- When `EnableCropSaving` is enabled in configuration, each recognized crop is - saved to `logs/crops` with a timestamp and optional context label to aid - debugging false positives/negatives. - -Troubleshooting -- If the engine returns no detections, verify the model files are correct and - compatible with the expected output layout. Use - `ProcessFileWithDiagnostics` to inspect output layer shapes. - -License & Notes -This project is provided as-is. See repository for licensing information and -for the model files distribution terms (models are usually not redistributed -with code and must be obtained separately). diff --git a/gitversion.json b/gitversion.json deleted file mode 100644 index 6a77551..0000000 --- a/gitversion.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "AssemblySemFileVer": "0.1.0.0", - "AssemblySemVer": "0.1.0.0", - "BranchName": "master", - "BuildMetaData": null, - "CommitDate": "2026-02-15", - "CommitsSinceVersionSource": 11, - "EscapedBranchName": "master", - "FullBuildMetaData": "Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a", - "FullSemVer": "0.1.0-{BranchName}.11", - "InformationalVersion": "0.1.0-{BranchName}.11+Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a", - "Major": 0, - "MajorMinorPatch": "0.1.0", - "Minor": 1, - "Patch": 0, - "PreReleaseLabel": "{BranchName}", - "PreReleaseLabelWithDash": "-{BranchName}", - "PreReleaseNumber": 11, - "PreReleaseTag": "{BranchName}.11", - "PreReleaseTagWithDash": "-{BranchName}.11", - "SemVer": "0.1.0-{BranchName}.11", - "Sha": "a90da31e531332a4cf0bafe604f89d0e14f3395a", - "ShortSha": "a90da31", - "UncommittedChanges": 7, - "VersionSourceSha": "", - "WeightedPreReleaseNumber": 11 -} diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj index dab45a2..c5f3f96 100644 --- a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj +++ b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj @@ -3,10 +3,6 @@ net10.0 enable enable - - true - - $(OutputPath)$(AssemblyName).xml @@ -14,7 +10,7 @@ Maddo Maddo Core library for AIFotoONLUS image processing and recognition. - https://forgejo.maddoscientisto.net/maddo/AIFotoONLUS + https://gitlab.com/MaddoScientisto/aifotoonlus 0.1.0 diff --git a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml b/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml deleted file mode 100644 index 4b6c8f2..0000000 --- a/src/AIFotoONLUS.Core/AIFotoONLUS.Core.xml +++ /dev/null @@ -1,331 +0,0 @@ - - - - AIFotoONLUS.Core - - - - - Represents a detected text region produced by the detection network. - - Bounding rectangle of the detection in image coordinates. - Combined confidence score for the detection (objectness * class probability). - Class index predicted by the network (index into ). - Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. - - - - Represents a detected text region produced by the detection network. - - Bounding rectangle of the detection in image coordinates. - Combined confidence score for the detection (objectness * class probability). - Class index predicted by the network (index into ). - Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. - - - Bounding rectangle of the detection in image coordinates. - - - Combined confidence score for the detection (objectness * class probability). - - - Class index predicted by the network (index into ). - - - Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. - - - - Represents the result of recognizing a single region: recognized text, - its bounding box and confidence. - - Recognized text for the region (usually a sequence of digits). - Bounding rectangle of the recognition result. - Confidence score associated with the recognition. - - - - Represents the result of recognizing a single region: recognized text, - its bounding box and confidence. - - Recognized text for the region (usually a sequence of digits). - Bounding rectangle of the recognition result. - Confidence score associated with the recognition. - - - Recognized text for the region (usually a sequence of digits). - - - Bounding rectangle of the recognition result. - - - Confidence score associated with the recognition. - - - - Aggregated result for a processed image. - - Name of the image file. - Comma-separated recognized texts found in the image (may be empty). - Full path to the processed image file. - - - - Aggregated result for a processed image. - - Name of the image file. - Comma-separated recognized texts found in the image (may be empty). - Full path to the processed image file. - - - Name of the image file. - - - Comma-separated recognized texts found in the image (may be empty). - - - Full path to the processed image file. - - - - Configuration options that control model file locations, input sizes - and runtime thresholds used by . - - - - - Path to the Darknet configuration (.cfg) file for the detection network. - - - - - Path to the Darknet weights (.weights) file for the detection network. - - - - - Path to the Darknet configuration (.cfg) file for the recognition network. - - - - - Path to the Darknet weights (.weights) file for the recognition network. - - - - - Confidence threshold used to filter out low-probability detections. - - - - - Non-maximum suppression (NMS) IoU threshold used to remove overlapping - detection boxes. - - - - - Input size used when preparing the blob for the detection network. - - - - - Input size used when preparing the blob for the recognition network. - - - - - Labels representing digit classes in the recognition model. The order - must match the class ordering used by the trained recognition network. - - - - - When enabled, request OpenCV DNN CUDA backend/target for inference. - The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail. - - - - - When enabled, recognition crops will be saved to disk under - "logs/crops" for diagnostic inspection. Disabled by default. - - - - - Create a new instance of using the - provided . The constructor loads the - detection and recognition Darknet model files and prepares the OpenCV - DNN nets for CPU inference. - - Model configuration containing file paths, thresholds - and other options. Must not be null. - - This constructor will throw when - any of the expected model files are missing. For logging purposes an - overload accepting an is available. - - - - - Create a new instance of with an - optional . The logger will receive diagnostic - messages and errors produced by the engine during processing. - - Model configuration containing file paths and - runtime thresholds. - Optional logger for diagnostic messages. - May be null. - Thrown when - is null. - Thrown when one of the model - files referenced by does not exist. - - - - Detect text regions in the supplied image using the detection network. - - Input image as an OpenCvSharp . - Must not be null. - An enumerable of containing the - bounding boxes, confidence and class information for each detected - region. The results are already filtered with the configured - confidence and NMS thresholds. - - - - Recognize digits inside a cropped image region using the recognition - network. The method runs the recognition network and returns the - concatenated sequence of recognized digit labels ordered left-to-right. - - Cropped image containing digits as - . Must not be null. - Optional context string used for diagnostics - (e.g. when saving crop image files). - A string containing recognized digits in left-to-right order. - Returns an empty string when no digits are recognized above the - configured confidence threshold. - - - - Small DTO that describes the name and shape of a detection network - forward output used for diagnostics. - - Layer/output name. - Number of rows in the output Mat. - Number of columns in the output Mat. - - - - Small DTO that describes the name and shape of a detection network - forward output used for diagnostics. - - Layer/output name. - Number of rows in the output Mat. - Number of columns in the output Mat. - - - Layer/output name. - - - Number of rows in the output Mat. - - - Number of columns in the output Mat. - - - - Result returned by , contains - the recognized text result and an array describing detection network - forward outputs (shapes and names) which are useful for debugging - model output layout mismatches. - - Recognition result for the processed image. - Array describing detection net outputs. - - - - Result returned by , contains - the recognized text result and an array describing detection network - forward outputs (shapes and names) which are useful for debugging - model output layout mismatches. - - Recognition result for the processed image. - Array describing detection net outputs. - - - Recognition result for the processed image. - - - Array describing detection net outputs. - - - - Process a single image file and return the recognition result together - with detection network forward output shapes for diagnostics. This - method reads the image from disk, runs a forward pass over the - detection network to capture the raw output Mat shapes and then calls - the normal processing pipeline to return the recognized text. - - - - - Process a single image file and return the recognized text as an - . The method detects candidate text regions - and runs recognition on each crop. Multiple recognized digit sequences - are joined with a comma in the returned . - - Path to an image file on disk. Supported - formats depend on OpenCV (typically JPEG, PNG, ...). - An containing the file name and - recognized text (possibly empty). - - - - Process all JPEG images in a directory and return the recognition - results. This is a blocking wrapper over . - - Path to a directory containing images. - If true, files whose names start with - "tn_" will be skipped (convention used to mark text-negative images). - Collection of ordered by file name. - - - - Worker overload of that - accepts a instance. This is used by the parallel - processing pipeline where each worker owns its own Net instance. - - Cropped region to recognize. - Recognition to execute - the forward pass with. - Optional context string for diagnostics. - Recognized digit sequence or empty string. - - - - Progress statistics reported during directory processing. - - Total number of image files to process. - Number of files processed so far. - Current processing throughput in images/second. - - - - Progress statistics reported during directory processing. - - Total number of image files to process. - Number of files processed so far. - Current processing throughput in images/second. - - - Total number of image files to process. - - - Number of files processed so far. - - - Current processing throughput in images/second. - - - diff --git a/src/AIFotoONLUS.Core/DetectedRegion.cs b/src/AIFotoONLUS.Core/DetectedRegion.cs index 7d90b6b..38fb2b7 100644 --- a/src/AIFotoONLUS.Core/DetectedRegion.cs +++ b/src/AIFotoONLUS.Core/DetectedRegion.cs @@ -2,29 +2,7 @@ using OpenCvSharp; namespace AIFotoONLUS.Core { - /// - /// Represents a detected text region produced by the detection network. - /// - /// Bounding rectangle of the detection in image coordinates. - /// Combined confidence score for the detection (objectness * class probability). - /// Class index predicted by the network (index into ). - /// Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right. public record DetectedRegion(Rect BoundingBox, float Confidence, int ClassId, double CenterX); - - /// - /// Represents the result of recognizing a single region: recognized text, - /// its bounding box and confidence. - /// - /// Recognized text for the region (usually a sequence of digits). - /// Bounding rectangle of the recognition result. - /// Confidence score associated with the recognition. public record RecognitionResult(string Text, Rect BoundingBox, double Confidence); - - /// - /// Aggregated result for a processed image. - /// - /// Name of the image file. - /// Comma-separated recognized texts found in the image (may be empty). - /// Full path to the processed image file. public record ImageResult(string FileName, string Text, string FilePath); } \ No newline at end of file diff --git a/src/AIFotoONLUS.Core/ModelConfiguration.cs b/src/AIFotoONLUS.Core/ModelConfiguration.cs index 51d7ac7..4c14e7d 100644 --- a/src/AIFotoONLUS.Core/ModelConfiguration.cs +++ b/src/AIFotoONLUS.Core/ModelConfiguration.cs @@ -2,69 +2,21 @@ using OpenCvSharp; namespace AIFotoONLUS.Core { - /// - /// Configuration options that control model file locations, input sizes - /// and runtime thresholds used by . - /// public class ModelConfiguration { - /// - /// Path to the Darknet configuration (.cfg) file for the detection network. - /// public string DetectionCfg { get; set; } = "models/detection.cfg"; - - /// - /// Path to the Darknet weights (.weights) file for the detection network. - /// public string DetectionWeights { get; set; } = "models/detection.weights"; - - /// - /// Path to the Darknet configuration (.cfg) file for the recognition network. - /// public string RecognitionCfg { get; set; } = "models/recognition.cfg"; - - /// - /// Path to the Darknet weights (.weights) file for the recognition network. - /// public string RecognitionWeights { get; set; } = "models/recognition.weights"; - /// - /// Confidence threshold used to filter out low-probability detections. - /// public double ConfidenceThreshold { get; set; } = 0.5; - - /// - /// Non-maximum suppression (NMS) IoU threshold used to remove overlapping - /// detection boxes. - /// public double NmsThreshold { get; set; } = 0.4; - /// - /// Input size used when preparing the blob for the detection network. - /// public Size DetectionInputSize { get; set; } = new Size(416, 416); - - /// - /// Input size used when preparing the blob for the recognition network. - /// public Size RecognitionInputSize { get; set; } = new Size(140, 120); - /// - /// Labels representing digit classes in the recognition model. The order - /// must match the class ordering used by the trained recognition network. - /// public string[] NumberClasses { get; set; } = new[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }; - - /// - /// When enabled, request OpenCV DNN CUDA backend/target for inference. - /// The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail. - /// - public bool UseGpu { get; set; } = false; - - /// - /// When enabled, recognition crops will be saved to disk under - /// "logs/crops" for diagnostic inspection. Disabled by default. - /// + // When true, recognition crops will be saved to disk for diagnostics. Disabled by default. public bool EnableCropSaving { get; set; } = false; } } \ No newline at end of file diff --git a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs index d6b2d25..1c1a6ca 100644 --- a/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs +++ b/src/AIFotoONLUS.Core/NumberRecognitionEngine.cs @@ -12,32 +12,8 @@ using System.Threading.Tasks; namespace AIFotoONLUS.Core { /// - /// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO) - /// models through OpenCvSharp's DNN API and exposes simple synchronous and - /// asynchronous methods to detect numeric text regions in images and recognize - /// the digits contained within those regions. - /// - /// Overview - /// - Loads two Darknet networks: a detection network (finds text regions) - /// and a recognition network (recognizes digits inside a cropped region). - /// - Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and - /// perform non‑maximum suppression (NMS) on detection candidates. - /// - Provides single-image and directory-level processing APIs. Directory - /// processing supports parallel workers where each worker uses its own - /// per-thread Net instances to allow concurrent forward calls. - /// - /// Threading and performance notes - /// - The class constructs and owns two shared Net instances used by the - /// simple (single-threaded) APIs. When doing parallel processing the - /// implementation creates per-thread Net instances to avoid concurrent - /// calls into the same Net object. A small fallback path exists that will - /// call into the shared nets under a lock when needed. - /// - OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported. - /// - /// Diagnostics - /// - When enabled via the configuration, crops may be saved to disk for - /// debugging. The contains thresholds and - /// paths used by the engine. + /// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and + /// provides methods to detect text regions and recognize digits. /// using Microsoft.Extensions.Logging; @@ -51,37 +27,11 @@ namespace AIFotoONLUS.Core private readonly ILogger? _logger; private bool _disposed; - /// - /// Create a new instance of using the - /// provided . The constructor loads the - /// detection and recognition Darknet model files and prepares the OpenCV - /// DNN nets for CPU inference. - /// - /// Model configuration containing file paths, thresholds - /// and other options. Must not be null. - /// - /// This constructor will throw when - /// any of the expected model files are missing. For logging purposes an - /// overload accepting an is available. - /// public NumberRecognitionEngine(ModelConfiguration cfg) : this(cfg, logger: null) { } - /// - /// Create a new instance of with an - /// optional . The logger will receive diagnostic - /// messages and errors produced by the engine during processing. - /// - /// Model configuration containing file paths and - /// runtime thresholds. - /// Optional logger for diagnostic messages. - /// May be null. - /// Thrown when - /// is null. - /// Thrown when one of the model - /// files referenced by does not exist. public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger) { _logger = logger; @@ -95,8 +45,10 @@ namespace AIFotoONLUS.Core _detectionNet = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights); _recognitionNet = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights); - ConfigureNetRuntime(_detectionNet, _cfg.UseGpu); - ConfigureNetRuntime(_recognitionNet, _cfg.UseGpu); + _detectionNet.SetPreferableBackend(Backend.OPENCV); + _detectionNet.SetPreferableTarget(Target.CPU); + _recognitionNet.SetPreferableBackend(Backend.OPENCV); + _recognitionNet.SetPreferableTarget(Target.CPU); // Let OpenCV use multiple threads internally (use number of logical processors) try { @@ -106,11 +58,6 @@ namespace AIFotoONLUS.Core { // Ignore if not supported by OpenCvSharp build } - - if (_cfg.UseGpu) - { - ValidateGpuRuntime(); - } } public void Dispose() @@ -122,38 +69,6 @@ namespace AIFotoONLUS.Core GC.SuppressFinalize(this); } - public static bool TryValidateGpuRuntime(ModelConfiguration cfg, ILogger? logger, out string? failureMessage) - { - if (cfg is null) throw new ArgumentNullException(nameof(cfg)); - - var probeConfiguration = new ModelConfiguration - { - DetectionCfg = cfg.DetectionCfg, - DetectionWeights = cfg.DetectionWeights, - RecognitionCfg = cfg.RecognitionCfg, - RecognitionWeights = cfg.RecognitionWeights, - ConfidenceThreshold = cfg.ConfidenceThreshold, - NmsThreshold = cfg.NmsThreshold, - DetectionInputSize = cfg.DetectionInputSize, - RecognitionInputSize = cfg.RecognitionInputSize, - NumberClasses = cfg.NumberClasses, - EnableCropSaving = cfg.EnableCropSaving, - UseGpu = true - }; - - try - { - using var engine = new NumberRecognitionEngine(probeConfiguration, logger); - failureMessage = null; - return true; - } - catch (Exception ex) - { - failureMessage = ex.GetBaseException().Message; - return false; - } - } - private static string SanitizeFileName(string name) { foreach (var c in Path.GetInvalidFileNameChars()) name = name.Replace(c, '_'); @@ -162,48 +77,6 @@ namespace AIFotoONLUS.Core private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames(); - private static void ConfigureNetRuntime(Net net, bool useGpu) - { - if (useGpu) - { - net.SetPreferableBackend(Backend.CUDA); - net.SetPreferableTarget(Target.CUDA); - return; - } - - net.SetPreferableBackend(Backend.OPENCV); - net.SetPreferableTarget(Target.CPU); - } - - private void ValidateGpuRuntime() - { - try - { - using var detectionProbe = new Mat(_cfg.DetectionInputSize.Height, _cfg.DetectionInputSize.Width, MatType.CV_8UC3, Scalar.All(0)); - _ = DetectTextRegions(_detectionNet, detectionProbe).Take(1).ToArray(); - - using var recognitionProbe = new Mat(_cfg.RecognitionInputSize.Height, _cfg.RecognitionInputSize.Width, MatType.CV_8UC3, Scalar.All(0)); - using var blob = CvDnn.BlobFromImage(recognitionProbe, 0.00392, _cfg.RecognitionInputSize, new Scalar(0, 0, 0), true, false); - _recognitionNet.SetInput(blob); - using var output = _recognitionNet.Forward(); - } - catch (Exception ex) - { - throw new InvalidOperationException( - "OpenCV DNN CUDA runtime validation failed. Disable number AI GPU mode or use an OpenCV runtime built with CUDA DNN support.", - ex); - } - } - - /// - /// Detect text regions in the supplied image using the detection network. - /// - /// Input image as an OpenCvSharp . - /// Must not be null. - /// An enumerable of containing the - /// bounding boxes, confidence and class information for each detected - /// region. The results are already filtered with the configured - /// confidence and NMS thresholds. public IEnumerable DetectTextRegions(Mat image) { if (image is null) throw new ArgumentNullException(nameof(image)); @@ -220,7 +93,7 @@ namespace AIFotoONLUS.Core var outNames = GetOutputLayerNames(detectionNet); var outsList = new List(); detectionNet.Forward(outsList, outNames); - + Mat[] outs = outsList.ToArray(); if (outs.Length == 0) { @@ -230,15 +103,15 @@ namespace AIFotoONLUS.Core var fallback = new List(); for (int on = 0; on < outNames.Length; on++) { - try - { - var single = detectionNet.Forward(outNames[on]); - fallback.Add(single); - } - catch (Exception ex) - { - _logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]); - } + try + { + var single = detectionNet.Forward(outNames[on]); + fallback.Add(single); + } + catch (Exception ex) + { + _logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]); + } } if (fallback.Count > 0) { @@ -289,21 +162,21 @@ namespace AIFotoONLUS.Core } if (maxScore > _cfg.ConfidenceThreshold) - { - int x = (int)Math.Max(0, Math.Round(cx - w / 2)); - int y = (int)Math.Max(0, Math.Round(cy - h / 2)); - var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)); - boxes.Add(rect); - confidences.Add(maxScore); - classIds.Add(bestClass); - centerXList.Add(cx); - } + { + int x = (int)Math.Max(0, Math.Round(cx - w / 2)); + int y = (int)Math.Max(0, Math.Round(cy - h / 2)); + var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h)); + boxes.Add(rect); + confidences.Add(maxScore); + classIds.Add(bestClass); + centerXList.Add(cx); + } } } if (boxes.Count == 0) return Enumerable.Empty(); - + CvDnn.NMSBoxes(boxes, confidences, (float)_cfg.ConfidenceThreshold, (float)_cfg.NmsThreshold, out int[] indices); @@ -317,18 +190,6 @@ namespace AIFotoONLUS.Core return results; } - /// - /// Recognize digits inside a cropped image region using the recognition - /// network. The method runs the recognition network and returns the - /// concatenated sequence of recognized digit labels ordered left-to-right. - /// - /// Cropped image containing digits as - /// . Must not be null. - /// Optional context string used for diagnostics - /// (e.g. when saving crop image files). - /// A string containing recognized digits in left-to-right order. - /// Returns an empty string when no digits are recognized above the - /// configured confidence threshold. public string RecognizeDigits(Mat croppedImage, string? context = null) { if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage)); @@ -426,31 +287,12 @@ namespace AIFotoONLUS.Core return string.Concat(ordered); } - /// - /// Small DTO that describes the name and shape of a detection network - /// forward output used for diagnostics. - /// - /// Layer/output name. - /// Number of rows in the output Mat. - /// Number of columns in the output Mat. public record DetectionOutput(string Name, int Rows, int Cols); - - /// - /// Result returned by , contains - /// the recognized text result and an array describing detection network - /// forward outputs (shapes and names) which are useful for debugging - /// model output layout mismatches. - /// - /// Recognition result for the processed image. - /// Array describing detection net outputs. public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs); /// - /// Process a single image file and return the recognition result together - /// with detection network forward output shapes for diagnostics. This - /// method reads the image from disk, runs a forward pass over the - /// detection network to capture the raw output Mat shapes and then calls - /// the normal processing pipeline to return the recognized text. + /// Process a single image file and return the recognition result together with + /// detection network forward output shapes for diagnostics. /// public DiagnosticResult ProcessFileWithDiagnostics(string filePath) { @@ -488,16 +330,6 @@ namespace AIFotoONLUS.Core return new DiagnosticResult(imgRes, outputs); } - /// - /// Process a single image file and return the recognized text as an - /// . The method detects candidate text regions - /// and runs recognition on each crop. Multiple recognized digit sequences - /// are joined with a comma in the returned . - /// - /// Path to an image file on disk. Supported - /// formats depend on OpenCV (typically JPEG, PNG, ...). - /// An containing the file name and - /// recognized text (possibly empty). public ImageResult ProcessImage(string filePath) { if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath); @@ -519,14 +351,6 @@ namespace AIFotoONLUS.Core return result; } - /// - /// Process all JPEG images in a directory and return the recognition - /// results. This is a blocking wrapper over . - /// - /// Path to a directory containing images. - /// If true, files whose names start with - /// "tn_" will be skipped (convention used to mark text-negative images). - /// Collection of ordered by file name. public IEnumerable ProcessDirectory(string directoryPath, bool skipTextNegative = false) { // Simple wrapper over async implementation @@ -554,8 +378,10 @@ namespace AIFotoONLUS.Core { var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights); var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights); - ConfigureNetRuntime(det, _cfg.UseGpu); - ConfigureNetRuntime(rec, _cfg.UseGpu); + det.SetPreferableBackend(Backend.OPENCV); + det.SetPreferableTarget(Target.CPU); + rec.SetPreferableBackend(Backend.OPENCV); + rec.SetPreferableTarget(Target.CPU); netsBag.Add((det, rec)); return (det, rec); }); @@ -591,7 +417,8 @@ namespace AIFotoONLUS.Core try { using var tempRec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights); - ConfigureNetRuntime(tempRec, _cfg.UseGpu); + tempRec.SetPreferableBackend(Backend.OPENCV); + tempRec.SetPreferableTarget(Target.CPU); var alt = RecognizeDigits(crop, tempRec, ctx); if (!string.IsNullOrEmpty(alt)) txt = alt; } @@ -677,16 +504,6 @@ namespace AIFotoONLUS.Core } // Overload RecognizeDigits that accepts a Net for worker threads - /// - /// Worker overload of that - /// accepts a instance. This is used by the parallel - /// processing pipeline where each worker owns its own Net instance. - /// - /// Cropped region to recognize. - /// Recognition to execute - /// the forward pass with. - /// Optional context string for diagnostics. - /// Recognized digit sequence or empty string. private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null) { if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage)); diff --git a/src/AIFotoONLUS.Core/ProcessingStats.cs b/src/AIFotoONLUS.Core/ProcessingStats.cs index 9d0de10..fe5aba3 100644 --- a/src/AIFotoONLUS.Core/ProcessingStats.cs +++ b/src/AIFotoONLUS.Core/ProcessingStats.cs @@ -1,10 +1,4 @@ namespace AIFotoONLUS.Core { - /// - /// Progress statistics reported during directory processing. - /// - /// Total number of image files to process. - /// Number of files processed so far. - /// Current processing throughput in images/second. public record ProcessingStats(int TotalFiles, int ProcessedFiles, double ImagesPerSecond); }