Compare commits
No commits in common. "master" and "0.1.0" have entirely different histories.
9 changed files with 36 additions and 846 deletions
|
|
@ -1,124 +0,0 @@
|
||||||
name: Build And Publish AIFotoONLUS.Core
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- develop
|
|
||||||
tags:
|
|
||||||
- '*'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
env:
|
|
||||||
DOTNET_VERSION: 10.0.x
|
|
||||||
PROJECT_PATH: src/AIFotoONLUS.Core/AIFotoONLUS.Core.csproj
|
|
||||||
PACKAGE_OUTPUT_DIR: artifacts/nuget
|
|
||||||
PACKAGE_ARTIFACT_NAME: aifotoonlus-core-nuget
|
|
||||||
NUGET_SOURCE_NAME: forgejo-aifotoonlus
|
|
||||||
NUGET_SOURCE_URL: ${{ vars.AIFOTOONLUS_NUGET_SOURCE_URL || format('{0}/api/packages/{1}/nuget/index.json', github.server_url, vars.AIFOTOONLUS_PACKAGE_OWNER || github.repository_owner) }}
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build:
|
|
||||||
runs-on: docker
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Setup .NET
|
|
||||||
uses: actions/setup-dotnet@v4
|
|
||||||
with:
|
|
||||||
dotnet-version: ${{ env.DOTNET_VERSION }}
|
|
||||||
|
|
||||||
- name: Restore
|
|
||||||
run: dotnet restore "${{ env.PROJECT_PATH }}"
|
|
||||||
|
|
||||||
- name: Build
|
|
||||||
run: dotnet build "${{ env.PROJECT_PATH }}" --configuration Release --no-restore /p:GeneratePackageOnBuild=false
|
|
||||||
|
|
||||||
- name: Pack
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -eu
|
|
||||||
mkdir -p "${{ env.PACKAGE_OUTPUT_DIR }}"
|
|
||||||
|
|
||||||
if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
|
|
||||||
package_version="${GITHUB_REF_NAME#v}"
|
|
||||||
echo "Packing tag version ${package_version}"
|
|
||||||
dotnet pack "${{ env.PROJECT_PATH }}" \
|
|
||||||
--configuration Release \
|
|
||||||
--output "${{ env.PACKAGE_OUTPUT_DIR }}" \
|
|
||||||
--no-build \
|
|
||||||
/p:PackageVersion="${package_version}"
|
|
||||||
else
|
|
||||||
echo "Packing with project version or MinVer-derived version"
|
|
||||||
dotnet pack "${{ env.PROJECT_PATH }}" \
|
|
||||||
--configuration Release \
|
|
||||||
--output "${{ env.PACKAGE_OUTPUT_DIR }}" \
|
|
||||||
--no-build
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Upload package artifact
|
|
||||||
uses: actions/upload-artifact@v3
|
|
||||||
with:
|
|
||||||
name: ${{ env.PACKAGE_ARTIFACT_NAME }}
|
|
||||||
path: ${{ env.PACKAGE_OUTPUT_DIR }}/*.nupkg
|
|
||||||
if-no-files-found: error
|
|
||||||
|
|
||||||
publish:
|
|
||||||
if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
|
|
||||||
needs: build
|
|
||||||
runs-on: docker
|
|
||||||
env:
|
|
||||||
FORGEJO_PACKAGE_USERNAME: ${{ secrets.FORGEJO_PACKAGE_USERNAME }}
|
|
||||||
FORGEJO_PACKAGE_TOKEN: ${{ secrets.FORGEJO_PACKAGE_TOKEN }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Setup .NET
|
|
||||||
uses: actions/setup-dotnet@v4
|
|
||||||
with:
|
|
||||||
dotnet-version: ${{ env.DOTNET_VERSION }}
|
|
||||||
|
|
||||||
- name: Download package artifact
|
|
||||||
uses: actions/download-artifact@v3
|
|
||||||
with:
|
|
||||||
name: ${{ env.PACKAGE_ARTIFACT_NAME }}
|
|
||||||
path: ${{ env.PACKAGE_OUTPUT_DIR }}
|
|
||||||
|
|
||||||
- name: Validate publish secrets
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -eu
|
|
||||||
if [ -z "${FORGEJO_PACKAGE_USERNAME}" ]; then
|
|
||||||
echo "secrets.FORGEJO_PACKAGE_USERNAME is required"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ -z "${FORGEJO_PACKAGE_TOKEN}" ]; then
|
|
||||||
echo "secrets.FORGEJO_PACKAGE_TOKEN is required"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Configure Forgejo NuGet source
|
|
||||||
run: |
|
|
||||||
dotnet nuget add source "${{ env.NUGET_SOURCE_URL }}" \
|
|
||||||
--name "${{ env.NUGET_SOURCE_NAME }}" \
|
|
||||||
--username "${FORGEJO_PACKAGE_USERNAME}" \
|
|
||||||
--password "${FORGEJO_PACKAGE_TOKEN}" \
|
|
||||||
--store-password-in-clear-text
|
|
||||||
|
|
||||||
- name: Publish package to Forgejo NuGet
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -eu
|
|
||||||
shopt -s nullglob
|
|
||||||
packages=("${{ env.PACKAGE_OUTPUT_DIR }}"/*.nupkg)
|
|
||||||
if [ "${#packages[@]}" -eq 0 ]; then
|
|
||||||
echo "No NuGet packages found in ${{ env.PACKAGE_OUTPUT_DIR }}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
dotnet nuget push "${{ env.PACKAGE_OUTPUT_DIR }}"/*.nupkg \
|
|
||||||
--source "${{ env.NUGET_SOURCE_NAME }}" \
|
|
||||||
--skip-duplicate
|
|
||||||
65
README.md
65
README.md
|
|
@ -1,65 +0,0 @@
|
||||||
# AIFotoONLUS Number Recognition Library
|
|
||||||
|
|
||||||
This library provides a small, focused engine to detect and recognize numeric
|
|
||||||
text (digits) in images using Darknet (YOLO) models via OpenCvSharp's DNN API.
|
|
||||||
It is suitable for batch processing folders of images or individual files.
|
|
||||||
|
|
||||||
## Features
|
|
||||||
- Detection network (Darknet/Yolo) to find candidate text regions.
|
|
||||||
- Recognition network (Darknet/Yolo) to identify digits inside detected crops.
|
|
||||||
- Single-file and directory-level processing APIs.
|
|
||||||
- Parallel processing with per-thread network instances for throughput.
|
|
||||||
- Diagnostic helpers to dump network output shapes and optionally save crop images.
|
|
||||||
|
|
||||||
## Basic usage
|
|
||||||
1. Create a `ModelConfiguration` instance that points to your Darknet `.cfg`
|
|
||||||
and `.weights` files for both detection and recognition networks, configure
|
|
||||||
confidence and NMS thresholds, and provide a list of number class labels.
|
|
||||||
|
|
||||||
2. Create an instance of `NumberRecognitionEngine`:
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
using var engine = new NumberRecognitionEngine(modelConfig, logger: null);
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Process a single image:
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
var result = engine.ProcessImage("/path/to/image.jpg");
|
|
||||||
Console.WriteLine(result.Text);
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Process a directory (parallelized):
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
var results = await engine.ProcessDirectoryAsync("/path/to/images", recursive: false);
|
|
||||||
foreach (var r in results) Console.WriteLine($"{r.FileName}: {r.Text}");
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration notes
|
|
||||||
- `ModelConfiguration` controls model file paths, input sizes, thresholds and
|
|
||||||
whether to save cropped images for diagnostics. Make sure the paths are
|
|
||||||
accessible to the process and the model files match the expected network
|
|
||||||
architectures.
|
|
||||||
|
|
||||||
- The engine expects detection network outputs in the YOLO-style layout:
|
|
||||||
`[cx, cy, w, h, objectness, class1, class2, ...]`.
|
|
||||||
|
|
||||||
## Threading & diagnostics
|
|
||||||
- For directory/batch processing the engine creates per-thread Net instances
|
|
||||||
so OpenCV forward calls can run concurrently. It also contains fallback
|
|
||||||
logic that will perform processing with shared nets under a lock if needed.
|
|
||||||
|
|
||||||
- When `EnableCropSaving` is enabled in configuration, each recognized crop is
|
|
||||||
saved to `logs/crops` with a timestamp and optional context label to aid
|
|
||||||
debugging false positives/negatives.
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
- If the engine returns no detections, verify the model files are correct and
|
|
||||||
compatible with the expected output layout. Use
|
|
||||||
`ProcessFileWithDiagnostics` to inspect output layer shapes.
|
|
||||||
|
|
||||||
## License & Notes
|
|
||||||
This project is provided as-is. See the repository for licensing information and
|
|
||||||
for the model files distribution terms (models are usually not redistributed
|
|
||||||
with code and must be obtained separately).
|
|
||||||
|
|
@ -1,27 +0,0 @@
|
||||||
{
|
|
||||||
"AssemblySemFileVer": "0.1.0.0",
|
|
||||||
"AssemblySemVer": "0.1.0.0",
|
|
||||||
"BranchName": "master",
|
|
||||||
"BuildMetaData": null,
|
|
||||||
"CommitDate": "2026-02-15",
|
|
||||||
"CommitsSinceVersionSource": 11,
|
|
||||||
"EscapedBranchName": "master",
|
|
||||||
"FullBuildMetaData": "Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a",
|
|
||||||
"FullSemVer": "0.1.0-{BranchName}.11",
|
|
||||||
"InformationalVersion": "0.1.0-{BranchName}.11+Branch.master.Sha.a90da31e531332a4cf0bafe604f89d0e14f3395a",
|
|
||||||
"Major": 0,
|
|
||||||
"MajorMinorPatch": "0.1.0",
|
|
||||||
"Minor": 1,
|
|
||||||
"Patch": 0,
|
|
||||||
"PreReleaseLabel": "{BranchName}",
|
|
||||||
"PreReleaseLabelWithDash": "-{BranchName}",
|
|
||||||
"PreReleaseNumber": 11,
|
|
||||||
"PreReleaseTag": "{BranchName}.11",
|
|
||||||
"PreReleaseTagWithDash": "-{BranchName}.11",
|
|
||||||
"SemVer": "0.1.0-{BranchName}.11",
|
|
||||||
"Sha": "a90da31e531332a4cf0bafe604f89d0e14f3395a",
|
|
||||||
"ShortSha": "a90da31",
|
|
||||||
"UncommittedChanges": 7,
|
|
||||||
"VersionSourceSha": "",
|
|
||||||
"WeightedPreReleaseNumber": 11
|
|
||||||
}
|
|
||||||
|
|
@ -3,10 +3,6 @@
|
||||||
<TargetFramework>net10.0</TargetFramework>
|
<TargetFramework>net10.0</TargetFramework>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<!-- Generate XML documentation file for the public API -->
|
|
||||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
|
||||||
<!-- Ensure the documentation file path is predictable so it can be packed -->
|
|
||||||
<DocumentationFile>$(OutputPath)$(AssemblyName).xml</DocumentationFile>
|
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<!-- NuGet package metadata -->
|
<!-- NuGet package metadata -->
|
||||||
|
|
@ -14,7 +10,7 @@
|
||||||
<Authors>Maddo</Authors>
|
<Authors>Maddo</Authors>
|
||||||
<Company>Maddo</Company>
|
<Company>Maddo</Company>
|
||||||
<Description>Core library for AIFotoONLUS image processing and recognition.</Description>
|
<Description>Core library for AIFotoONLUS image processing and recognition.</Description>
|
||||||
<RepositoryUrl>https://forgejo.maddoscientisto.net/maddo/AIFotoONLUS</RepositoryUrl>
|
<RepositoryUrl>https://gitlab.com/MaddoScientisto/aifotoonlus</RepositoryUrl>
|
||||||
<!-- Versioning: use MinVer to infer semantic versions from Git tags. When no tag is present,
|
<!-- Versioning: use MinVer to infer semantic versions from Git tags. When no tag is present,
|
||||||
projects will fall back to the default below. -->
|
projects will fall back to the default below. -->
|
||||||
<Version>0.1.0</Version>
|
<Version>0.1.0</Version>
|
||||||
|
|
|
||||||
|
|
@ -1,331 +0,0 @@
|
||||||
<?xml version="1.0"?>
|
|
||||||
<doc>
|
|
||||||
<assembly>
|
|
||||||
<name>AIFotoONLUS.Core</name>
|
|
||||||
</assembly>
|
|
||||||
<members>
|
|
||||||
<member name="T:AIFotoONLUS.Core.DetectedRegion">
|
|
||||||
<summary>
|
|
||||||
Represents a detected text region produced by the detection network.
|
|
||||||
</summary>
|
|
||||||
<param name="BoundingBox">Bounding rectangle of the detection in image coordinates.</param>
|
|
||||||
<param name="Confidence">Combined confidence score for the detection (objectness * class probability).</param>
|
|
||||||
<param name="ClassId">Class index predicted by the network (index into <see cref="P:AIFotoONLUS.Core.ModelConfiguration.NumberClasses"/>).</param>
|
|
||||||
<param name="CenterX">Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.</param>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.DetectedRegion.#ctor(OpenCvSharp.Rect,System.Single,System.Int32,System.Double)">
|
|
||||||
<summary>
|
|
||||||
Represents a detected text region produced by the detection network.
|
|
||||||
</summary>
|
|
||||||
<param name="BoundingBox">Bounding rectangle of the detection in image coordinates.</param>
|
|
||||||
<param name="Confidence">Combined confidence score for the detection (objectness * class probability).</param>
|
|
||||||
<param name="ClassId">Class index predicted by the network (index into <see cref="P:AIFotoONLUS.Core.ModelConfiguration.NumberClasses"/>).</param>
|
|
||||||
<param name="CenterX">Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.</param>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.DetectedRegion.BoundingBox">
|
|
||||||
<summary>Bounding rectangle of the detection in image coordinates.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.DetectedRegion.Confidence">
|
|
||||||
<summary>Combined confidence score for the detection (objectness * class probability).</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.DetectedRegion.ClassId">
|
|
||||||
<summary>Class index predicted by the network (index into <see cref="P:AIFotoONLUS.Core.ModelConfiguration.NumberClasses"/>).</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.DetectedRegion.CenterX">
|
|
||||||
<summary>Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="T:AIFotoONLUS.Core.RecognitionResult">
|
|
||||||
<summary>
|
|
||||||
Represents the result of recognizing a single region: recognized text,
|
|
||||||
its bounding box and confidence.
|
|
||||||
</summary>
|
|
||||||
<param name="Text">Recognized text for the region (usually a sequence of digits).</param>
|
|
||||||
<param name="BoundingBox">Bounding rectangle of the recognition result.</param>
|
|
||||||
<param name="Confidence">Confidence score associated with the recognition.</param>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.RecognitionResult.#ctor(System.String,OpenCvSharp.Rect,System.Double)">
|
|
||||||
<summary>
|
|
||||||
Represents the result of recognizing a single region: recognized text,
|
|
||||||
its bounding box and confidence.
|
|
||||||
</summary>
|
|
||||||
<param name="Text">Recognized text for the region (usually a sequence of digits).</param>
|
|
||||||
<param name="BoundingBox">Bounding rectangle of the recognition result.</param>
|
|
||||||
<param name="Confidence">Confidence score associated with the recognition.</param>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.RecognitionResult.Text">
|
|
||||||
<summary>Recognized text for the region (usually a sequence of digits).</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.RecognitionResult.BoundingBox">
|
|
||||||
<summary>Bounding rectangle of the recognition result.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.RecognitionResult.Confidence">
|
|
||||||
<summary>Confidence score associated with the recognition.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="T:AIFotoONLUS.Core.ImageResult">
|
|
||||||
<summary>
|
|
||||||
Aggregated result for a processed image.
|
|
||||||
</summary>
|
|
||||||
<param name="FileName">Name of the image file.</param>
|
|
||||||
<param name="Text">Comma-separated recognized texts found in the image (may be empty).</param>
|
|
||||||
<param name="FilePath">Full path to the processed image file.</param>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.ImageResult.#ctor(System.String,System.String,System.String)">
|
|
||||||
<summary>
|
|
||||||
Aggregated result for a processed image.
|
|
||||||
</summary>
|
|
||||||
<param name="FileName">Name of the image file.</param>
|
|
||||||
<param name="Text">Comma-separated recognized texts found in the image (may be empty).</param>
|
|
||||||
<param name="FilePath">Full path to the processed image file.</param>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ImageResult.FileName">
|
|
||||||
<summary>Name of the image file.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ImageResult.Text">
|
|
||||||
<summary>Comma-separated recognized texts found in the image (may be empty).</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ImageResult.FilePath">
|
|
||||||
<summary>Full path to the processed image file.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="T:AIFotoONLUS.Core.ModelConfiguration">
|
|
||||||
<summary>
|
|
||||||
Configuration options that control model file locations, input sizes
|
|
||||||
and runtime thresholds used by <see cref="T:AIFotoONLUS.Core.NumberRecognitionEngine"/>.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.DetectionCfg">
|
|
||||||
<summary>
|
|
||||||
Path to the Darknet configuration (.cfg) file for the detection network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.DetectionWeights">
|
|
||||||
<summary>
|
|
||||||
Path to the Darknet weights (.weights) file for the detection network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.RecognitionCfg">
|
|
||||||
<summary>
|
|
||||||
Path to the Darknet configuration (.cfg) file for the recognition network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.RecognitionWeights">
|
|
||||||
<summary>
|
|
||||||
Path to the Darknet weights (.weights) file for the recognition network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.ConfidenceThreshold">
|
|
||||||
<summary>
|
|
||||||
Confidence threshold used to filter out low-probability detections.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.NmsThreshold">
|
|
||||||
<summary>
|
|
||||||
Non-maximum suppression (NMS) IoU threshold used to remove overlapping
|
|
||||||
detection boxes.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.DetectionInputSize">
|
|
||||||
<summary>
|
|
||||||
Input size used when preparing the blob for the detection network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.RecognitionInputSize">
|
|
||||||
<summary>
|
|
||||||
Input size used when preparing the blob for the recognition network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.NumberClasses">
|
|
||||||
<summary>
|
|
||||||
Labels representing digit classes in the recognition model. The order
|
|
||||||
must match the class ordering used by the trained recognition network.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.UseGpu">
|
|
||||||
<summary>
|
|
||||||
When enabled, request OpenCV DNN CUDA backend/target for inference.
|
|
||||||
The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ModelConfiguration.EnableCropSaving">
|
|
||||||
<summary>
|
|
||||||
When enabled, recognition crops will be saved to disk under
|
|
||||||
"logs/crops" for diagnostic inspection. Disabled by default.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.#ctor(AIFotoONLUS.Core.ModelConfiguration)">
|
|
||||||
<summary>
|
|
||||||
Create a new instance of <see cref="T:AIFotoONLUS.Core.NumberRecognitionEngine"/> using the
|
|
||||||
provided <see cref="T:AIFotoONLUS.Core.ModelConfiguration"/>. The constructor loads the
|
|
||||||
detection and recognition Darknet model files and prepares the OpenCV
|
|
||||||
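DNN nets for inference (CPU, or the CUDA backend/target when <see cref="P:AIFotoONLUS.Core.ModelConfiguration.UseGpu"/> is enabled).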
|
|
||||||
</summary>
|
|
||||||
<param name="cfg">Model configuration containing file paths, thresholds
|
|
||||||
and other options. Must not be <c>null</c>.</param>
|
|
||||||
<remarks>
|
|
||||||
This constructor will throw <see cref="T:System.IO.FileNotFoundException"/> when
|
|
||||||
any of the expected model files are missing. For logging purposes an
|
|
||||||
overload accepting an <see cref="T:Microsoft.Extensions.Logging.ILogger"/> is available.
|
|
||||||
</remarks>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.#ctor(AIFotoONLUS.Core.ModelConfiguration,Microsoft.Extensions.Logging.ILogger)">
|
|
||||||
<summary>
|
|
||||||
Create a new instance of <see cref="T:AIFotoONLUS.Core.NumberRecognitionEngine"/> with an
|
|
||||||
optional <see cref="T:Microsoft.Extensions.Logging.ILogger"/>. The logger will receive diagnostic
|
|
||||||
messages and errors produced by the engine during processing.
|
|
||||||
</summary>
|
|
||||||
<param name="cfg">Model configuration containing file paths and
|
|
||||||
runtime thresholds.</param>
|
|
||||||
<param name="logger">Optional logger for diagnostic messages.
|
|
||||||
May be <c>null</c>.</param>
|
|
||||||
<exception cref="T:System.ArgumentNullException">Thrown when <paramref name="cfg"/>
|
|
||||||
is <c>null</c>.</exception>
|
|
||||||
<exception cref="T:System.IO.FileNotFoundException">Thrown when one of the model
|
|
||||||
files referenced by <paramref name="cfg"/> does not exist.</exception>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.DetectTextRegions(OpenCvSharp.Mat)">
|
|
||||||
<summary>
|
|
||||||
Detect text regions in the supplied image using the detection network.
|
|
||||||
</summary>
|
|
||||||
<param name="image">Input image as an OpenCvSharp <see cref="T:OpenCvSharp.Mat"/>.
|
|
||||||
Must not be <c>null</c>.</param>
|
|
||||||
<returns>An enumerable of <see cref="T:AIFotoONLUS.Core.DetectedRegion"/> containing the
|
|
||||||
bounding boxes, confidence and class information for each detected
|
|
||||||
region. The results are already filtered with the configured
|
|
||||||
confidence and NMS thresholds.</returns>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.RecognizeDigits(OpenCvSharp.Mat,System.String)">
|
|
||||||
<summary>
|
|
||||||
Recognize digits inside a cropped image region using the recognition
|
|
||||||
network. The method runs the recognition network and returns the
|
|
||||||
concatenated sequence of recognized digit labels ordered left-to-right.
|
|
||||||
</summary>
|
|
||||||
<param name="croppedImage">Cropped image containing digits as
|
|
||||||
<see cref="T:OpenCvSharp.Mat"/>. Must not be <c>null</c>.</param>
|
|
||||||
<param name="context">Optional context string used for diagnostics
|
|
||||||
(e.g. when saving crop image files).</param>
|
|
||||||
<returns>A string containing recognized digits in left-to-right order.
|
|
||||||
Returns an empty string when no digits are recognized above the
|
|
||||||
configured confidence threshold.</returns>
|
|
||||||
</member>
|
|
||||||
<member name="T:AIFotoONLUS.Core.NumberRecognitionEngine.DetectionOutput">
|
|
||||||
<summary>
|
|
||||||
Small DTO that describes the name and shape of a detection network
|
|
||||||
forward output used for diagnostics.
|
|
||||||
</summary>
|
|
||||||
<param name="Name">Layer/output name.</param>
|
|
||||||
<param name="Rows">Number of rows in the output Mat.</param>
|
|
||||||
<param name="Cols">Number of columns in the output Mat.</param>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.DetectionOutput.#ctor(System.String,System.Int32,System.Int32)">
|
|
||||||
<summary>
|
|
||||||
Small DTO that describes the name and shape of a detection network
|
|
||||||
forward output used for diagnostics.
|
|
||||||
</summary>
|
|
||||||
<param name="Name">Layer/output name.</param>
|
|
||||||
<param name="Rows">Number of rows in the output Mat.</param>
|
|
||||||
<param name="Cols">Number of columns in the output Mat.</param>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.NumberRecognitionEngine.DetectionOutput.Name">
|
|
||||||
<summary>Layer/output name.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.NumberRecognitionEngine.DetectionOutput.Rows">
|
|
||||||
<summary>Number of rows in the output Mat.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.NumberRecognitionEngine.DetectionOutput.Cols">
|
|
||||||
<summary>Number of columns in the output Mat.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="T:AIFotoONLUS.Core.NumberRecognitionEngine.DiagnosticResult">
|
|
||||||
<summary>
|
|
||||||
Result returned by <see cref="M:AIFotoONLUS.Core.NumberRecognitionEngine.ProcessFileWithDiagnostics(System.String)"/>, contains
|
|
||||||
the recognized text result and an array describing detection network
|
|
||||||
forward outputs (shapes and names) which are useful for debugging
|
|
||||||
model output layout mismatches.
|
|
||||||
</summary>
|
|
||||||
<param name="Result">Recognition result for the processed image.</param>
|
|
||||||
<param name="DetectionOutputs">Array describing detection net outputs.</param>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.DiagnosticResult.#ctor(AIFotoONLUS.Core.ImageResult,AIFotoONLUS.Core.NumberRecognitionEngine.DetectionOutput[])">
|
|
||||||
<summary>
|
|
||||||
Result returned by <see cref="M:AIFotoONLUS.Core.NumberRecognitionEngine.ProcessFileWithDiagnostics(System.String)"/>, contains
|
|
||||||
the recognized text result and an array describing detection network
|
|
||||||
forward outputs (shapes and names) which are useful for debugging
|
|
||||||
model output layout mismatches.
|
|
||||||
</summary>
|
|
||||||
<param name="Result">Recognition result for the processed image.</param>
|
|
||||||
<param name="DetectionOutputs">Array describing detection net outputs.</param>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.NumberRecognitionEngine.DiagnosticResult.Result">
|
|
||||||
<summary>Recognition result for the processed image.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.NumberRecognitionEngine.DiagnosticResult.DetectionOutputs">
|
|
||||||
<summary>Array describing detection net outputs.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.ProcessFileWithDiagnostics(System.String)">
|
|
||||||
<summary>
|
|
||||||
Process a single image file and return the recognition result together
|
|
||||||
with detection network forward output shapes for diagnostics. This
|
|
||||||
method reads the image from disk, runs a forward pass over the
|
|
||||||
detection network to capture the raw output Mat shapes and then calls
|
|
||||||
the normal processing pipeline to return the recognized text.
|
|
||||||
</summary>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.ProcessImage(System.String)">
|
|
||||||
<summary>
|
|
||||||
Process a single image file and return the recognized text as an
|
|
||||||
<see cref="T:AIFotoONLUS.Core.ImageResult"/>. The method detects candidate text regions
|
|
||||||
and runs recognition on each crop. Multiple recognized digit sequences
|
|
||||||
are joined with a comma in the returned <see cref="P:AIFotoONLUS.Core.ImageResult.Text"/>.
|
|
||||||
</summary>
|
|
||||||
<param name="filePath">Path to an image file on disk. Supported
|
|
||||||
formats depend on OpenCV (typically JPEG, PNG, ...).</param>
|
|
||||||
<returns>An <see cref="T:AIFotoONLUS.Core.ImageResult"/> containing the file name and
|
|
||||||
recognized text (possibly empty).</returns>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.ProcessDirectory(System.String,System.Boolean)">
|
|
||||||
<summary>
|
|
||||||
Process all JPEG images in a directory and return the recognition
|
|
||||||
results. This is a blocking wrapper over <see cref="M:AIFotoONLUS.Core.NumberRecognitionEngine.ProcessDirectoryAsync(System.String,System.Boolean,System.Boolean,System.IProgress{AIFotoONLUS.Core.ProcessingStats},System.IProgress{AIFotoONLUS.Core.ImageResult},System.Threading.CancellationToken)"/>.
|
|
||||||
</summary>
|
|
||||||
<param name="directoryPath">Path to a directory containing images.</param>
|
|
||||||
<param name="skipTextNegative">If true, files whose names start with
|
|
||||||
"tn_" will be skipped (convention used to mark text-negative images).</param>
|
|
||||||
<returns>Collection of <see cref="T:AIFotoONLUS.Core.ImageResult"/> ordered by file name.</returns>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.NumberRecognitionEngine.RecognizeDigits(OpenCvSharp.Mat,OpenCvSharp.Dnn.Net,System.String)">
|
|
||||||
<summary>
|
|
||||||
Worker overload of <see cref="M:AIFotoONLUS.Core.NumberRecognitionEngine.RecognizeDigits(OpenCvSharp.Mat,System.String)"/> that
|
|
||||||
accepts a <see cref="T:OpenCvSharp.Dnn.Net"/> instance. This is used by the parallel
|
|
||||||
processing pipeline where each worker owns its own Net instance.
|
|
||||||
</summary>
|
|
||||||
<param name="croppedImage">Cropped region to recognize.</param>
|
|
||||||
<param name="recognitionNet">Recognition <see cref="T:OpenCvSharp.Dnn.Net"/> to execute
|
|
||||||
the forward pass with.</param>
|
|
||||||
<param name="context">Optional context string for diagnostics.</param>
|
|
||||||
<returns>Recognized digit sequence or empty string.</returns>
|
|
||||||
</member>
|
|
||||||
<member name="T:AIFotoONLUS.Core.ProcessingStats">
|
|
||||||
<summary>
|
|
||||||
Progress statistics reported during directory processing.
|
|
||||||
</summary>
|
|
||||||
<param name="TotalFiles">Total number of image files to process.</param>
|
|
||||||
<param name="ProcessedFiles">Number of files processed so far.</param>
|
|
||||||
<param name="ImagesPerSecond">Current processing throughput in images/second.</param>
|
|
||||||
</member>
|
|
||||||
<member name="M:AIFotoONLUS.Core.ProcessingStats.#ctor(System.Int32,System.Int32,System.Double)">
|
|
||||||
<summary>
|
|
||||||
Progress statistics reported during directory processing.
|
|
||||||
</summary>
|
|
||||||
<param name="TotalFiles">Total number of image files to process.</param>
|
|
||||||
<param name="ProcessedFiles">Number of files processed so far.</param>
|
|
||||||
<param name="ImagesPerSecond">Current processing throughput in images/second.</param>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ProcessingStats.TotalFiles">
|
|
||||||
<summary>Total number of image files to process.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ProcessingStats.ProcessedFiles">
|
|
||||||
<summary>Number of files processed so far.</summary>
|
|
||||||
</member>
|
|
||||||
<member name="P:AIFotoONLUS.Core.ProcessingStats.ImagesPerSecond">
|
|
||||||
<summary>Current processing throughput in images/second.</summary>
|
|
||||||
</member>
|
|
||||||
</members>
|
|
||||||
</doc>
|
|
||||||
|
|
@ -2,29 +2,7 @@ using OpenCvSharp;
|
||||||
|
|
||||||
namespace AIFotoONLUS.Core
|
namespace AIFotoONLUS.Core
|
||||||
{
|
{
|
||||||
/// <summary>
|
|
||||||
/// Represents a detected text region produced by the detection network.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="BoundingBox">Bounding rectangle of the detection in image coordinates.</param>
|
|
||||||
/// <param name="Confidence">Combined confidence score for the detection (objectness * class probability).</param>
|
|
||||||
/// <param name="ClassId">Class index predicted by the network (index into <see cref="ModelConfiguration.NumberClasses"/>).</param>
|
|
||||||
/// <param name="CenterX">Center X coordinate (in pixels) of the bounding box, used to order detections left-to-right.</param>
|
|
||||||
public record DetectedRegion(Rect BoundingBox, float Confidence, int ClassId, double CenterX);
|
public record DetectedRegion(Rect BoundingBox, float Confidence, int ClassId, double CenterX);
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Represents the result of recognizing a single region: recognized text,
|
|
||||||
/// its bounding box and confidence.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="Text">Recognized text for the region (usually a sequence of digits).</param>
|
|
||||||
/// <param name="BoundingBox">Bounding rectangle of the recognition result.</param>
|
|
||||||
/// <param name="Confidence">Confidence score associated with the recognition.</param>
|
|
||||||
public record RecognitionResult(string Text, Rect BoundingBox, double Confidence);
|
public record RecognitionResult(string Text, Rect BoundingBox, double Confidence);
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Aggregated result for a processed image.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="FileName">Name of the image file.</param>
|
|
||||||
/// <param name="Text">Comma-separated recognized texts found in the image (may be empty).</param>
|
|
||||||
/// <param name="FilePath">Full path to the processed image file.</param>
|
|
||||||
public record ImageResult(string FileName, string Text, string FilePath);
|
public record ImageResult(string FileName, string Text, string FilePath);
|
||||||
}
|
}
|
||||||
|
|
@ -2,69 +2,21 @@ using OpenCvSharp;
|
||||||
|
|
||||||
namespace AIFotoONLUS.Core
|
namespace AIFotoONLUS.Core
|
||||||
{
|
{
|
||||||
/// <summary>
|
|
||||||
/// Configuration options that control model file locations, input sizes
|
|
||||||
/// and runtime thresholds used by <see cref="NumberRecognitionEngine"/>.
|
|
||||||
/// </summary>
|
|
||||||
public class ModelConfiguration
|
public class ModelConfiguration
|
||||||
{
|
{
|
||||||
/// <summary>
|
|
||||||
/// Path to the Darknet configuration (.cfg) file for the detection network.
|
|
||||||
/// </summary>
|
|
||||||
public string DetectionCfg { get; set; } = "models/detection.cfg";
|
public string DetectionCfg { get; set; } = "models/detection.cfg";
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Path to the Darknet weights (.weights) file for the detection network.
|
|
||||||
/// </summary>
|
|
||||||
public string DetectionWeights { get; set; } = "models/detection.weights";
|
public string DetectionWeights { get; set; } = "models/detection.weights";
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Path to the Darknet configuration (.cfg) file for the recognition network.
|
|
||||||
/// </summary>
|
|
||||||
public string RecognitionCfg { get; set; } = "models/recognition.cfg";
|
public string RecognitionCfg { get; set; } = "models/recognition.cfg";
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Path to the Darknet weights (.weights) file for the recognition network.
|
|
||||||
/// </summary>
|
|
||||||
public string RecognitionWeights { get; set; } = "models/recognition.weights";
|
public string RecognitionWeights { get; set; } = "models/recognition.weights";
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Confidence threshold used to filter out low-probability detections.
|
|
||||||
/// </summary>
|
|
||||||
public double ConfidenceThreshold { get; set; } = 0.5;
|
public double ConfidenceThreshold { get; set; } = 0.5;
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Non-maximum suppression (NMS) IoU threshold used to remove overlapping
|
|
||||||
/// detection boxes.
|
|
||||||
/// </summary>
|
|
||||||
public double NmsThreshold { get; set; } = 0.4;
|
public double NmsThreshold { get; set; } = 0.4;
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Input size used when preparing the blob for the detection network.
|
|
||||||
/// </summary>
|
|
||||||
public Size DetectionInputSize { get; set; } = new Size(416, 416);
|
public Size DetectionInputSize { get; set; } = new Size(416, 416);
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Input size used when preparing the blob for the recognition network.
|
|
||||||
/// </summary>
|
|
||||||
public Size RecognitionInputSize { get; set; } = new Size(140, 120);
|
public Size RecognitionInputSize { get; set; } = new Size(140, 120);
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Labels representing digit classes in the recognition model. The order
|
|
||||||
/// must match the class ordering used by the trained recognition network.
|
|
||||||
/// </summary>
|
|
||||||
public string[] NumberClasses { get; set; } = new[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };
|
public string[] NumberClasses { get; set; } = new[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };
|
||||||
|
// When true, recognition crops will be saved to disk for diagnostics. Disabled by default.
|
||||||
/// <summary>
|
|
||||||
/// When enabled, request OpenCV DNN CUDA backend/target for inference.
|
|
||||||
/// The installed OpenCV runtime must have CUDA support or model loading/forwarding may fail.
|
|
||||||
/// </summary>
|
|
||||||
public bool UseGpu { get; set; } = false;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// When enabled, recognition crops will be saved to disk under
|
|
||||||
/// "logs/crops" for diagnostic inspection. Disabled by default.
|
|
||||||
/// </summary>
|
|
||||||
public bool EnableCropSaving { get; set; } = false;
|
public bool EnableCropSaving { get; set; } = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -12,32 +12,8 @@ using System.Threading.Tasks;
|
||||||
namespace AIFotoONLUS.Core
|
namespace AIFotoONLUS.Core
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// NumberRecognitionEngine is a high-level wrapper that loads Darknet (YOLO)
|
/// NumberRecognitionEngine: loads Darknet models via OpenCvSharp and
|
||||||
/// models through OpenCvSharp's DNN API and exposes simple synchronous and
|
/// provides methods to detect text regions and recognize digits.
|
||||||
/// asynchronous methods to detect numeric text regions in images and recognize
|
|
||||||
/// the digits contained within those regions.
|
|
||||||
///
|
|
||||||
/// Overview
|
|
||||||
/// - Loads two Darknet networks: a detection network (finds text regions)
|
|
||||||
/// and a recognition network (recognizes digits inside a cropped region).
|
|
||||||
/// - Uses OpenCvSharp (CvDnn) to create input blobs, run forward passes and
|
|
||||||
/// perform non‑maximum suppression (NMS) on detection candidates.
|
|
||||||
/// - Provides single-image and directory-level processing APIs. Directory
|
|
||||||
/// processing supports parallel workers where each worker uses its own
|
|
||||||
/// per-thread Net instances to allow concurrent forward calls.
|
|
||||||
///
|
|
||||||
/// Threading and performance notes
|
|
||||||
/// - The class constructs and owns two shared Net instances used by the
|
|
||||||
/// simple (single-threaded) APIs. When doing parallel processing the
|
|
||||||
/// implementation creates per-thread Net instances to avoid concurrent
|
|
||||||
/// calls into the same Net object. A small fallback path exists that will
|
|
||||||
/// call into the shared nets under a lock when needed.
|
|
||||||
/// - OpenCV internal threading is enabled (Cv2.SetNumThreads) when supported.
|
|
||||||
///
|
|
||||||
/// Diagnostics
|
|
||||||
/// - When enabled via the configuration, crops may be saved to disk for
|
|
||||||
/// debugging. The <see cref="ModelConfiguration"/> contains thresholds and
|
|
||||||
/// paths used by the engine.
|
|
||||||
/// </summary>
|
/// </summary>
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
|
|
||||||
|
|
@ -51,37 +27,11 @@ namespace AIFotoONLUS.Core
|
||||||
private readonly ILogger? _logger;
|
private readonly ILogger? _logger;
|
||||||
private bool _disposed;
|
private bool _disposed;
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Create a new instance of <see cref="NumberRecognitionEngine"/> using the
|
|
||||||
/// provided <see cref="ModelConfiguration"/>. The constructor loads the
|
|
||||||
/// detection and recognition Darknet model files and prepares the OpenCV
|
|
||||||
/// DNN nets for CPU inference.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="cfg">Model configuration containing file paths, thresholds
|
|
||||||
/// and other options. Must not be <c>null</c>.</param>
|
|
||||||
/// <remarks>
|
|
||||||
/// This constructor will throw <see cref="FileNotFoundException"/> when
|
|
||||||
/// any of the expected model files are missing. For logging purposes an
|
|
||||||
/// overload accepting an <see cref="ILogger"/> is available.
|
|
||||||
/// </remarks>
|
|
||||||
public NumberRecognitionEngine(ModelConfiguration cfg)
|
public NumberRecognitionEngine(ModelConfiguration cfg)
|
||||||
: this(cfg, logger: null)
|
: this(cfg, logger: null)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Create a new instance of <see cref="NumberRecognitionEngine"/> with an
|
|
||||||
/// optional <see cref="ILogger"/>. The logger will receive diagnostic
|
|
||||||
/// messages and errors produced by the engine during processing.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="cfg">Model configuration containing file paths and
|
|
||||||
/// runtime thresholds.</param>
|
|
||||||
/// <param name="logger">Optional logger for diagnostic messages.
|
|
||||||
/// May be <c>null</c>.</param>
|
|
||||||
/// <exception cref="ArgumentNullException">Thrown when <paramref name="cfg"/>
|
|
||||||
/// is <c>null</c>.</exception>
|
|
||||||
/// <exception cref="FileNotFoundException">Thrown when one of the model
|
|
||||||
/// files referenced by <paramref name="cfg"/> does not exist.</exception>
|
|
||||||
public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger)
|
public NumberRecognitionEngine(ModelConfiguration cfg, ILogger? logger)
|
||||||
{
|
{
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
|
|
@ -95,8 +45,10 @@ namespace AIFotoONLUS.Core
|
||||||
_detectionNet = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
|
_detectionNet = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
|
||||||
_recognitionNet = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
|
_recognitionNet = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
|
||||||
|
|
||||||
ConfigureNetRuntime(_detectionNet, _cfg.UseGpu);
|
_detectionNet.SetPreferableBackend(Backend.OPENCV);
|
||||||
ConfigureNetRuntime(_recognitionNet, _cfg.UseGpu);
|
_detectionNet.SetPreferableTarget(Target.CPU);
|
||||||
|
_recognitionNet.SetPreferableBackend(Backend.OPENCV);
|
||||||
|
_recognitionNet.SetPreferableTarget(Target.CPU);
|
||||||
// Let OpenCV use multiple threads internally (use number of logical processors)
|
// Let OpenCV use multiple threads internally (use number of logical processors)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
|
@ -106,11 +58,6 @@ namespace AIFotoONLUS.Core
|
||||||
{
|
{
|
||||||
// Ignore if not supported by OpenCvSharp build
|
// Ignore if not supported by OpenCvSharp build
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_cfg.UseGpu)
|
|
||||||
{
|
|
||||||
ValidateGpuRuntime();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Dispose()
|
public void Dispose()
|
||||||
|
|
@ -122,38 +69,6 @@ namespace AIFotoONLUS.Core
|
||||||
GC.SuppressFinalize(this);
|
GC.SuppressFinalize(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool TryValidateGpuRuntime(ModelConfiguration cfg, ILogger? logger, out string? failureMessage)
|
|
||||||
{
|
|
||||||
if (cfg is null) throw new ArgumentNullException(nameof(cfg));
|
|
||||||
|
|
||||||
var probeConfiguration = new ModelConfiguration
|
|
||||||
{
|
|
||||||
DetectionCfg = cfg.DetectionCfg,
|
|
||||||
DetectionWeights = cfg.DetectionWeights,
|
|
||||||
RecognitionCfg = cfg.RecognitionCfg,
|
|
||||||
RecognitionWeights = cfg.RecognitionWeights,
|
|
||||||
ConfidenceThreshold = cfg.ConfidenceThreshold,
|
|
||||||
NmsThreshold = cfg.NmsThreshold,
|
|
||||||
DetectionInputSize = cfg.DetectionInputSize,
|
|
||||||
RecognitionInputSize = cfg.RecognitionInputSize,
|
|
||||||
NumberClasses = cfg.NumberClasses,
|
|
||||||
EnableCropSaving = cfg.EnableCropSaving,
|
|
||||||
UseGpu = true
|
|
||||||
};
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
using var engine = new NumberRecognitionEngine(probeConfiguration, logger);
|
|
||||||
failureMessage = null;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
failureMessage = ex.GetBaseException().Message;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static string SanitizeFileName(string name)
|
private static string SanitizeFileName(string name)
|
||||||
{
|
{
|
||||||
foreach (var c in Path.GetInvalidFileNameChars()) name = name.Replace(c, '_');
|
foreach (var c in Path.GetInvalidFileNameChars()) name = name.Replace(c, '_');
|
||||||
|
|
@ -162,48 +77,6 @@ namespace AIFotoONLUS.Core
|
||||||
|
|
||||||
private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames();
|
private string[] GetOutputLayerNames(Net net) => net.GetUnconnectedOutLayersNames();
|
||||||
|
|
||||||
private static void ConfigureNetRuntime(Net net, bool useGpu)
|
|
||||||
{
|
|
||||||
if (useGpu)
|
|
||||||
{
|
|
||||||
net.SetPreferableBackend(Backend.CUDA);
|
|
||||||
net.SetPreferableTarget(Target.CUDA);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
net.SetPreferableBackend(Backend.OPENCV);
|
|
||||||
net.SetPreferableTarget(Target.CPU);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void ValidateGpuRuntime()
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
using var detectionProbe = new Mat(_cfg.DetectionInputSize.Height, _cfg.DetectionInputSize.Width, MatType.CV_8UC3, Scalar.All(0));
|
|
||||||
_ = DetectTextRegions(_detectionNet, detectionProbe).Take(1).ToArray();
|
|
||||||
|
|
||||||
using var recognitionProbe = new Mat(_cfg.RecognitionInputSize.Height, _cfg.RecognitionInputSize.Width, MatType.CV_8UC3, Scalar.All(0));
|
|
||||||
using var blob = CvDnn.BlobFromImage(recognitionProbe, 0.00392, _cfg.RecognitionInputSize, new Scalar(0, 0, 0), true, false);
|
|
||||||
_recognitionNet.SetInput(blob);
|
|
||||||
using var output = _recognitionNet.Forward();
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
throw new InvalidOperationException(
|
|
||||||
"OpenCV DNN CUDA runtime validation failed. Disable number AI GPU mode or use an OpenCV runtime built with CUDA DNN support.",
|
|
||||||
ex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Detect text regions in the supplied image using the detection network.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="image">Input image as an OpenCvSharp <see cref="Mat"/>.
|
|
||||||
/// Must not be <c>null</c>.</param>
|
|
||||||
/// <returns>An enumerable of <see cref="DetectedRegion"/> containing the
|
|
||||||
/// bounding boxes, confidence and class information for each detected
|
|
||||||
/// region. The results are already filtered with the configured
|
|
||||||
/// confidence and NMS thresholds.</returns>
|
|
||||||
public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
|
public IEnumerable<DetectedRegion> DetectTextRegions(Mat image)
|
||||||
{
|
{
|
||||||
if (image is null) throw new ArgumentNullException(nameof(image));
|
if (image is null) throw new ArgumentNullException(nameof(image));
|
||||||
|
|
@ -230,15 +103,15 @@ namespace AIFotoONLUS.Core
|
||||||
var fallback = new List<Mat>();
|
var fallback = new List<Mat>();
|
||||||
for (int on = 0; on < outNames.Length; on++)
|
for (int on = 0; on < outNames.Length; on++)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var single = detectionNet.Forward(outNames[on]);
|
var single = detectionNet.Forward(outNames[on]);
|
||||||
fallback.Add(single);
|
fallback.Add(single);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
_logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]);
|
_logger?.LogError(ex, "Fallback Forward failed for {name}", outNames[on]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fallback.Count > 0)
|
if (fallback.Count > 0)
|
||||||
{
|
{
|
||||||
|
|
@ -289,15 +162,15 @@ namespace AIFotoONLUS.Core
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maxScore > _cfg.ConfidenceThreshold)
|
if (maxScore > _cfg.ConfidenceThreshold)
|
||||||
{
|
{
|
||||||
int x = (int)Math.Max(0, Math.Round(cx - w / 2));
|
int x = (int)Math.Max(0, Math.Round(cx - w / 2));
|
||||||
int y = (int)Math.Max(0, Math.Round(cy - h / 2));
|
int y = (int)Math.Max(0, Math.Round(cy - h / 2));
|
||||||
var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h));
|
var rect = new Rect(x, y, (int)Math.Round(w), (int)Math.Round(h));
|
||||||
boxes.Add(rect);
|
boxes.Add(rect);
|
||||||
confidences.Add(maxScore);
|
confidences.Add(maxScore);
|
||||||
classIds.Add(bestClass);
|
classIds.Add(bestClass);
|
||||||
centerXList.Add(cx);
|
centerXList.Add(cx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -317,18 +190,6 @@ namespace AIFotoONLUS.Core
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Recognize digits inside a cropped image region using the recognition
|
|
||||||
/// network. The method runs the recognition network and returns the
|
|
||||||
/// concatenated sequence of recognized digit labels ordered left-to-right.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="croppedImage">Cropped image containing digits as
|
|
||||||
/// <see cref="Mat"/>. Must not be <c>null</c>.</param>
|
|
||||||
/// <param name="context">Optional context string used for diagnostics
|
|
||||||
/// (e.g. when saving crop image files).</param>
|
|
||||||
/// <returns>A string containing recognized digits in left-to-right order.
|
|
||||||
/// Returns an empty string when no digits are recognized above the
|
|
||||||
/// configured confidence threshold.</returns>
|
|
||||||
public string RecognizeDigits(Mat croppedImage, string? context = null)
|
public string RecognizeDigits(Mat croppedImage, string? context = null)
|
||||||
{
|
{
|
||||||
if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));
|
if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));
|
||||||
|
|
@ -426,31 +287,12 @@ namespace AIFotoONLUS.Core
|
||||||
return string.Concat(ordered);
|
return string.Concat(ordered);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Small DTO that describes the name and shape of a detection network
|
|
||||||
/// forward output used for diagnostics.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="Name">Layer/output name.</param>
|
|
||||||
/// <param name="Rows">Number of rows in the output Mat.</param>
|
|
||||||
/// <param name="Cols">Number of columns in the output Mat.</param>
|
|
||||||
public record DetectionOutput(string Name, int Rows, int Cols);
|
public record DetectionOutput(string Name, int Rows, int Cols);
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Result returned by <see cref="ProcessFileWithDiagnostics"/>, contains
|
|
||||||
/// the recognized text result and an array describing detection network
|
|
||||||
/// forward outputs (shapes and names) which are useful for debugging
|
|
||||||
/// model output layout mismatches.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="Result">Recognition result for the processed image.</param>
|
|
||||||
/// <param name="DetectionOutputs">Array describing detection net outputs.</param>
|
|
||||||
public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs);
|
public record DiagnosticResult(ImageResult Result, DetectionOutput[] DetectionOutputs);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Process a single image file and return the recognition result together
|
/// Process a single image file and return the recognition result together with
|
||||||
/// with detection network forward output shapes for diagnostics. This
|
/// detection network forward output shapes for diagnostics.
|
||||||
/// method reads the image from disk, runs a forward pass over the
|
|
||||||
/// detection network to capture the raw output Mat shapes and then calls
|
|
||||||
/// the normal processing pipeline to return the recognized text.
|
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public DiagnosticResult ProcessFileWithDiagnostics(string filePath)
|
public DiagnosticResult ProcessFileWithDiagnostics(string filePath)
|
||||||
{
|
{
|
||||||
|
|
@ -488,16 +330,6 @@ namespace AIFotoONLUS.Core
|
||||||
return new DiagnosticResult(imgRes, outputs);
|
return new DiagnosticResult(imgRes, outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Process a single image file and return the recognized text as an
|
|
||||||
/// <see cref="ImageResult"/>. The method detects candidate text regions
|
|
||||||
/// and runs recognition on each crop. Multiple recognized digit sequences
|
|
||||||
/// are joined with a comma in the returned <see cref="ImageResult.Text"/>.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="filePath">Path to an image file on disk. Supported
|
|
||||||
/// formats depend on OpenCV (typically JPEG, PNG, ...).</param>
|
|
||||||
/// <returns>An <see cref="ImageResult"/> containing the file name and
|
|
||||||
/// recognized text (possibly empty).</returns>
|
|
||||||
public ImageResult ProcessImage(string filePath)
|
public ImageResult ProcessImage(string filePath)
|
||||||
{
|
{
|
||||||
if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath);
|
if (!File.Exists(filePath)) throw new FileNotFoundException("Image not found", filePath);
|
||||||
|
|
@ -519,14 +351,6 @@ namespace AIFotoONLUS.Core
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Process all JPEG images in a directory and return the recognition
|
|
||||||
/// results. This is a blocking wrapper over <see cref="ProcessDirectoryAsync"/>.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="directoryPath">Path to a directory containing images.</param>
|
|
||||||
/// <param name="skipTextNegative">If true, files whose names start with
|
|
||||||
/// "tn_" will be skipped (convention used to mark text-negative images).</param>
|
|
||||||
/// <returns>Collection of <see cref="ImageResult"/> ordered by file name.</returns>
|
|
||||||
public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
|
public IEnumerable<ImageResult> ProcessDirectory(string directoryPath, bool skipTextNegative = false)
|
||||||
{
|
{
|
||||||
// Simple wrapper over async implementation
|
// Simple wrapper over async implementation
|
||||||
|
|
@ -554,8 +378,10 @@ namespace AIFotoONLUS.Core
|
||||||
{
|
{
|
||||||
var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
|
var det = CvDnn.ReadNetFromDarknet(_cfg.DetectionCfg, _cfg.DetectionWeights);
|
||||||
var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
|
var rec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
|
||||||
ConfigureNetRuntime(det, _cfg.UseGpu);
|
det.SetPreferableBackend(Backend.OPENCV);
|
||||||
ConfigureNetRuntime(rec, _cfg.UseGpu);
|
det.SetPreferableTarget(Target.CPU);
|
||||||
|
rec.SetPreferableBackend(Backend.OPENCV);
|
||||||
|
rec.SetPreferableTarget(Target.CPU);
|
||||||
netsBag.Add((det, rec));
|
netsBag.Add((det, rec));
|
||||||
return (det, rec);
|
return (det, rec);
|
||||||
});
|
});
|
||||||
|
|
@ -591,7 +417,8 @@ namespace AIFotoONLUS.Core
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
using var tempRec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
|
using var tempRec = CvDnn.ReadNetFromDarknet(_cfg.RecognitionCfg, _cfg.RecognitionWeights);
|
||||||
ConfigureNetRuntime(tempRec, _cfg.UseGpu);
|
tempRec.SetPreferableBackend(Backend.OPENCV);
|
||||||
|
tempRec.SetPreferableTarget(Target.CPU);
|
||||||
var alt = RecognizeDigits(crop, tempRec, ctx);
|
var alt = RecognizeDigits(crop, tempRec, ctx);
|
||||||
if (!string.IsNullOrEmpty(alt)) txt = alt;
|
if (!string.IsNullOrEmpty(alt)) txt = alt;
|
||||||
}
|
}
|
||||||
|
|
@ -677,16 +504,6 @@ namespace AIFotoONLUS.Core
|
||||||
}
|
}
|
||||||
|
|
||||||
// Overload RecognizeDigits that accepts a Net for worker threads
|
// Overload RecognizeDigits that accepts a Net for worker threads
|
||||||
/// <summary>
|
|
||||||
/// Worker overload of <see cref="RecognizeDigits(Mat,string?)"/> that
|
|
||||||
/// accepts a <see cref="Net"/> instance. This is used by the parallel
|
|
||||||
/// processing pipeline where each worker owns its own Net instance.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="croppedImage">Cropped region to recognize.</param>
|
|
||||||
/// <param name="recognitionNet">Recognition <see cref="Net"/> to execute
|
|
||||||
/// the forward pass with.</param>
|
|
||||||
/// <param name="context">Optional context string for diagnostics.</param>
|
|
||||||
/// <returns>Recognized digit sequence or empty string.</returns>
|
|
||||||
private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null)
|
private string RecognizeDigits(Mat croppedImage, Net recognitionNet, string? context = null)
|
||||||
{
|
{
|
||||||
if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));
|
if (croppedImage is null) throw new ArgumentNullException(nameof(croppedImage));
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,4 @@
|
||||||
namespace AIFotoONLUS.Core
|
namespace AIFotoONLUS.Core
|
||||||
{
|
{
|
||||||
/// <summary>
|
|
||||||
/// Progress statistics reported during directory processing.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="TotalFiles">Total number of image files to process.</param>
|
|
||||||
/// <param name="ProcessedFiles">Number of files processed so far.</param>
|
|
||||||
/// <param name="ImagesPerSecond">Current processing throughput in images/second.</param>
|
|
||||||
public record ProcessingStats(int TotalFiles, int ProcessedFiles, double ImagesPerSecond);
|
public record ProcessingStats(int TotalFiles, int ProcessedFiles, double ImagesPerSecond);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue