feat: Improve CSV output handling in AiExtractionService, including legacy quoting and empty result skipping

This commit is contained in:
Maddo 2026-05-31 15:27:08 +02:00
commit e68608312a
2 changed files with 46 additions and 10 deletions

View file

@ -1,6 +1,7 @@
using ImageCatalog_2.Services; using ImageCatalog_2.Services;
using ImageCatalog_2.Models; using ImageCatalog_2.Models;
using Shouldly; using Shouldly;
using System.Text;
namespace MaddoShared.Tests; namespace MaddoShared.Tests;
@ -8,7 +9,7 @@ namespace MaddoShared.Tests;
public sealed class AiExtractionServiceCsvTests public sealed class AiExtractionServiceCsvTests
{ {
[TestMethod] [TestMethod]
public void WriteCsvOutput_UsesLegacyCompatibleHeaderAndFilenameColumn() public void WriteCsvOutput_UsesLegacyCompatibleQuotingAndSkipsEmptyResults()
{ {
using var tempDir = new TempDirectory(); using var tempDir = new TempDirectory();
var csvPath = Path.Combine(tempDir.Path, "ocr.csv"); var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
@ -16,15 +17,35 @@ public sealed class AiExtractionServiceCsvTests
AiExtractionService.WriteCsvOutput( AiExtractionService.WriteCsvOutput(
csvPath, csvPath,
[ [
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43,84,61" }, new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" },
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "a\"b" } new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "43,84,61" },
new AiResultItem { Path = @"C:\images\IMG_7208.JPG", Text = string.Empty },
new AiResultItem { Path = @"C:\images\IMG_7209.JPG", Text = " " },
new AiResultItem { Path = @"C:\images\IMG_7210.JPG", Text = "a\"b,c" }
]); ]);
var lines = File.ReadAllLines(csvPath); var csv = File.ReadAllText(csvPath, Encoding.UTF8);
lines[0].ShouldBe("filename,text"); csv.ShouldBe("filename,text\r"
lines[1].ShouldBe("\"IMG_7146.JPG\",\"43,84,61\""); + "IMG_7146.JPG,43\r"
lines[2].ShouldBe("\"IMG_7207.JPG\",\"a\"\"b\""); + "IMG_7207.JPG,\"43,84,61\"\r"
+ "IMG_7210.JPG,\"a\"\"b,c\"\r");
}
[TestMethod]
public void WriteCsvOutput_WritesUtf8WithoutBom()
{
using var tempDir = new TempDirectory();
var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
AiExtractionService.WriteCsvOutput(
csvPath,
[new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" }]);
var bytes = File.ReadAllBytes(csvPath);
bytes[.."filename,text\r".Length].ShouldBe(Encoding.ASCII.GetBytes("filename,text\r"));
bytes[..3].ShouldNotBe(new byte[] { 0xEF, 0xBB, 0xBF });
} }
private sealed class TempDirectory : IDisposable private sealed class TempDirectory : IDisposable

View file

@ -217,16 +217,31 @@ public class AiExtractionService : IAiExtractionService
Directory.CreateDirectory(dir); Directory.CreateDirectory(dir);
} }
using var sw = new StreamWriter(csvOutputPath, false, Encoding.UTF8); using var sw = new StreamWriter(csvOutputPath, false, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
sw.NewLine = "\r";
sw.WriteLine("filename,text"); sw.WriteLine("filename,text");
foreach (var result in extractedResults) foreach (var result in extractedResults)
{ {
var text = result.Text ?? string.Empty;
if (string.IsNullOrWhiteSpace(text))
{
continue;
}
var csvFileName = Path.GetFileName(result.Path ?? string.Empty); var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
var safeText = (result.Text ?? string.Empty).Replace("\"", "\"\""); sw.Write(csvFileName);
sw.WriteLine($"\"{csvFileName}\",\"{safeText}\""); sw.Write(',');
sw.WriteLine(FormatLegacyTextField(text));
} }
} }
/// <summary>
/// Formats a text value as a single CSV field using minimal quoting.
/// </summary>
/// <remarks>
/// The value is returned verbatim unless it contains a character that would change how the
/// row is parsed: a comma (field separator), a double quote (quote character), or a carriage
/// return / line feed (record terminator). In those cases the value is wrapped in double
/// quotes and every embedded double quote is doubled. Values that contain a comma, or none of
/// these characters, are formatted exactly as before.
/// </remarks>
/// <param name="text">The field value to format; must not be <c>null</c>.</param>
/// <returns>The value, quoted and escaped only when required.</returns>
private static string FormatLegacyTextField(string text)
{
    // A raw quote or line break would corrupt the row structure just like a raw comma,
    // so all four characters force the quoted form.
    var needsQuoting = text.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0;
    if (!needsQuoting)
    {
        return text;
    }

    var escaped = text.Replace("\"", "\"\"");
    return $"\"{escaped}\"";
}
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed) private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
{ {
return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0; return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;