feat: Improve CSV output handling in AiExtractionService, including legacy quoting and empty result skipping

This commit is contained in:
Maddo 2026-05-31 15:27:08 +02:00
commit e68608312a
2 changed files with 46 additions and 10 deletions

View file

@ -1,6 +1,7 @@
using ImageCatalog_2.Services;
using ImageCatalog_2.Models;
using Shouldly;
using System.Text;
namespace MaddoShared.Tests;
@ -8,7 +9,7 @@ namespace MaddoShared.Tests;
public sealed class AiExtractionServiceCsvTests
{
[TestMethod]
public void WriteCsvOutput_UsesLegacyCompatibleHeaderAndFilenameColumn()
public void WriteCsvOutput_UsesLegacyCompatibleQuotingAndSkipsEmptyResults()
{
using var tempDir = new TempDirectory();
var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
@ -16,15 +17,35 @@ public sealed class AiExtractionServiceCsvTests
AiExtractionService.WriteCsvOutput(
csvPath,
[
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43,84,61" },
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "a\"b" }
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" },
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "43,84,61" },
new AiResultItem { Path = @"C:\images\IMG_7208.JPG", Text = string.Empty },
new AiResultItem { Path = @"C:\images\IMG_7209.JPG", Text = " " },
new AiResultItem { Path = @"C:\images\IMG_7210.JPG", Text = "a\"b,c" }
]);
var lines = File.ReadAllLines(csvPath);
var csv = File.ReadAllText(csvPath, Encoding.UTF8);
lines[0].ShouldBe("filename,text");
lines[1].ShouldBe("\"IMG_7146.JPG\",\"43,84,61\"");
lines[2].ShouldBe("\"IMG_7207.JPG\",\"a\"\"b\"");
csv.ShouldBe("filename,text\r"
+ "IMG_7146.JPG,43\r"
+ "IMG_7207.JPG,\"43,84,61\"\r"
+ "IMG_7210.JPG,\"a\"\"b,c\"\r");
}
/// <summary>
/// Checks the raw bytes of the written CSV: the file must open directly with the
/// ASCII header line and must not be prefixed with the UTF-8 byte order mark.
/// </summary>
[TestMethod]
public void WriteCsvOutput_WritesUtf8WithoutBom()
{
    // Arrange
    const string headerLine = "filename,text\r";
    byte[] utf8Bom = { 0xEF, 0xBB, 0xBF };
    using var workDir = new TempDirectory();
    var outputFile = Path.Combine(workDir.Path, "ocr.csv");

    // Act
    AiExtractionService.WriteCsvOutput(
        outputFile,
        [new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" }]);

    // Assert
    var written = File.ReadAllBytes(outputFile);
    var leadingBytes = written[..headerLine.Length];
    leadingBytes.ShouldBe(Encoding.ASCII.GetBytes(headerLine));
    written[..3].ShouldNotBe(utf8Bom);
}
private sealed class TempDirectory : IDisposable

View file

@ -217,16 +217,31 @@ public class AiExtractionService : IAiExtractionService
Directory.CreateDirectory(dir);
}
using var sw = new StreamWriter(csvOutputPath, false, Encoding.UTF8);
using var sw = new StreamWriter(csvOutputPath, false, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
sw.NewLine = "\r";
sw.WriteLine("filename,text");
foreach (var result in extractedResults)
{
var text = result.Text ?? string.Empty;
if (string.IsNullOrWhiteSpace(text))
{
continue;
}
var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
var safeText = (result.Text ?? string.Empty).Replace("\"", "\"\"");
sw.WriteLine($"\"{csvFileName}\",\"{safeText}\"");
sw.Write(csvFileName);
sw.Write(',');
sw.WriteLine(FormatLegacyTextField(text));
}
}
/// <summary>
/// Characters that make a bare (unquoted) CSV field ambiguous: the column
/// delimiter, the quote character itself, and both line-break characters.
/// </summary>
private static readonly char[] LegacyQuoteTriggers = { ',', '"', '\r', '\n' };

/// <summary>
/// Formats the text column using minimal quoting: the value is emitted verbatim
/// unless it contains a character that would corrupt the row structure.
/// </summary>
/// <param name="text">The field value to write; must not be null.</param>
/// <returns>
/// <paramref name="text"/> unchanged when it contains none of the trigger characters;
/// otherwise the value wrapped in double quotes with every embedded double quote doubled.
/// </returns>
private static string FormatLegacyTextField(string text)
{
    // Plain values stay unquoted so existing output for simple text is unchanged.
    if (text.IndexOfAny(LegacyQuoteTriggers) < 0)
    {
        return text;
    }

    // A raw quote or line break inside an unquoted field would be read as field
    // syntax or as a row boundary, so such values are quoted just like values
    // that contain a comma.
    var escaped = text.Replace("\"", "\"\"");
    return "\"" + escaped + "\"";
}
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
{
return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;