Patch summary (free text before the first "diff --git" header):
  * AiExtractionService.WriteCsvOutput now writes UTF-8 without a BOM, uses a
    bare "\r" as the line terminator, skips results whose text is null/empty/
    whitespace, writes the file name unquoted, and quotes the text field (with
    embedded quotes doubled) only when it contains a comma.
  * The tests in Test1.cs are updated to pin that output and the missing BOM.
NOTE(review): line structure was restored from a whitespace-collapsed copy;
indentation and the position of blank context lines are reconstructed, so
verify against the target files or apply with `git apply --ignore-whitespace`.

diff --git a/MaddoShared.Tests/Test1.cs b/MaddoShared.Tests/Test1.cs
index da8e098..d8dc33b 100644
--- a/MaddoShared.Tests/Test1.cs
+++ b/MaddoShared.Tests/Test1.cs
@@ -1,6 +1,7 @@
 using ImageCatalog_2.Services;
 using ImageCatalog_2.Models;
 using Shouldly;
+using System.Text;
 
 namespace MaddoShared.Tests;
 
@@ -8,7 +9,7 @@ namespace MaddoShared.Tests;
 public sealed class AiExtractionServiceCsvTests
 {
     [TestMethod]
-    public void WriteCsvOutput_UsesLegacyCompatibleHeaderAndFilenameColumn()
+    public void WriteCsvOutput_UsesLegacyCompatibleQuotingAndSkipsEmptyResults()
     {
         using var tempDir = new TempDirectory();
         var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
@@ -16,15 +17,35 @@ public sealed class AiExtractionServiceCsvTests
         AiExtractionService.WriteCsvOutput(
             csvPath,
             [
-                new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43,84,61" },
-                new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "a\"b" }
+                new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" },
+                new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "43,84,61" },
+                new AiResultItem { Path = @"C:\images\IMG_7208.JPG", Text = string.Empty },
+                new AiResultItem { Path = @"C:\images\IMG_7209.JPG", Text = " " },
+                new AiResultItem { Path = @"C:\images\IMG_7210.JPG", Text = "a\"b,c" }
             ]);
 
-        var lines = File.ReadAllLines(csvPath);
+        var csv = File.ReadAllText(csvPath, Encoding.UTF8);
 
-        lines[0].ShouldBe("filename,text");
-        lines[1].ShouldBe("\"IMG_7146.JPG\",\"43,84,61\"");
-        lines[2].ShouldBe("\"IMG_7207.JPG\",\"a\"\"b\"");
+        csv.ShouldBe("filename,text\r" +
+            "IMG_7146.JPG,43\r" +
+            "IMG_7207.JPG,\"43,84,61\"\r" +
+            "IMG_7210.JPG,\"a\"\"b,c\"\r");
+    }
+
+    [TestMethod]
+    public void WriteCsvOutput_WritesUtf8WithoutBom()
+    {
+        using var tempDir = new TempDirectory();
+        var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
+
+        AiExtractionService.WriteCsvOutput(
+            csvPath,
+            [new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" }]);
+
+        var bytes = File.ReadAllBytes(csvPath);
+
+        bytes[.."filename,text\r".Length].ShouldBe(Encoding.ASCII.GetBytes("filename,text\r"));
+        bytes[..3].ShouldNotBe(new byte[] { 0xEF, 0xBB, 0xBF });
     }
 
     private sealed class TempDirectory : IDisposable
diff --git a/imagecatalog/Services/AiExtractionService.cs b/imagecatalog/Services/AiExtractionService.cs
index 685a43f..b8f413a 100644
--- a/imagecatalog/Services/AiExtractionService.cs
+++ b/imagecatalog/Services/AiExtractionService.cs
@@ -217,16 +217,31 @@ public class AiExtractionService : IAiExtractionService
             Directory.CreateDirectory(dir);
         }
 
-        using var sw = new StreamWriter(csvOutputPath, false, Encoding.UTF8);
+        using var sw = new StreamWriter(csvOutputPath, false, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
+        sw.NewLine = "\r";
         sw.WriteLine("filename,text");
         foreach (var result in extractedResults)
         {
+            var text = result.Text ?? string.Empty;
+            if (string.IsNullOrWhiteSpace(text))
+            {
+                continue;
+            }
+
             var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
-            var safeText = (result.Text ?? string.Empty).Replace("\"", "\"\"");
-            sw.WriteLine($"\"{csvFileName}\",\"{safeText}\"");
+            sw.Write(csvFileName);
+            sw.Write(',');
+            sw.WriteLine(FormatLegacyTextField(text));
         }
     }
 
+    private static string FormatLegacyTextField(string text)
+    {
+        return text.Contains(',', StringComparison.Ordinal)
+            ? $"\"{text.Replace("\"", "\"\"")}\""
+            : text;
+    }
+
     private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
     {
         return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;