feat: Improve CSV output handling in AiExtractionService, including legacy quoting and empty result skipping
This commit is contained in:
parent
d76e133f18
commit
e68608312a
2 changed files with 46 additions and 10 deletions
|
|
@ -1,6 +1,7 @@
|
|||
using ImageCatalog_2.Services;
|
||||
using ImageCatalog_2.Models;
|
||||
using Shouldly;
|
||||
using System.Text;
|
||||
|
||||
namespace MaddoShared.Tests;
|
||||
|
||||
|
|
@ -8,7 +9,7 @@ namespace MaddoShared.Tests;
|
|||
public sealed class AiExtractionServiceCsvTests
|
||||
{
|
||||
[TestMethod]
|
||||
public void WriteCsvOutput_UsesLegacyCompatibleHeaderAndFilenameColumn()
|
||||
public void WriteCsvOutput_UsesLegacyCompatibleQuotingAndSkipsEmptyResults()
|
||||
{
|
||||
using var tempDir = new TempDirectory();
|
||||
var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
|
||||
|
|
@ -16,15 +17,35 @@ public sealed class AiExtractionServiceCsvTests
|
|||
AiExtractionService.WriteCsvOutput(
|
||||
csvPath,
|
||||
[
|
||||
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43,84,61" },
|
||||
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "a\"b" }
|
||||
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" },
|
||||
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "43,84,61" },
|
||||
new AiResultItem { Path = @"C:\images\IMG_7208.JPG", Text = string.Empty },
|
||||
new AiResultItem { Path = @"C:\images\IMG_7209.JPG", Text = " " },
|
||||
new AiResultItem { Path = @"C:\images\IMG_7210.JPG", Text = "a\"b,c" }
|
||||
]);
|
||||
|
||||
var lines = File.ReadAllLines(csvPath);
|
||||
var csv = File.ReadAllText(csvPath, Encoding.UTF8);
|
||||
|
||||
lines[0].ShouldBe("filename,text");
|
||||
lines[1].ShouldBe("\"IMG_7146.JPG\",\"43,84,61\"");
|
||||
lines[2].ShouldBe("\"IMG_7207.JPG\",\"a\"\"b\"");
|
||||
csv.ShouldBe("filename,text\r"
|
||||
+ "IMG_7146.JPG,43\r"
|
||||
+ "IMG_7207.JPG,\"43,84,61\"\r"
|
||||
+ "IMG_7210.JPG,\"a\"\"b,c\"\r");
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks that the written CSV begins directly with the ASCII header bytes
/// and that its first three bytes are not the UTF-8 byte-order mark.
/// </summary>
[TestMethod]
public void WriteCsvOutput_WritesUtf8WithoutBom()
{
    // Arrange: expected leading bytes and the BOM sequence that must be absent.
    byte[] expectedHeader = Encoding.ASCII.GetBytes("filename,text\r");
    byte[] utf8Bom = new byte[] { 0xEF, 0xBB, 0xBF };

    using var scratch = new TempDirectory();
    var outputFile = Path.Combine(scratch.Path, "ocr.csv");
    var singleResult = new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" };

    // Act
    AiExtractionService.WriteCsvOutput(outputFile, [singleResult]);

    // Assert: inspect raw bytes so no decoder can hide a preamble.
    var written = File.ReadAllBytes(outputFile);
    written[..expectedHeader.Length].ShouldBe(expectedHeader);
    written[..3].ShouldNotBe(utf8Bom);
}
|
||||
|
||||
private sealed class TempDirectory : IDisposable
|
||||
|
|
|
|||
|
|
@ -217,16 +217,31 @@ public class AiExtractionService : IAiExtractionService
|
|||
Directory.CreateDirectory(dir);
|
||||
}
|
||||
|
||||
using var sw = new StreamWriter(csvOutputPath, false, Encoding.UTF8);
|
||||
using var sw = new StreamWriter(csvOutputPath, false, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
|
||||
sw.NewLine = "\r";
|
||||
sw.WriteLine("filename,text");
|
||||
foreach (var result in extractedResults)
|
||||
{
|
||||
var text = result.Text ?? string.Empty;
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
|
||||
var safeText = (result.Text ?? string.Empty).Replace("\"", "\"\"");
|
||||
sw.WriteLine($"\"{csvFileName}\",\"{safeText}\"");
|
||||
sw.Write(csvFileName);
|
||||
sw.Write(',');
|
||||
sw.WriteLine(FormatLegacyTextField(text));
|
||||
}
|
||||
}
|
||||
|
||||
// Characters that force the text field to be wrapped in double quotes.
private static readonly char[] LegacyQuoteTriggerChars = { ',', '"', '\r', '\n' };

/// <summary>
/// Formats the text column of a CSV row using the legacy layout: plain values are
/// emitted unquoted, and values that would otherwise be misparsed are wrapped in
/// double quotes with embedded quotes doubled.
/// </summary>
/// <param name="text">The raw text value for the row.</param>
/// <returns>
/// <paramref name="text"/> unchanged when it contains no comma, double quote,
/// carriage return or line feed; otherwise the quoted and escaped form.
/// </returns>
private static string FormatLegacyTextField(string text)
{
    // Output for plain and comma-only values is identical to the comma-only rule.
    // Quotes and line breaks are also treated as triggers because, written raw,
    // a line break splits the record and a bare quote yields a malformed field.
    if (text.IndexOfAny(LegacyQuoteTriggerChars) < 0)
    {
        return text;
    }

    var escaped = text.Replace("\"", "\"\"");
    return $"\"{escaped}\"";
}
|
||||
|
||||
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
|
||||
{
|
||||
return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue