feat: Improve CSV output handling in AiExtractionService, including legacy quoting and empty result skipping
This commit is contained in:
parent
d76e133f18
commit
e68608312a
2 changed files with 46 additions and 10 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
using ImageCatalog_2.Services;
|
using ImageCatalog_2.Services;
|
||||||
using ImageCatalog_2.Models;
|
using ImageCatalog_2.Models;
|
||||||
using Shouldly;
|
using Shouldly;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
namespace MaddoShared.Tests;
|
namespace MaddoShared.Tests;
|
||||||
|
|
||||||
|
|
@ -8,7 +9,7 @@ namespace MaddoShared.Tests;
|
||||||
public sealed class AiExtractionServiceCsvTests
|
public sealed class AiExtractionServiceCsvTests
|
||||||
{
|
{
|
||||||
[TestMethod]
|
[TestMethod]
|
||||||
public void WriteCsvOutput_UsesLegacyCompatibleHeaderAndFilenameColumn()
|
public void WriteCsvOutput_UsesLegacyCompatibleQuotingAndSkipsEmptyResults()
|
||||||
{
|
{
|
||||||
using var tempDir = new TempDirectory();
|
using var tempDir = new TempDirectory();
|
||||||
var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
|
var csvPath = Path.Combine(tempDir.Path, "ocr.csv");
|
||||||
|
|
@ -16,15 +17,35 @@ public sealed class AiExtractionServiceCsvTests
|
||||||
AiExtractionService.WriteCsvOutput(
|
AiExtractionService.WriteCsvOutput(
|
||||||
csvPath,
|
csvPath,
|
||||||
[
|
[
|
||||||
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43,84,61" },
|
new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" },
|
||||||
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "a\"b" }
|
new AiResultItem { Path = @"C:\images\IMG_7207.JPG", Text = "43,84,61" },
|
||||||
|
new AiResultItem { Path = @"C:\images\IMG_7208.JPG", Text = string.Empty },
|
||||||
|
new AiResultItem { Path = @"C:\images\IMG_7209.JPG", Text = " " },
|
||||||
|
new AiResultItem { Path = @"C:\images\IMG_7210.JPG", Text = "a\"b,c" }
|
||||||
]);
|
]);
|
||||||
|
|
||||||
var lines = File.ReadAllLines(csvPath);
|
var csv = File.ReadAllText(csvPath, Encoding.UTF8);
|
||||||
|
|
||||||
lines[0].ShouldBe("filename,text");
|
csv.ShouldBe("filename,text\r"
|
||||||
lines[1].ShouldBe("\"IMG_7146.JPG\",\"43,84,61\"");
|
+ "IMG_7146.JPG,43\r"
|
||||||
lines[2].ShouldBe("\"IMG_7207.JPG\",\"a\"\"b\"");
|
+ "IMG_7207.JPG,\"43,84,61\"\r"
|
||||||
|
+ "IMG_7210.JPG,\"a\"\"b,c\"\r");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that the CSV file begins directly with the header bytes and
/// does not start with the three-byte UTF-8 byte-order mark.
/// </summary>
[TestMethod]
public void WriteCsvOutput_WritesUtf8WithoutBom()
{
    const string expectedHeader = "filename,text\r";

    using var workDir = new TempDirectory();
    var outputFile = Path.Combine(workDir.Path, "ocr.csv");
    var singleItem = new AiResultItem { Path = @"C:\images\IMG_7146.JPG", Text = "43" };

    AiExtractionService.WriteCsvOutput(outputFile, [singleItem]);

    var written = File.ReadAllBytes(outputFile);
    var headerBytes = Encoding.ASCII.GetBytes(expectedHeader);
    var utf8Bom = new byte[] { 0xEF, 0xBB, 0xBF };

    // The file must open with the header itself...
    written[..expectedHeader.Length].ShouldBe(headerBytes);
    // ...and therefore must not open with the UTF-8 BOM.
    written[..3].ShouldNotBe(utf8Bom);
}
|
||||||
|
|
||||||
private sealed class TempDirectory : IDisposable
|
private sealed class TempDirectory : IDisposable
|
||||||
|
|
|
||||||
|
|
@ -217,16 +217,31 @@ public class AiExtractionService : IAiExtractionService
|
||||||
Directory.CreateDirectory(dir);
|
Directory.CreateDirectory(dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
using var sw = new StreamWriter(csvOutputPath, false, Encoding.UTF8);
|
using var sw = new StreamWriter(csvOutputPath, false, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
|
||||||
|
sw.NewLine = "\r";
|
||||||
sw.WriteLine("filename,text");
|
sw.WriteLine("filename,text");
|
||||||
foreach (var result in extractedResults)
|
foreach (var result in extractedResults)
|
||||||
{
|
{
|
||||||
|
var text = result.Text ?? string.Empty;
|
||||||
|
if (string.IsNullOrWhiteSpace(text))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
|
var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
|
||||||
var safeText = (result.Text ?? string.Empty).Replace("\"", "\"\"");
|
sw.Write(csvFileName);
|
||||||
sw.WriteLine($"\"{csvFileName}\",\"{safeText}\"");
|
sw.Write(',');
|
||||||
|
sw.WriteLine(FormatLegacyTextField(text));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Characters that force a CSV field to be quoted: the field delimiter, the
// quote character itself, and both line-break characters (a bare CR or LF
// inside a field would otherwise be read as the end of the record).
private static readonly char[] CsvCharsRequiringQuotes = { ',', '"', '\r', '\n' };

/// <summary>
/// Formats an extracted text value as a single CSV field.
/// </summary>
/// <param name="text">The raw text to write into the "text" column.</param>
/// <returns>
/// The text unchanged when it contains no CSV-significant character;
/// otherwise the text wrapped in double quotes with every embedded double
/// quote doubled. A null or empty value yields an empty field.
/// </returns>
private static string FormatLegacyTextField(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Plain values stay bare so simple rows keep the unquoted legacy shape.
    if (text.IndexOfAny(CsvCharsRequiringQuotes) < 0)
    {
        return text;
    }

    // Quote the field and escape embedded quotes by doubling them.
    return $"\"{text.Replace("\"", "\"\"")}\"";
}
|
||||||
|
|
||||||
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
|
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
|
||||||
{
|
{
|
||||||
return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;
|
return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue