feat: Improve CSV output handling in AiExtractionService, including legacy quoting and empty result skipping

This commit is contained in:
Maddo 2026-05-31 15:27:08 +02:00
commit e68608312a
2 changed files with 46 additions and 10 deletions

View file

@ -217,16 +217,31 @@ public class AiExtractionService : IAiExtractionService
Directory.CreateDirectory(dir);
}
using var sw = new StreamWriter(csvOutputPath, false, Encoding.UTF8);
using var sw = new StreamWriter(csvOutputPath, false, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
sw.NewLine = "\r";
sw.WriteLine("filename,text");
foreach (var result in extractedResults)
{
var text = result.Text ?? string.Empty;
if (string.IsNullOrWhiteSpace(text))
{
continue;
}
var csvFileName = Path.GetFileName(result.Path ?? string.Empty);
var safeText = (result.Text ?? string.Empty).Replace("\"", "\"\"");
sw.WriteLine($"\"{csvFileName}\",\"{safeText}\"");
sw.Write(csvFileName);
sw.Write(',');
sw.WriteLine(FormatLegacyTextField(text));
}
}
// Characters that make a raw (unquoted) CSV field ambiguous: the field separator,
// the quote character itself, and either line-break character.
private static readonly char[] LegacyQuoteTriggerChars = { ',', '"', '\r', '\n' };

/// <summary>
/// Formats the extracted text as a CSV field using minimal ("legacy") quoting:
/// the value is emitted verbatim unless it contains a character that would make
/// the field ambiguous, in which case it is wrapped in double quotes and any
/// embedded double quotes are doubled.
/// </summary>
/// <param name="text">The field value to format. Null or empty yields an empty field.</param>
/// <returns>The text unchanged when no quoting is needed; otherwise the quoted, escaped field.</returns>
private static string FormatLegacyTextField(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Plain values stay unquoted, which keeps the legacy output shape for the common case.
    if (text.IndexOfAny(LegacyQuoteTriggerChars) < 0)
    {
        return text;
    }

    // Quote on more than just commas: an embedded quote or line break written raw
    // would otherwise be read as a field delimiter start or a record boundary.
    return $"\"{text.Replace("\"", "\"\"")}\"";
}
private static double CalculateAverageImagesPerSecond(int processed, TimeSpan elapsed)
{
return elapsed.TotalSeconds > 0 ? processed / elapsed.TotalSeconds : 0;