Improve Autoampel page fetching with retry logic and consecutive failure handling; add URL inference for pagination

This commit is contained in:
2026-03-05 00:58:47 +03:00
parent 223da27094
commit 58e5009b04
+57 -11
View File
@@ -336,16 +336,7 @@ async Task RunMergeCoreDbMode(HsnTsnClient hsnTsnClient, string? inputPath, stri
} }
await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich); await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich);
var autoampelUpserted = 0; var autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
try
{
autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
}
catch (Exception ex)
{
failed++;
Console.Error.WriteLine($"[warn] autoampel page-upsert failed -> {ex.Message}");
}
Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}"); Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}");
} }
@@ -454,12 +445,51 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq
var pageUrl = hsnTsnClient.GetAutoampelFullListUrl(); var pageUrl = hsnTsnClient.GetAutoampelFullListUrl();
var pageIndex = 0; var pageIndex = 0;
var totalUpserted = 0; var totalUpserted = 0;
var consecutiveFailures = 0;
const int maxConsecutiveFailures = 8;
while (!string.IsNullOrWhiteSpace(pageUrl)) while (!string.IsNullOrWhiteSpace(pageUrl))
{ {
pageIndex++; pageIndex++;
Console.Error.WriteLine($"[info] Processing: {pageUrl}"); Console.Error.WriteLine($"[info] Processing: {pageUrl}");
var page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl); AutoampelPageResult? page = null;
const int maxAttempts = 7;
for (var attempt = 1; attempt <= maxAttempts; attempt++)
{
try
{
page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl);
break;
}
catch (HttpRequestException ex) when (attempt < maxAttempts)
{
var delaySeconds = IsTooManyRequests(ex) ? Math.Min(90, (int)Math.Pow(2, attempt)) : Math.Min(30, attempt * 3);
Console.Error.WriteLine($"[warn] Autoampel page fetch failed ({ex.Message}), retry in {delaySeconds}s (attempt {attempt}/{maxAttempts})");
await Task.Delay(TimeSpan.FromSeconds(delaySeconds));
}
}
if (page is null)
{
consecutiveFailures++;
Console.Error.WriteLine($"[warn] Autoampel page skipped after retries: {pageUrl}");
if (consecutiveFailures >= maxConsecutiveFailures)
{
Console.Error.WriteLine("[warn] Too many consecutive autoampel page failures, stopping page-upsert loop.");
break;
}
pageUrl = InferNextAutoampelListUrl(pageUrl);
if (string.IsNullOrWhiteSpace(pageUrl))
{
break;
}
continue;
}
consecutiveFailures = 0;
var rows = page.Vehicles var rows = page.Vehicles
.GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase) .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase)
.Select(g => g.First()) .Select(g => g.First())
@@ -495,6 +525,22 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq
return totalUpserted; return totalUpserted;
} }
// Derives the URL of the next Autoampel list page by incrementing the trailing
// page number of currentUrl.
//
// Only URLs shaped like "<http|https>://<host>/typklassen/liste/<digits>" are
// recognised (matched case-insensitively). Returns null when the URL is null or
// blank, does not have that shape, carries a page number that does not fit in
// an int, or is already at int.MaxValue - i.e. whenever no next page URL can be
// inferred. Otherwise returns the same base URL followed by page + 1.
static string? InferNextAutoampelListUrl(string currentUrl)
{
    // Regex.Match throws on null input; treat "no URL" as "nothing to infer".
    if (string.IsNullOrWhiteSpace(currentUrl))
    {
        return null;
    }

    var match = Regex.Match(currentUrl, @"^(?<base>https?://[^/]+/typklassen/liste/)(?<page>\d+)$", RegexOptions.IgnoreCase);
    if (!match.Success)
    {
        return null;
    }

    // TryParse rejects digit runs that overflow int (and non-ASCII digits that \d accepts).
    if (!int.TryParse(match.Groups["page"].Value, out var currentPage))
    {
        return null;
    }

    // Incrementing int.MaxValue would silently wrap to a negative page number
    // in the default unchecked context and yield a bogus URL.
    if (currentPage == int.MaxValue)
    {
        return null;
    }

    return $"{match.Groups["base"].Value}{currentPage + 1}";
}
int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears) int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears)
{ {
var changes = 0; var changes = 0;