Improve Autoampel page fetching with retry logic and consecutive failure handling; add URL inference for pagination
This commit is contained in:
@@ -336,16 +336,7 @@ async Task RunMergeCoreDbMode(HsnTsnClient hsnTsnClient, string? inputPath, stri
|
||||
}
|
||||
|
||||
await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich);
|
||||
var autoampelUpserted = 0;
|
||||
try
|
||||
{
|
||||
autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
failed++;
|
||||
Console.Error.WriteLine($"[warn] autoampel page-upsert failed -> {ex.Message}");
|
||||
}
|
||||
var autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
|
||||
|
||||
Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}");
|
||||
}
|
||||
@@ -454,12 +445,51 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq
|
||||
var pageUrl = hsnTsnClient.GetAutoampelFullListUrl();
|
||||
var pageIndex = 0;
|
||||
var totalUpserted = 0;
|
||||
var consecutiveFailures = 0;
|
||||
const int maxConsecutiveFailures = 8;
|
||||
|
||||
while (!string.IsNullOrWhiteSpace(pageUrl))
|
||||
{
|
||||
pageIndex++;
|
||||
Console.Error.WriteLine($"[info] Processing: {pageUrl}");
|
||||
var page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl);
|
||||
AutoampelPageResult? page = null;
|
||||
const int maxAttempts = 7;
|
||||
for (var attempt = 1; attempt <= maxAttempts; attempt++)
|
||||
{
|
||||
try
|
||||
{
|
||||
page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl);
|
||||
break;
|
||||
}
|
||||
catch (HttpRequestException ex) when (attempt < maxAttempts)
|
||||
{
|
||||
var delaySeconds = IsTooManyRequests(ex) ? Math.Min(90, (int)Math.Pow(2, attempt)) : Math.Min(30, attempt * 3);
|
||||
Console.Error.WriteLine($"[warn] Autoampel page fetch failed ({ex.Message}), retry in {delaySeconds}s (attempt {attempt}/{maxAttempts})");
|
||||
await Task.Delay(TimeSpan.FromSeconds(delaySeconds));
|
||||
}
|
||||
}
|
||||
|
||||
if (page is null)
|
||||
{
|
||||
consecutiveFailures++;
|
||||
Console.Error.WriteLine($"[warn] Autoampel page skipped after retries: {pageUrl}");
|
||||
|
||||
if (consecutiveFailures >= maxConsecutiveFailures)
|
||||
{
|
||||
Console.Error.WriteLine("[warn] Too many consecutive autoampel page failures, stopping page-upsert loop.");
|
||||
break;
|
||||
}
|
||||
|
||||
pageUrl = InferNextAutoampelListUrl(pageUrl);
|
||||
if (string.IsNullOrWhiteSpace(pageUrl))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
consecutiveFailures = 0;
|
||||
var rows = page.Vehicles
|
||||
.GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase)
|
||||
.Select(g => g.First())
|
||||
@@ -495,6 +525,22 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq
|
||||
return totalUpserted;
|
||||
}
|
||||
|
||||
// Infers the URL of the next Autoampel list page by incrementing the trailing
// page number of the given URL.
//
// currentUrl: absolute URL of the form "http(s)://<host>/typklassen/liste/<page>".
//
// Returns the same URL with <page> increased by one, or null when
//   - the URL does not have that exact shape (no trailing page number, extra
//     path segments, query string, ...),
//   - the page number does not fit into an int, or
//   - the page number is already int.MaxValue and cannot be incremented.
static string? InferNextAutoampelListUrl(string currentUrl)
{
    // CultureInvariant keeps the case-insensitive match independent of the
    // process culture (e.g. the Turkish i/I mapping would otherwise affect
    // how "liste" is compared).
    var match = Regex.Match(
        currentUrl,
        @"^(?<base>https?://[^/]+/typklassen/liste/)(?<page>\d+)$",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    if (!match.Success)
    {
        return null;
    }

    if (!int.TryParse(match.Groups["page"].Value, out var currentPage))
    {
        return null;
    }

    // Without this guard, currentPage + 1 would wrap around to a negative
    // number in an unchecked context and produce a bogus ".../liste/-2147483648" URL.
    if (currentPage == int.MaxValue)
    {
        return null;
    }

    return $"{match.Groups["base"].Value}{currentPage + 1}";
}
|
||||
|
||||
int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears)
|
||||
{
|
||||
var changes = 0;
|
||||
|
||||
Reference in New Issue
Block a user