Improve Autoampel page fetching with retry logic and consecutive failure handling; add URL inference for pagination
This commit is contained in:
@@ -336,16 +336,7 @@ async Task RunMergeCoreDbMode(HsnTsnClient hsnTsnClient, string? inputPath, stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich);
|
await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich);
|
||||||
var autoampelUpserted = 0;
|
var autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
|
||||||
try
|
|
||||||
{
|
|
||||||
autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
failed++;
|
|
||||||
Console.Error.WriteLine($"[warn] autoampel page-upsert failed -> {ex.Message}");
|
|
||||||
}
|
|
||||||
|
|
||||||
Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}");
|
Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}");
|
||||||
}
|
}
|
||||||
@@ -454,12 +445,51 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq
|
|||||||
var pageUrl = hsnTsnClient.GetAutoampelFullListUrl();
|
var pageUrl = hsnTsnClient.GetAutoampelFullListUrl();
|
||||||
var pageIndex = 0;
|
var pageIndex = 0;
|
||||||
var totalUpserted = 0;
|
var totalUpserted = 0;
|
||||||
|
var consecutiveFailures = 0;
|
||||||
|
const int maxConsecutiveFailures = 8;
|
||||||
|
|
||||||
while (!string.IsNullOrWhiteSpace(pageUrl))
|
while (!string.IsNullOrWhiteSpace(pageUrl))
|
||||||
{
|
{
|
||||||
pageIndex++;
|
pageIndex++;
|
||||||
Console.Error.WriteLine($"[info] Processing: {pageUrl}");
|
Console.Error.WriteLine($"[info] Processing: {pageUrl}");
|
||||||
var page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl);
|
AutoampelPageResult? page = null;
|
||||||
|
const int maxAttempts = 7;
|
||||||
|
for (var attempt = 1; attempt <= maxAttempts; attempt++)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
catch (HttpRequestException ex) when (attempt < maxAttempts)
|
||||||
|
{
|
||||||
|
var delaySeconds = IsTooManyRequests(ex) ? Math.Min(90, (int)Math.Pow(2, attempt)) : Math.Min(30, attempt * 3);
|
||||||
|
Console.Error.WriteLine($"[warn] Autoampel page fetch failed ({ex.Message}), retry in {delaySeconds}s (attempt {attempt}/{maxAttempts})");
|
||||||
|
await Task.Delay(TimeSpan.FromSeconds(delaySeconds));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (page is null)
|
||||||
|
{
|
||||||
|
consecutiveFailures++;
|
||||||
|
Console.Error.WriteLine($"[warn] Autoampel page skipped after retries: {pageUrl}");
|
||||||
|
|
||||||
|
if (consecutiveFailures >= maxConsecutiveFailures)
|
||||||
|
{
|
||||||
|
Console.Error.WriteLine("[warn] Too many consecutive autoampel page failures, stopping page-upsert loop.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pageUrl = InferNextAutoampelListUrl(pageUrl);
|
||||||
|
if (string.IsNullOrWhiteSpace(pageUrl))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
consecutiveFailures = 0;
|
||||||
var rows = page.Vehicles
|
var rows = page.Vehicles
|
||||||
.GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase)
|
.GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase)
|
||||||
.Select(g => g.First())
|
.Select(g => g.First())
|
||||||
@@ -495,6 +525,22 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq
|
|||||||
return totalUpserted;
|
return totalUpserted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Derives the URL of the next Autoampel list page from the current one by
// incrementing the trailing page number.
//
// currentUrl: http(s) URL expected to end in "/typklassen/liste/<page>",
//             where <page> is a run of digits.
// Returns:    the same URL with <page> + 1, or null when the URL does not have
//             that shape, the page number does not parse as an int, or the
//             page number cannot be incremented without overflowing.
static string? InferNextAutoampelListUrl(string currentUrl)
{
    var match = Regex.Match(currentUrl, @"^(?<base>https?://[^/]+/typklassen/liste/)(?<page>\d+)$", RegexOptions.IgnoreCase);
    if (!match.Success)
    {
        return null;
    }

    // The digit run can be arbitrarily long; TryParse fails when it does not fit in an int.
    if (!int.TryParse(match.Groups["page"].Value, out var currentPage))
    {
        return null;
    }

    // Guard the increment: int.MaxValue + 1 would wrap to a negative page number
    // in an unchecked context and yield a URL that is not a valid list page.
    if (currentPage == int.MaxValue)
    {
        return null;
    }

    return $"{match.Groups["base"].Value}{currentPage + 1}";
}
|
||||||
|
|
||||||
int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears)
|
int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears)
|
||||||
{
|
{
|
||||||
var changes = 0;
|
var changes = 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user