From 58e5009b0473adb1acbe87092ec4b577d325fe09 Mon Sep 17 00:00:00 2001 From: akinayturan Date: Thu, 5 Mar 2026 00:58:47 +0300 Subject: [PATCH] Improve Autoampel page fetching with retry logic and consecutive failure handling; add URL inference for pagination --- src/HsnTsnScraper/Program.cs | 68 ++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/src/HsnTsnScraper/Program.cs b/src/HsnTsnScraper/Program.cs index 6120df9..10727d5 100644 --- a/src/HsnTsnScraper/Program.cs +++ b/src/HsnTsnScraper/Program.cs @@ -336,16 +336,7 @@ async Task RunMergeCoreDbMode(HsnTsnClient hsnTsnClient, string? inputPath, stri } await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich); - var autoampelUpserted = 0; - try - { - autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable); - } - catch (Exception ex) - { - failed++; - Console.Error.WriteLine($"[warn] autoampel page-upsert failed -> {ex.Message}"); - } + var autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable); Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}"); } @@ -454,12 +445,51 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq var pageUrl = hsnTsnClient.GetAutoampelFullListUrl(); var pageIndex = 0; var totalUpserted = 0; + var consecutiveFailures = 0; + const int maxConsecutiveFailures = 8; while (!string.IsNullOrWhiteSpace(pageUrl)) { pageIndex++; Console.Error.WriteLine($"[info] Processing: {pageUrl}"); - var page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl); + AutoampelPageResult? 
page = null; + const int maxAttempts = 7; + for (var attempt = 1; attempt <= maxAttempts; attempt++) + { + try + { + page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl); + break; + } + catch (HttpRequestException ex) when (attempt < maxAttempts) + { + var delaySeconds = IsTooManyRequests(ex) ? Math.Min(90, (int)Math.Pow(2, attempt)) : Math.Min(30, attempt * 3); + Console.Error.WriteLine($"[warn] Autoampel page fetch failed ({ex.Message}), retry in {delaySeconds}s (attempt {attempt}/{maxAttempts})"); + await Task.Delay(TimeSpan.FromSeconds(delaySeconds)); + } + } + + if (page is null) + { + consecutiveFailures++; + Console.Error.WriteLine($"[warn] Autoampel page skipped after retries: {pageUrl}"); + + if (consecutiveFailures >= maxConsecutiveFailures) + { + Console.Error.WriteLine("[warn] Too many consecutive autoampel page failures, stopping page-upsert loop."); + break; + } + + pageUrl = InferNextAutoampelListUrl(pageUrl); + if (string.IsNullOrWhiteSpace(pageUrl)) + { + break; + } + + continue; + } + + consecutiveFailures = 0; var rows = page.Vehicles .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase) .Select(g => g.First()) @@ -495,6 +525,22 @@ async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, Npgsq return totalUpserted; } +static string? InferNextAutoampelListUrl(string currentUrl) +{ + var match = Regex.Match(currentUrl, @"^(?<base>https?://[^/]+/typklassen/liste/)(?<page>\d+)$", RegexOptions.IgnoreCase); + if (!match.Success) + { + return null; + } + + if (!int.TryParse(match.Groups["page"].Value, out var currentPage)) + { + return null; + } + + return $"{match.Groups["base"].Value}{currentPage + 1}"; +} + int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears) { var changes = 0;