diff --git a/README.md b/README.md
index 2692393..8f33b21 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,12 @@ Scrape all brand pages:
 dotnet run --project src/HsnTsnScraper/HsnTsnScraper.csproj > hsntsn.csv
 ```
 
+Scrape directly from Autoampel typklassen pages (no hsn-tsn redirect chain):
+
+```bash
+dotnet run --project src/HsnTsnScraper/HsnTsnScraper.csproj -- --source autoampel > hsntsn.csv
+```
+
 Scrape only specific queries from `stdin`:
 
 ```bash
@@ -38,3 +44,18 @@ Repair only missing year fields from an existing CSV:
 ```bash
 dotnet run --project src/HsnTsnScraper/HsnTsnScraper.csproj -- --repair-years --input-csv hsntsn.csv --output-csv hsntsn.repaired.csv
 ```
+
+Merge core fields by `HsnTsn` and write to PostgreSQL (priority: `hsn-tsn.de` then `autoampel.de`):
+
+```bash
+dotnet run --project src/HsnTsnScraper/HsnTsnScraper.csproj -- --merge-core-db --pg-connection "Host=localhost;Port=5432;Database=hsntsn;Username=hsntsn;Password=hsntsn" --pg-table public.hsntsn_vehicle
+```
+
+You can also pass the connection via environment variable:
+
+```bash
+export HSNTSN_PG="Host=localhost;Port=5432;Database=hsntsn;Username=hsntsn;Password=hsntsn"
+dotnet run --project src/HsnTsnScraper/HsnTsnScraper.csproj -- --merge-core-db
+```
+
+Optional: if you already have a CSV, you can still seed from it with `--input-csv hsntsn.csv`.
diff --git a/src/HsnTsnScraper/HsnTsnClient.cs b/src/HsnTsnScraper/HsnTsnClient.cs index 0f9bba2..f4b3e66 100644 --- a/src/HsnTsnScraper/HsnTsnClient.cs +++ b/src/HsnTsnScraper/HsnTsnClient.cs @@ -9,8 +9,10 @@ namespace HsnTsnScraper; public sealed class HsnTsnClient : IDisposable { + private const string AutoampelBaseUrl = "https://www.autoampel.de/"; private static readonly Regex HsnTsnRegex = new(@"(?\d{4})\s*/\s*(?[A-Z0-9]{3})", RegexOptions.Compiled); private static readonly Regex PsKwRegex = new(@"(?\d+)\s*PS\s*\((?\d+)\s*kW\)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex PsKwShortRegex = new(@"(?\d+)\s*\((?\d+)\)", RegexOptions.Compiled); private static readonly Regex CcmRegex = new(@"(?[\d\.\,]+)\s*ccm", RegexOptions.Compiled | RegexOptions.IgnoreCase); private static readonly Regex YearRangeRegex = new(@"(?\d{4})(?:\D+(?\d{4}))?", RegexOptions.Compiled); private static readonly HashSet ExcludedLinks = new(StringComparer.OrdinalIgnoreCase) @@ -101,6 +103,116 @@ public sealed class HsnTsnClient : IDisposable return ParseVehiclesFromListPage(html, absoluteUrl, null); } + public async Task> GetVehiclesByHsnFromHsnTsnAsync(string hsn, CancellationToken cancellationToken = default) + { + if (string.IsNullOrWhiteSpace(hsn)) + { + return Array.Empty(); + } + + var encoded = Uri.EscapeDataString(hsn.Trim()); + var url = $"/liste.php?string={encoded}"; + var html = await GetStringAsync(url, cancellationToken); + return ParseVehiclesFromListPage(html, new Uri(_client.BaseAddress!, url).ToString(), hsn) + .Where(v => v.Hsn.Equals(hsn, StringComparison.OrdinalIgnoreCase)) + .ToArray(); + } + + public string GetAutoampelFullListUrl() + { + return $"{AutoampelBaseUrl}typklassen/liste"; + } + + public async Task GetVehiclesFromAutoampelListPageAsync(string absoluteUrl, CancellationToken cancellationToken = default) + { + var html = await GetStringAsync(absoluteUrl, cancellationToken); + var doc = LoadDocument(html); + var rows = 
doc.DocumentNode.SelectNodes("//table[contains(@class,'autolist')]//tbody/tr") ?? new HtmlNodeCollection(null); + var result = new List(); + + foreach (var row in rows) + { + var cells = row.SelectNodes("./td"); + if (cells is null || cells.Count < 9) + { + continue; + } + + var hsnTsnText = Clean(cells[8].InnerText); + var match = HsnTsnRegex.Match(hsnTsnText); + if (!match.Success) + { + continue; + } + + var vehicleTypeRaw = Clean(cells[3].InnerText); + var detailHref = cells[3].SelectSingleNode(".//a[@href]")?.GetAttributeValue("href", string.Empty) ?? string.Empty; + var detailUrl = ToAbsoluteUrl(detailHref, AutoampelBaseUrl); + + var yearText = Clean(cells[4].InnerText); + ParseYearRange(yearText, out var yearFrom, out var yearTo); + + var powerText = Clean(cells[5].InnerText); + ParsePowerAutoampel(powerText, out var ps, out var kw); + + var displacementText = Clean(cells[6].InnerText); + var displacementCcm = ParseDisplacementAutoampel(displacementText); + + var fuelType = Clean(cells[7].InnerText); + var brand = ExtractBrand(vehicleTypeRaw); + + var vehicle = new HsnTsnVehicle + { + Hsn = match.Groups["hsn"].Value, + Tsn = match.Groups["tsn"].Value, + HsnTsn = $"{match.Groups["hsn"].Value}/{match.Groups["tsn"].Value}", + Brand = brand, + VehicleType = vehicleTypeRaw, + YearFrom = yearFrom, + YearTo = yearTo, + PowerPs = ps, + PowerKw = kw, + DisplacementCcm = displacementCcm, + FuelType = fuelType, + SourceListUrl = absoluteUrl, + SourceDetailUrl = detailUrl + }; + + vehicle.MatchKey = BuildMatchKey(vehicle); + result.Add(vehicle); + } + + var nextHref = doc.DocumentNode.SelectSingleNode("//ul[@id='pagination']//a[@rel='next']")?.GetAttributeValue("href", string.Empty) ?? string.Empty; + var nextUrl = string.IsNullOrWhiteSpace(nextHref) ? 
null : ToAbsoluteUrl(nextHref, AutoampelBaseUrl); + + return new AutoampelPageResult + { + Vehicles = result, + NextPageUrl = nextUrl + }; + } + + public async Task> GetVehiclesByHsnFromAutoampelAsync(string hsn, CancellationToken cancellationToken = default) + { + if (string.IsNullOrWhiteSpace(hsn)) + { + return Array.Empty(); + } + + var startUrl = $"{AutoampelBaseUrl}typklassen/hsn-{hsn}"; + var currentUrl = startUrl; + var all = new List(); + + while (!string.IsNullOrWhiteSpace(currentUrl)) + { + var page = await GetVehiclesFromAutoampelListPageAsync(currentUrl, cancellationToken); + all.AddRange(page.Vehicles.Where(v => v.Hsn.Equals(hsn, StringComparison.OrdinalIgnoreCase))); + currentUrl = page.NextPageUrl; + } + + return all; + } + public async Task GetVehicleDetailAsync(string detailUrl, CancellationToken cancellationToken = default) { if (string.IsNullOrWhiteSpace(detailUrl)) @@ -178,7 +290,7 @@ public sealed class HsnTsnClient : IDisposable var vehicleTypeRaw = Clean(cells[1].InnerText); var detailHref = cells[1].SelectSingleNode(".//a[@href]")?.GetAttributeValue("href", string.Empty) ?? string.Empty; - var detailUrl = ToAbsoluteUrl(detailHref); + var detailUrl = ToAbsoluteUrl(detailHref, _client.BaseAddress!.ToString()); var powerText = Clean(cells[2].InnerText); ParsePower(powerText, out var ps, out var kw); @@ -212,7 +324,7 @@ public sealed class HsnTsnClient : IDisposable return result; } - private string ToAbsoluteUrl(string href) + private static string ToAbsoluteUrl(string href, string baseUrl) { if (string.IsNullOrWhiteSpace(href)) { @@ -227,7 +339,7 @@ public sealed class HsnTsnClient : IDisposable } var normalized = href.StartsWith("/", StringComparison.Ordinal) ? 
href : $"/{href.TrimStart('/')}"; - return new Uri(_client.BaseAddress!, normalized).ToString(); + return new Uri(new Uri(baseUrl), normalized).ToString(); } private static HtmlDocument LoadDocument(string html) @@ -264,6 +376,28 @@ public sealed class HsnTsnClient : IDisposable } } + private static void ParsePowerAutoampel(string powerText, out int? ps, out int? kw) + { + ps = null; + kw = null; + + var match = PsKwShortRegex.Match(powerText); + if (!match.Success) + { + return; + } + + if (int.TryParse(match.Groups["ps"].Value, out var psParsed)) + { + ps = psParsed; + } + + if (int.TryParse(match.Groups["kw"].Value, out var kwParsed)) + { + kw = kwParsed; + } + } + private static int? ParseDisplacement(string text) { var match = CcmRegex.Match(text); @@ -276,6 +410,12 @@ public sealed class HsnTsnClient : IDisposable return int.TryParse(numeric, out var ccm) ? ccm : null; } + private static int? ParseDisplacementAutoampel(string text) + { + var numeric = Regex.Replace(text, @"[^\d]", string.Empty); + return int.TryParse(numeric, out var ccm) ? ccm : null; + } + private static void ParseYearRange(string yearText, out int? fromYear, out int? toYear) { fromYear = null; @@ -367,6 +507,12 @@ public sealed class HsnTsnClient : IDisposable } } +public sealed class AutoampelPageResult +{ + public IReadOnlyList Vehicles { get; init; } = Array.Empty(); + public string? 
NextPageUrl { get; init; } +} + public sealed class VehicleDetail { public string Hsn { get; set; } = string.Empty; diff --git a/src/HsnTsnScraper/HsnTsnScraper.csproj b/src/HsnTsnScraper/HsnTsnScraper.csproj index 1778750..ec3ffc2 100644 --- a/src/HsnTsnScraper/HsnTsnScraper.csproj +++ b/src/HsnTsnScraper/HsnTsnScraper.csproj @@ -12,6 +12,7 @@ + diff --git a/src/HsnTsnScraper/Program.cs b/src/HsnTsnScraper/Program.cs index 1b7aa17..6120df9 100644 --- a/src/HsnTsnScraper/Program.cs +++ b/src/HsnTsnScraper/Program.cs @@ -4,23 +4,867 @@ using System.Text.RegularExpressions; using CsvHelper; using CsvHelper.Configuration; using HsnTsnScraper; +using Npgsql; var repairYears = args.Contains("--repair-years", StringComparer.OrdinalIgnoreCase); +var mergeCore = args.Contains("--merge-core", StringComparer.OrdinalIgnoreCase); +var mergeCoreDb = args.Contains("--merge-core-db", StringComparer.OrdinalIgnoreCase); var includeDetails = args.Contains("--include-details", StringComparer.OrdinalIgnoreCase); var inputCsv = GetOptionValue(args, "--input-csv"); var outputCsv = GetOptionValue(args, "--output-csv"); +var pgConnection = GetOptionValue(args, "--pg-connection"); +var pgTable = GetOptionValue(args, "--pg-table") ?? "public.hsntsn_vehicle"; +var source = (GetOptionValue(args, "--source") ?? "hsntsn").Trim().ToLowerInvariant(); using var client = new HsnTsnClient(); +if (mergeCore) +{ + await RunMergeCoreMode(client, inputCsv, outputCsv); + return; +} + +if (mergeCoreDb) +{ + await RunMergeCoreDbMode(client, inputCsv, pgConnection, pgTable); + return; +} + if (repairYears) { await RunRepairYearsMode(client, inputCsv, outputCsv); return; } -await RunScrapeMode(client, includeDetails); +await RunScrapeMode(client, includeDetails, source); return; +async Task RunMergeCoreMode(HsnTsnClient hsnTsnClient, string? inputPath, string? outputPath) +{ + var inputCsvPath = string.IsNullOrWhiteSpace(inputPath) ? 
"hsntsn.csv" : inputPath; + var outputCsvPath = string.IsNullOrWhiteSpace(outputPath) ? "hsntsn.core.csv" : outputPath; + + if (Path.GetFullPath(inputCsvPath).Equals(Path.GetFullPath(outputCsvPath), StringComparison.OrdinalIgnoreCase)) + { + Console.Error.WriteLine("[error] --input-csv and --output-csv cannot point to the same file."); + return; + } + + Console.Error.WriteLine($"[info] Merge-core mode started. input={inputCsvPath}, output={outputCsvPath}"); + + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + + await using (var inputStream = File.OpenRead(inputCsvPath)) + using (var inputReader = new StreamReader(inputStream)) + using (var csvReader = new CsvReader(inputReader, new CsvConfiguration(CultureInfo.InvariantCulture) + { + Delimiter = ";", + MissingFieldFound = null, + HeaderValidated = null + })) + { + await csvReader.ReadAsync(); + csvReader.ReadHeader(); + + await foreach (var record in csvReader.GetRecordsAsync()) + { + var key = record.HsnTsn?.Trim(); + if (string.IsNullOrWhiteSpace(key)) + { + continue; + } + + if (!map.TryGetValue(key, out var row)) + { + row = new CoreOutputRow + { + HsnTsn = key, + Hsn = record.Hsn, + Tsn = record.Tsn, + Brand = record.Brand, + Model = !string.IsNullOrWhiteSpace(record.Model) ? 
record.Model : record.VehicleType, + YearFrom = record.YearFrom, + YearTo = record.YearTo + }; + map[key] = row; + } + else + { + if (string.IsNullOrWhiteSpace(row.Hsn) && !string.IsNullOrWhiteSpace(record.Hsn)) row.Hsn = record.Hsn; + if (string.IsNullOrWhiteSpace(row.Tsn) && !string.IsNullOrWhiteSpace(record.Tsn)) row.Tsn = record.Tsn; + if (string.IsNullOrWhiteSpace(row.Brand) && !string.IsNullOrWhiteSpace(record.Brand)) row.Brand = record.Brand; + if (string.IsNullOrWhiteSpace(row.Model) && !string.IsNullOrWhiteSpace(record.Model)) row.Model = record.Model; + if (row.YearFrom is null && record.YearFrom is not null) row.YearFrom = record.YearFrom; + if (row.YearTo is null && record.YearTo is not null) row.YearTo = record.YearTo; + } + } + } + + var byHsn = map.Values + .Where(x => !string.IsNullOrWhiteSpace(x.Hsn)) + .GroupBy(x => x.Hsn, StringComparer.OrdinalIgnoreCase) + .OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase) + .ToList(); + + var hsnIndex = 0; + var filledFromHsnTsn = 0; + var filledFromAutoampel = 0; + var failed = 0; + Dictionary? 
autoampelIndex = null; + + foreach (var hsnGroup in byHsn) + { + hsnIndex++; + var hsn = hsnGroup.Key; + Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: checking hsn-tsn.de"); + var yearMissingBeforeHsnTsn = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null); + + Dictionary fromHsnTsn; + try + { + fromHsnTsn = (await hsnTsnClient.GetVehiclesByHsnFromHsnTsnAsync(hsn)) + .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase) + .Select(g => g.First()) + .ToDictionary(v => v.HsnTsn, v => v, StringComparer.OrdinalIgnoreCase); + } + catch (Exception ex) + { + failed++; + Console.Error.WriteLine($"[warn] HSN {hsn} failed on hsn-tsn.de -> {ex.Message}"); + fromHsnTsn = new Dictionary(StringComparer.OrdinalIgnoreCase); + } + + foreach (var row in hsnGroup) + { + if (fromHsnTsn.TryGetValue(row.HsnTsn, out var sourceRow)) + { + filledFromHsnTsn += FillCoreFields(row, sourceRow, fillYears: false); + } + } + + var matchedHsnTsn = hsnGroup.Count(r => fromHsnTsn.ContainsKey(r.HsnTsn)); + var yearMissingAfterHsnTsn = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null); + var yearFilledByHsnTsn = Math.Max(0, yearMissingBeforeHsnTsn - yearMissingAfterHsnTsn); + Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: hsn-tsn matched={matchedHsnTsn}, year_filled={yearFilledByHsnTsn}, still_missing={yearMissingAfterHsnTsn}"); + + var needsAutoampel = hsnGroup.Any(r => r.YearFrom is null || r.YearTo is null || string.IsNullOrWhiteSpace(r.Brand) || string.IsNullOrWhiteSpace(r.Model)); + if (!needsAutoampel) + { + continue; + } + + Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: checking autoampel.de"); + var yearMissingBeforeAutoampel = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null); + + if (autoampelIndex is null) + { + try + { + autoampelIndex = await BuildAutoampelIndexByHsnTsnAsync(hsnTsnClient); + } + catch (Exception ex) + { + failed++; + Console.Error.WriteLine($"[warn] autoampel index 
build failed -> {ex.Message}"); + autoampelIndex = new Dictionary(StringComparer.OrdinalIgnoreCase); + } + } + + foreach (var row in hsnGroup) + { + if (autoampelIndex.TryGetValue(row.HsnTsn, out var sourceRow)) + { + filledFromAutoampel += FillCoreFields(row, sourceRow, fillYears: true); + } + } + + var matchedAutoampel = hsnGroup.Count(r => autoampelIndex.ContainsKey(r.HsnTsn)); + var yearMissingAfterAutoampel = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null); + var yearFilledByAutoampel = Math.Max(0, yearMissingBeforeAutoampel - yearMissingAfterAutoampel); + Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: autoampel matched={matchedAutoampel}, year_filled={yearFilledByAutoampel}, still_missing={yearMissingAfterAutoampel}"); + } + + await using var outputStream = File.Create(outputCsvPath); + await using var outputWriter = new StreamWriter(outputStream); + await using var csvWriter = new CsvWriter(outputWriter, new CsvConfiguration(CultureInfo.InvariantCulture) + { + Delimiter = ";" + }); + + csvWriter.WriteHeader(); + await csvWriter.NextRecordAsync(); + + foreach (var row in map.Values + .OrderBy(x => x.Hsn, StringComparer.OrdinalIgnoreCase) + .ThenBy(x => x.Tsn, StringComparer.OrdinalIgnoreCase)) + { + csvWriter.WriteRecord(row); + await csvWriter.NextRecordAsync(); + } + + await csvWriter.FlushAsync(); + Console.Error.WriteLine($"[info] Merge-core finished. rows={map.Count}, filled_hsntsn={filledFromHsnTsn}, filled_autoampel={filledFromAutoampel}, failed={failed}"); +} + +async Task RunMergeCoreDbMode(HsnTsnClient hsnTsnClient, string? inputPath, string? connectionStringArg, string tableName) +{ + var inputCsvPath = string.IsNullOrWhiteSpace(inputPath) ? string.Empty : inputPath; + var connectionString = string.IsNullOrWhiteSpace(connectionStringArg) + ? 
Environment.GetEnvironmentVariable("HSNTSN_PG") + : connectionStringArg; + + if (string.IsNullOrWhiteSpace(connectionString)) + { + Console.Error.WriteLine("[error] Missing PostgreSQL connection. Use --pg-connection or HSNTSN_PG env var."); + return; + } + + var quotedTable = QuoteQualifiedTableName(tableName); + if (string.IsNullOrWhiteSpace(quotedTable)) + { + Console.Error.WriteLine($"[error] Invalid --pg-table value: {tableName}"); + return; + } + + if (string.IsNullOrWhiteSpace(inputCsvPath)) + { + Console.Error.WriteLine($"[info] Merge-core-db started. input=, table={tableName}"); + } + else + { + Console.Error.WriteLine($"[info] Merge-core-db started. input={inputCsvPath}, table={tableName}"); + } + + Dictionary map; + if (!string.IsNullOrWhiteSpace(inputCsvPath) && File.Exists(inputCsvPath)) + { + map = await ReadDbRowsFromCsvAsync(inputCsvPath); + Console.Error.WriteLine($"[info] Seed source: CSV ({map.Count} unique HsnTsn)"); + } + else + { + if (!string.IsNullOrWhiteSpace(inputCsvPath)) + { + Console.Error.WriteLine($"[warn] Input CSV not found: {inputCsvPath}. Falling back to live scrape."); + } + + map = await BuildSeedRowsFromHsnTsnAsync(hsnTsnClient); + Console.Error.WriteLine($"[info] Seed source: hsn-tsn.de live scrape ({map.Count} unique HsnTsn)"); + } + + if (map.Count == 0) + { + Console.Error.WriteLine("[warn] No seed data collected."); + return; + } + + await using var conn = new NpgsqlConnection(connectionString); + await conn.OpenAsync(); + await EnsureVehicleTableAsync(conn, quotedTable); + + await BulkUpsertVehicleRowsAsync(conn, quotedTable, map.Values); + Console.Error.WriteLine($"[info] Seed upsert finished. 
rows={map.Count}"); + + var rowsToEnrich = await LoadVehicleRowsNeedingEnrichmentAsync(conn, quotedTable); + var byHsn = rowsToEnrich + .Where(x => !string.IsNullOrWhiteSpace(x.Hsn)) + .GroupBy(x => x.Hsn, StringComparer.OrdinalIgnoreCase) + .OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase) + .ToList(); + + var hsnIndex = 0; + var filledFromHsnTsn = 0; + var failed = 0; + + foreach (var hsnGroup in byHsn) + { + hsnIndex++; + var hsn = hsnGroup.Key; + Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: checking hsn-tsn.de"); + var yearMissingBeforeHsnTsn = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null); + + Dictionary fromHsnTsn; + try + { + fromHsnTsn = (await hsnTsnClient.GetVehiclesByHsnFromHsnTsnAsync(hsn)) + .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase) + .Select(g => g.First()) + .ToDictionary(v => v.HsnTsn, v => v, StringComparer.OrdinalIgnoreCase); + } + catch (Exception ex) + { + failed++; + Console.Error.WriteLine($"[warn] HSN {hsn} failed on hsn-tsn.de -> {ex.Message}"); + fromHsnTsn = new Dictionary(StringComparer.OrdinalIgnoreCase); + } + + foreach (var row in hsnGroup) + { + if (fromHsnTsn.TryGetValue(row.HsnTsn, out var sourceRow)) + { + filledFromHsnTsn += FillDbFields(row, sourceRow); + } + } + + var matchedHsnTsn = hsnGroup.Count(r => fromHsnTsn.ContainsKey(r.HsnTsn)); + var yearMissingAfterHsnTsn = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null); + var yearFilledByHsnTsn = Math.Max(0, yearMissingBeforeHsnTsn - yearMissingAfterHsnTsn); + Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: hsn-tsn matched={matchedHsnTsn}, year_filled={yearFilledByHsnTsn}, still_missing={yearMissingAfterHsnTsn}"); + + var needsAutoampel = hsnGroup.Any(NeedsAutoampelLookup); + if (!needsAutoampel) + { + continue; + } + } + + foreach (var row in rowsToEnrich) + { + if (string.IsNullOrWhiteSpace(row.MatchKey)) + { + row.MatchKey = BuildMatchKey(new HsnTsnVehicle + { + Brand = row.Brand, + 
VehicleType = row.VehicleType, + Model = row.Model, + OfficialType = row.OfficialType, + PowerKw = row.PowerKw, + DisplacementCcm = row.DisplacementCcm, + FuelType = row.FuelType + }); + } + } + + await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich); + var autoampelUpserted = 0; + try + { + autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable); + } + catch (Exception ex) + { + failed++; + Console.Error.WriteLine($"[warn] autoampel page-upsert failed -> {ex.Message}"); + } + + Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}"); +} + +async Task> BuildSeedRowsFromHsnTsnAsync(HsnTsnClient hsnTsnClient) +{ + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + + IReadOnlyList brandUrls; + try + { + brandUrls = await hsnTsnClient.GetBrandPageUrls(); + } + catch (Exception ex) + { + Console.Error.WriteLine($"[error] Could not fetch brand urls for DB seed: {ex.Message}"); + return map; + } + + var pageIndex = 0; + foreach (var url in brandUrls) + { + pageIndex++; + Console.Error.WriteLine($"[info] [seed {pageIndex}/{brandUrls.Count}] {url}"); + IReadOnlyList vehicles; + try + { + vehicles = await hsnTsnClient.GetVehiclesFromBrandPageAsync(url); + } + catch (Exception ex) + { + Console.Error.WriteLine($"[warn] Seed page failed: {url} -> {ex.Message}"); + continue; + } + + foreach (var v in vehicles) + { + if (!map.TryGetValue(v.HsnTsn, out var row)) + { + row = new DbVehicleRow + { + HsnTsn = v.HsnTsn, + Hsn = v.Hsn, + Tsn = v.Tsn, + Brand = v.Brand, + VehicleType = v.VehicleType, + Model = v.Model, + OfficialType = v.OfficialType, + YearFrom = v.YearFrom, + YearTo = v.YearTo, + PowerPs = v.PowerPs, + PowerKw = v.PowerKw, + DisplacementCcm = v.DisplacementCcm, + FuelType = v.FuelType, + MatchKey = v.MatchKey + }; + map[v.HsnTsn] = row; + continue; + } + + if 
(string.IsNullOrWhiteSpace(row.Hsn) && !string.IsNullOrWhiteSpace(v.Hsn)) row.Hsn = v.Hsn; + if (string.IsNullOrWhiteSpace(row.Tsn) && !string.IsNullOrWhiteSpace(v.Tsn)) row.Tsn = v.Tsn; + if (string.IsNullOrWhiteSpace(row.Brand) && !string.IsNullOrWhiteSpace(v.Brand)) row.Brand = v.Brand; + if (string.IsNullOrWhiteSpace(row.VehicleType) && !string.IsNullOrWhiteSpace(v.VehicleType)) row.VehicleType = v.VehicleType; + if (string.IsNullOrWhiteSpace(row.Model) && !string.IsNullOrWhiteSpace(v.Model)) row.Model = v.Model; + if (string.IsNullOrWhiteSpace(row.OfficialType) && !string.IsNullOrWhiteSpace(v.OfficialType)) row.OfficialType = v.OfficialType; + if (row.YearFrom is null && v.YearFrom is not null) row.YearFrom = v.YearFrom; + if (row.YearTo is null && v.YearTo is not null) row.YearTo = v.YearTo; + if (row.PowerPs is null && v.PowerPs is not null) row.PowerPs = v.PowerPs; + if (row.PowerKw is null && v.PowerKw is not null) row.PowerKw = v.PowerKw; + if (row.DisplacementCcm is null && v.DisplacementCcm is not null) row.DisplacementCcm = v.DisplacementCcm; + if (string.IsNullOrWhiteSpace(row.FuelType) && !string.IsNullOrWhiteSpace(v.FuelType)) row.FuelType = v.FuelType; + if (string.IsNullOrWhiteSpace(row.MatchKey) && !string.IsNullOrWhiteSpace(v.MatchKey)) row.MatchKey = v.MatchKey; + } + } + + return map; +} + +async Task> BuildAutoampelIndexByHsnTsnAsync(HsnTsnClient hsnTsnClient) +{ + var index = new Dictionary(StringComparer.OrdinalIgnoreCase); + var pageUrl = hsnTsnClient.GetAutoampelFullListUrl(); + + while (!string.IsNullOrWhiteSpace(pageUrl)) + { + Console.Error.WriteLine($"[info] Processing: {pageUrl}"); + var page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl); + foreach (var vehicle in page.Vehicles) + { + if (!index.ContainsKey(vehicle.HsnTsn)) + { + index[vehicle.HsnTsn] = vehicle; + } + } + + pageUrl = page.NextPageUrl; + } + + Console.Error.WriteLine($"[info] Autoampel index ready. 
unique={index.Count}"); + return index; +} + +async Task UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, NpgsqlConnection conn, string quotedTable) +{ + var pageUrl = hsnTsnClient.GetAutoampelFullListUrl(); + var pageIndex = 0; + var totalUpserted = 0; + + while (!string.IsNullOrWhiteSpace(pageUrl)) + { + pageIndex++; + Console.Error.WriteLine($"[info] Processing: {pageUrl}"); + var page = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl); + var rows = page.Vehicles + .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase) + .Select(g => g.First()) + .Select(v => new DbVehicleRow + { + HsnTsn = v.HsnTsn, + Hsn = v.Hsn, + Tsn = v.Tsn, + Brand = v.Brand, + VehicleType = v.VehicleType, + Model = !string.IsNullOrWhiteSpace(v.Model) ? v.Model : DeriveModel(v), + OfficialType = v.OfficialType, + YearFrom = v.YearFrom, + YearTo = v.YearTo, + PowerPs = v.PowerPs, + PowerKw = v.PowerKw, + DisplacementCcm = v.DisplacementCcm, + FuelType = v.FuelType, + MatchKey = !string.IsNullOrWhiteSpace(v.MatchKey) ? 
v.MatchKey : BuildMatchKey(v) + }) + .ToList(); + + if (rows.Count > 0) + { + await BulkUpsertVehicleRowsAsync(conn, quotedTable, rows); + totalUpserted += rows.Count; + } + + Console.Error.WriteLine($"[info] Autoampel page {pageIndex} upserted={rows.Count}, total={totalUpserted}"); + pageUrl = page.NextPageUrl; + } + + return totalUpserted; +} + +int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears) +{ + var changes = 0; + + if (string.IsNullOrWhiteSpace(target.Brand) && !string.IsNullOrWhiteSpace(source.Brand)) + { + target.Brand = source.Brand; + changes++; + } + + if (string.IsNullOrWhiteSpace(target.Model)) + { + var model = DeriveModel(source); + if (!string.IsNullOrWhiteSpace(model)) + { + target.Model = model; + changes++; + } + } + + if (fillYears) + { + if (target.YearFrom is null && source.YearFrom is not null) + { + target.YearFrom = source.YearFrom; + changes++; + } + + if (target.YearTo is null && source.YearTo is not null) + { + target.YearTo = source.YearTo; + changes++; + } + } + + return changes; +} + +string DeriveModel(HsnTsnVehicle vehicle) +{ + if (!string.IsNullOrWhiteSpace(vehicle.Model)) + { + return vehicle.Model; + } + + var raw = vehicle.VehicleType?.Trim() ?? string.Empty; + if (string.IsNullOrWhiteSpace(raw)) + { + return string.Empty; + } + + var brand = vehicle.Brand?.Trim() ?? 
string.Empty; + if (!string.IsNullOrWhiteSpace(brand) && raw.StartsWith(brand + " ", StringComparison.OrdinalIgnoreCase)) + { + return raw[(brand.Length + 1)..].Trim(); + } + + return raw; +} + +static bool NeedsAutoampelLookup(DbVehicleRow row) +{ + return string.IsNullOrWhiteSpace(row.Brand) + || string.IsNullOrWhiteSpace(row.VehicleType) + || string.IsNullOrWhiteSpace(row.Model) + || row.YearFrom is null + || row.YearTo is null + || row.PowerPs is null + || row.PowerKw is null + || row.DisplacementCcm is null + || string.IsNullOrWhiteSpace(row.FuelType) + || string.IsNullOrWhiteSpace(row.MatchKey); +} + +int FillDbFields(DbVehicleRow target, HsnTsnVehicle source) +{ + var changes = 0; + + if (string.IsNullOrWhiteSpace(target.Brand) && !string.IsNullOrWhiteSpace(source.Brand)) + { + target.Brand = source.Brand; + changes++; + } + + if (string.IsNullOrWhiteSpace(target.VehicleType) && !string.IsNullOrWhiteSpace(source.VehicleType)) + { + target.VehicleType = source.VehicleType; + changes++; + } + + if (string.IsNullOrWhiteSpace(target.Model)) + { + var model = DeriveModel(source); + if (!string.IsNullOrWhiteSpace(model)) + { + target.Model = model; + changes++; + } + } + + if (string.IsNullOrWhiteSpace(target.OfficialType) && !string.IsNullOrWhiteSpace(source.OfficialType)) + { + target.OfficialType = source.OfficialType; + changes++; + } + + if (target.YearFrom is null && source.YearFrom is not null) + { + target.YearFrom = source.YearFrom; + changes++; + } + + if (target.YearTo is null && source.YearTo is not null) + { + target.YearTo = source.YearTo; + changes++; + } + + if (target.PowerPs is null && source.PowerPs is not null) + { + target.PowerPs = source.PowerPs; + changes++; + } + + if (target.PowerKw is null && source.PowerKw is not null) + { + target.PowerKw = source.PowerKw; + changes++; + } + + if (target.DisplacementCcm is null && source.DisplacementCcm is not null) + { + target.DisplacementCcm = source.DisplacementCcm; + changes++; + } + + if 
(string.IsNullOrWhiteSpace(target.FuelType) && !string.IsNullOrWhiteSpace(source.FuelType)) + { + target.FuelType = source.FuelType; + changes++; + } + + if (string.IsNullOrWhiteSpace(target.MatchKey) && !string.IsNullOrWhiteSpace(source.MatchKey)) + { + target.MatchKey = source.MatchKey; + changes++; + } + + return changes; +} + +async Task> ReadDbRowsFromCsvAsync(string inputCsvPath) +{ + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + + await using var inputStream = File.OpenRead(inputCsvPath); + using var inputReader = new StreamReader(inputStream); + using var csvReader = new CsvReader(inputReader, new CsvConfiguration(CultureInfo.InvariantCulture) + { + Delimiter = ";", + MissingFieldFound = null, + HeaderValidated = null + }); + + await csvReader.ReadAsync(); + csvReader.ReadHeader(); + + await foreach (var record in csvReader.GetRecordsAsync()) + { + var key = record.HsnTsn?.Trim(); + if (string.IsNullOrWhiteSpace(key)) + { + continue; + } + + if (!map.TryGetValue(key, out var row)) + { + row = new DbVehicleRow + { + HsnTsn = key, + Hsn = record.Hsn, + Tsn = record.Tsn, + Brand = record.Brand, + VehicleType = record.VehicleType, + Model = record.Model, + OfficialType = record.OfficialType, + YearFrom = record.YearFrom, + YearTo = record.YearTo, + PowerPs = record.PowerPs, + PowerKw = record.PowerKw, + DisplacementCcm = record.DisplacementCcm, + FuelType = record.FuelType, + MatchKey = record.MatchKey + }; + map[key] = row; + } + else + { + if (string.IsNullOrWhiteSpace(row.Hsn) && !string.IsNullOrWhiteSpace(record.Hsn)) row.Hsn = record.Hsn; + if (string.IsNullOrWhiteSpace(row.Tsn) && !string.IsNullOrWhiteSpace(record.Tsn)) row.Tsn = record.Tsn; + if (string.IsNullOrWhiteSpace(row.Brand) && !string.IsNullOrWhiteSpace(record.Brand)) row.Brand = record.Brand; + if (string.IsNullOrWhiteSpace(row.VehicleType) && !string.IsNullOrWhiteSpace(record.VehicleType)) row.VehicleType = record.VehicleType; + if (string.IsNullOrWhiteSpace(row.Model) && 
!string.IsNullOrWhiteSpace(record.Model)) row.Model = record.Model; + if (string.IsNullOrWhiteSpace(row.OfficialType) && !string.IsNullOrWhiteSpace(record.OfficialType)) row.OfficialType = record.OfficialType; + if (row.YearFrom is null && record.YearFrom is not null) row.YearFrom = record.YearFrom; + if (row.YearTo is null && record.YearTo is not null) row.YearTo = record.YearTo; + if (row.PowerPs is null && record.PowerPs is not null) row.PowerPs = record.PowerPs; + if (row.PowerKw is null && record.PowerKw is not null) row.PowerKw = record.PowerKw; + if (row.DisplacementCcm is null && record.DisplacementCcm is not null) row.DisplacementCcm = record.DisplacementCcm; + if (string.IsNullOrWhiteSpace(row.FuelType) && !string.IsNullOrWhiteSpace(record.FuelType)) row.FuelType = record.FuelType; + if (string.IsNullOrWhiteSpace(row.MatchKey) && !string.IsNullOrWhiteSpace(record.MatchKey)) row.MatchKey = record.MatchKey; + } + } + + return map; +} + +static string QuoteQualifiedTableName(string raw) +{ + if (string.IsNullOrWhiteSpace(raw)) + { + return string.Empty; + } + + var parts = raw.Split('.', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + if (parts.Length == 0 || parts.Length > 2) + { + return string.Empty; + } + + foreach (var part in parts) + { + if (!Regex.IsMatch(part, @"^[A-Za-z_][A-Za-z0-9_]*$")) + { + return string.Empty; + } + } + + return string.Join(".", parts.Select(p => $"\"{p}\"")); +} + +async Task EnsureVehicleTableAsync(NpgsqlConnection conn, string quotedTable) +{ + var sql = $@" +CREATE TABLE IF NOT EXISTS {quotedTable} ( + hsn_tsn text PRIMARY KEY, + hsn text NOT NULL, + tsn text NOT NULL, + brand text NULL, + vehicle_type text NULL, + model text NULL, + official_type text NULL, + year_to integer NULL, + year_from integer NULL, + power_ps integer NULL, + power_kw integer NULL, + displacement_ccm integer NULL, + fuel_type text NULL, + match_key text NULL +);"; + await using var cmd = new NpgsqlCommand(sql, conn); + 
await cmd.ExecuteNonQueryAsync(); +} +/* Writes all rows inside a single transaction, executing one INSERT ... ON CONFLICT (hsn_tsn) DO UPDATE statement per row. On conflict each column keeps its stored value unless that value is NULL (or an empty string for text columns), in which case the incoming value is taken. quotedTable is interpolated into the statement text verbatim and must already be a safely quoted identifier; all row values go through parameters. */ +async Task BulkUpsertVehicleRowsAsync(NpgsqlConnection conn, string quotedTable, IEnumerable rows) +{ + await using var tx = await conn.BeginTransactionAsync(); + var sql = $@" +INSERT INTO {quotedTable} AS t (hsn_tsn, hsn, tsn, brand, vehicle_type, model, official_type, year_to, year_from, power_ps, power_kw, displacement_ccm, fuel_type, match_key) +VALUES (@hsn_tsn, @hsn, @tsn, @brand, @vehicle_type, @model, @official_type, @year_to, @year_from, @power_ps, @power_kw, @displacement_ccm, @fuel_type, @match_key) +ON CONFLICT (hsn_tsn) DO UPDATE +SET hsn = COALESCE(NULLIF(t.hsn, ''), EXCLUDED.hsn), + tsn = COALESCE(NULLIF(t.tsn, ''), EXCLUDED.tsn), + brand = COALESCE(NULLIF(t.brand, ''), EXCLUDED.brand), + vehicle_type = COALESCE(NULLIF(t.vehicle_type, ''), EXCLUDED.vehicle_type), + model = COALESCE(NULLIF(t.model, ''), EXCLUDED.model), + official_type = COALESCE(NULLIF(t.official_type, ''), EXCLUDED.official_type), + year_to = COALESCE(t.year_to, EXCLUDED.year_to), + year_from = COALESCE(t.year_from, EXCLUDED.year_from), + power_ps = COALESCE(t.power_ps, EXCLUDED.power_ps), + power_kw = COALESCE(t.power_kw, EXCLUDED.power_kw), + displacement_ccm = COALESCE(t.displacement_ccm, EXCLUDED.displacement_ccm), + fuel_type = COALESCE(NULLIF(t.fuel_type, ''), EXCLUDED.fuel_type), + match_key = COALESCE(NULLIF(t.match_key, ''), EXCLUDED.match_key);"; +/* A single command with explicitly typed parameters is created once here and reused for every row in the loop further down. */ + await using var cmd = new NpgsqlCommand(sql, conn, tx); + cmd.Parameters.Add(new NpgsqlParameter("hsn_tsn", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("hsn", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("tsn", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("brand", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("vehicle_type", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("model", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new
NpgsqlParameter("official_type", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("year_to", NpgsqlTypes.NpgsqlDbType.Integer)); + cmd.Parameters.Add(new NpgsqlParameter("year_from", NpgsqlTypes.NpgsqlDbType.Integer)); + cmd.Parameters.Add(new NpgsqlParameter("power_ps", NpgsqlTypes.NpgsqlDbType.Integer)); + cmd.Parameters.Add(new NpgsqlParameter("power_kw", NpgsqlTypes.NpgsqlDbType.Integer)); + cmd.Parameters.Add(new NpgsqlParameter("displacement_ccm", NpgsqlTypes.NpgsqlDbType.Integer)); + cmd.Parameters.Add(new NpgsqlParameter("fuel_type", NpgsqlTypes.NpgsqlDbType.Text)); + cmd.Parameters.Add(new NpgsqlParameter("match_key", NpgsqlTypes.NpgsqlDbType.Text)); +/* Per row only the parameter values change. hsn and tsn fall back to an empty string when null (the CREATE TABLE in EnsureVehicleTableAsync declares them NOT NULL); blank optional text values and missing numbers are sent as DBNull. */ + foreach (var row in rows) + { + cmd.Parameters["hsn_tsn"].Value = row.HsnTsn; + cmd.Parameters["hsn"].Value = row.Hsn ?? string.Empty; + cmd.Parameters["tsn"].Value = row.Tsn ?? string.Empty; + cmd.Parameters["brand"].Value = string.IsNullOrWhiteSpace(row.Brand) ? DBNull.Value : row.Brand; + cmd.Parameters["vehicle_type"].Value = string.IsNullOrWhiteSpace(row.VehicleType) ? DBNull.Value : row.VehicleType; + cmd.Parameters["model"].Value = string.IsNullOrWhiteSpace(row.Model) ? DBNull.Value : row.Model; + cmd.Parameters["official_type"].Value = string.IsNullOrWhiteSpace(row.OfficialType) ? DBNull.Value : row.OfficialType; + cmd.Parameters["year_to"].Value = row.YearTo.HasValue ? row.YearTo.Value : DBNull.Value; + cmd.Parameters["year_from"].Value = row.YearFrom.HasValue ? row.YearFrom.Value : DBNull.Value; + cmd.Parameters["power_ps"].Value = row.PowerPs.HasValue ? row.PowerPs.Value : DBNull.Value; + cmd.Parameters["power_kw"].Value = row.PowerKw.HasValue ? row.PowerKw.Value : DBNull.Value; + cmd.Parameters["displacement_ccm"].Value = row.DisplacementCcm.HasValue ? row.DisplacementCcm.Value : DBNull.Value; + cmd.Parameters["fuel_type"].Value = string.IsNullOrWhiteSpace(row.FuelType) ?
DBNull.Value : row.FuelType; + cmd.Parameters["match_key"].Value = string.IsNullOrWhiteSpace(row.MatchKey) ? DBNull.Value : row.MatchKey; + await cmd.ExecuteNonQueryAsync(); + } +/* The commit is reached only if every row executed without throwing. NOTE(review): an exception leaves the transaction uncommitted and relies on disposal of tx to discard it - confirm against the Npgsql documentation. */ + await tx.CommitAsync(); +} +/* Loads the rows that still have gaps: hsn must be non-empty and at least one of brand, vehicle_type, model, power_ps, power_kw, displacement_ccm, fuel_type, match_key, year_to or year_from must be missing (NULL, or an empty string for text columns). official_type is selected but is not part of the filter. Rows come back ordered by hsn, then tsn. quotedTable is interpolated into the statement text verbatim and must already be a safely quoted identifier. */ +async Task> LoadVehicleRowsNeedingEnrichmentAsync(NpgsqlConnection conn, string quotedTable) +{ + var sql = $@" +SELECT hsn_tsn, hsn, tsn, brand, vehicle_type, model, official_type, year_to, year_from, power_ps, power_kw, displacement_ccm, fuel_type, match_key +FROM {quotedTable} +WHERE hsn IS NOT NULL AND hsn <> '' + AND ( + brand IS NULL OR brand = '' OR + vehicle_type IS NULL OR vehicle_type = '' OR + model IS NULL OR model = '' OR + power_ps IS NULL OR + power_kw IS NULL OR + displacement_ccm IS NULL OR + fuel_type IS NULL OR fuel_type = '' OR + match_key IS NULL OR match_key = '' OR + year_to IS NULL OR + year_from IS NULL + ) +ORDER BY hsn, tsn;"; +/* The column ordinals used below follow the SELECT list order (year_to is 7, year_from is 8). NULL text columns are mapped to string.Empty and NULL integer columns to null; hsn_tsn is read without a NULL check. */ + var list = new List(); + await using var cmd = new NpgsqlCommand(sql, conn); + await using var reader = await cmd.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + list.Add(new DbVehicleRow + { + HsnTsn = reader.GetString(0), + Hsn = reader.IsDBNull(1) ? string.Empty : reader.GetString(1), + Tsn = reader.IsDBNull(2) ? string.Empty : reader.GetString(2), + Brand = reader.IsDBNull(3) ? string.Empty : reader.GetString(3), + VehicleType = reader.IsDBNull(4) ? string.Empty : reader.GetString(4), + Model = reader.IsDBNull(5) ? string.Empty : reader.GetString(5), + OfficialType = reader.IsDBNull(6) ? string.Empty : reader.GetString(6), + YearTo = reader.IsDBNull(7) ? null : reader.GetInt32(7), + YearFrom = reader.IsDBNull(8) ? null : reader.GetInt32(8), + PowerPs = reader.IsDBNull(9) ? null : reader.GetInt32(9), + PowerKw = reader.IsDBNull(10) ? null : reader.GetInt32(10), + DisplacementCcm = reader.IsDBNull(11) ? null : reader.GetInt32(11), + FuelType = reader.IsDBNull(12) ? string.Empty : reader.GetString(12), + MatchKey = reader.IsDBNull(13) ?
string.Empty : reader.GetString(13) + }); + } + + return list; +} + async Task RunRepairYearsMode(HsnTsnClient hsnTsnClient, string? inputPath, string? outputPath) { var inputCsvPath = string.IsNullOrWhiteSpace(inputPath) ? "hsntsn.csv" : inputPath; @@ -113,13 +957,19 @@ async Task RunRepairYearsMode(HsnTsnClient hsnTsnClient, string? inputPath, stri Console.Error.WriteLine($"[info] Repair finished. {processed}/{totalRecords}, updated={updated}, failed={failed}"); } -async Task RunScrapeMode(HsnTsnClient hsnTsnClient, bool includeDetailPages) +async Task RunScrapeMode(HsnTsnClient hsnTsnClient, bool includeDetailPages, string scrapeSource) { var written = new HashSet(StringComparer.OrdinalIgnoreCase); var processed = 0; var failed = 0; - Console.Error.WriteLine($"[info] Scrape started. includeDetails={includeDetailPages}"); + if (scrapeSource is not ("hsntsn" or "autoampel")) + { + Console.Error.WriteLine($"[error] Unknown --source value: {scrapeSource}. Use 'hsntsn' or 'autoampel'."); + return; + } +/* Only the two known source names get past the guard above (the pattern match is case-sensitive); any other value is reported on stderr and the method returns before the CSV writer is created. */ + Console.Error.WriteLine($"[info] Scrape started. source={scrapeSource}, includeDetails={includeDetailPages}"); await using var csvWriter = new CsvWriter(Console.Out, new CsvConfiguration(CultureInfo.InvariantCulture) { @@ -130,7 +980,39 @@ async Task RunScrapeMode(HsnTsnClient hsnTsnClient, bool includeDetailPages) await csvWriter.NextRecordAsync(); await csvWriter.FlushAsync(); - if (Console.IsInputRedirected) + if (scrapeSource == "autoampel") + { + if (Console.IsInputRedirected) + { + Console.Error.WriteLine("[error] --source autoampel does not support stdin query mode.
Run without stdin redirection."); + return; + } +/* Follows the pagination chain from the full list URL until a page reports no next URL; the first page that throws is counted in failed and ends the crawl. NOTE(review): nothing in this hunk guards against NextPageUrl repeating an already visited page, and the stdin check above runs after the csvWriter flush shown in the hunk context - confirm both are intended. */ + var pageUrl = hsnTsnClient.GetAutoampelFullListUrl(); + while (!string.IsNullOrWhiteSpace(pageUrl)) + { + Console.Error.WriteLine($"[info] Processing: {pageUrl}"); + AutoampelPageResult pageResult; + try + { + pageResult = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl); + } + catch (Exception ex) + { + failed++; + Console.Error.WriteLine($"[warn] Autoampel page failed: {pageUrl} -> {ex.Message}"); + break; + } +/* WriteVehicleIfNew is defined outside this hunk; presumably it de-duplicates through the written set - confirm. */ + foreach (var vehicle in pageResult.Vehicles) + { + await WriteVehicleIfNew(vehicle); + } + + pageUrl = pageResult.NextPageUrl; + } + } + else if (Console.IsInputRedirected) { await foreach (var query in ReadInput()) { @@ -337,3 +1219,32 @@ string BuildMatchKey(HsnTsnVehicle vehicle) normalized = Regex.Replace(normalized, @"[^A-Z0-9]+", " ").Trim(); return Regex.Replace(normalized, @"\s+", " "); } +/* Reduced vehicle projection: the HSN/TSN identifiers, brand, model and the year range. String properties default to string.Empty; the years are nullable. */ +public sealed class CoreOutputRow +{ + public string HsnTsn { get; set; } = string.Empty; + public string Hsn { get; set; } = string.Empty; + public string Tsn { get; set; } = string.Empty; + public string Brand { get; set; } = string.Empty; + public string Model { get; set; } = string.Empty; + public int? YearTo { get; set; } + public int? YearFrom { get; set; } +} +/* Full vehicle row as used by the CSV reader and the PostgreSQL helpers in this file; each property corresponds to one column of the table created by EnsureVehicleTableAsync (HsnTsn to hsn_tsn, PowerPs to power_ps, and so on). String properties default to string.Empty; numeric properties are nullable. */ +public sealed class DbVehicleRow +{ + public string HsnTsn { get; set; } = string.Empty; + public string Hsn { get; set; } = string.Empty; + public string Tsn { get; set; } = string.Empty; + public string Brand { get; set; } = string.Empty; + public string VehicleType { get; set; } = string.Empty; + public string Model { get; set; } = string.Empty; + public string OfficialType { get; set; } = string.Empty; + public int? YearFrom { get; set; } + public int? YearTo { get; set; } + public int? PowerPs { get; set; } + public int? PowerKw { get; set; } + public int? DisplacementCcm { get; set; } + public string FuelType { get; set; } = string.Empty; + public string MatchKey { get; set; } = string.Empty; +}