|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
using CsvHelper;
|
|
|
|
|
using CsvHelper.Configuration;
|
|
|
|
|
using HsnTsnScraper;
|
|
|
|
|
using Npgsql;
|
|
|
|
|
|
|
|
|
|
// ---- Command-line parsing ---------------------------------------------------
// Mode switches are matched case-insensitively against the raw argument list.
var repairYears = args.Contains("--repair-years", StringComparer.OrdinalIgnoreCase);
var mergeCore = args.Contains("--merge-core", StringComparer.OrdinalIgnoreCase);
var mergeCoreDb = args.Contains("--merge-core-db", StringComparer.OrdinalIgnoreCase);
var includeDetails = args.Contains("--include-details", StringComparer.OrdinalIgnoreCase);

// Valued options; a missing option yields null unless a default is applied here.
var inputCsv = GetOptionValue(args, "--input-csv");
var outputCsv = GetOptionValue(args, "--output-csv");
var pgConnection = GetOptionValue(args, "--pg-connection");
var pgTable = GetOptionValue(args, "--pg-table") ?? "public.hsntsn_vehicle";

// The source name is normalised (trimmed, lower-cased) and defaults to "hsntsn".
var source = (GetOptionValue(args, "--source") ?? "hsntsn").Trim().ToLowerInvariant();

using var client = new HsnTsnClient();

// ---- Mode dispatch ----------------------------------------------------------
// Exactly one mode runs per invocation. Precedence when several switches are
// given: --merge-core, then --merge-core-db, then --repair-years, then scrape.
if (mergeCore)
{
    await RunMergeCoreMode(client, inputCsv, outputCsv);
    return;
}

if (mergeCoreDb)
{
    await RunMergeCoreDbMode(client, inputCsv, pgConnection, pgTable);
    return;
}

if (repairYears)
{
    await RunRepairYearsMode(client, inputCsv, outputCsv);
    return;
}

// Default mode: scrape once, honouring --source. An earlier two-argument call
// that ran before this one caused a second, source-less scrape and was removed.
await RunScrapeMode(client, includeDetails, source);
return;
|
|
|
|
|
|
|
|
|
|
// Merge-core mode.
//
// Reads a semicolon-delimited vehicle CSV, collapses it to one CoreOutputRow per
// HsnTsn key (case-insensitive), fills blank core fields per HSN from hsn-tsn.de
// and then from the autoampel.de index, and writes the result to a new CSV.
//
// inputPath  - input CSV; blank means "hsntsn.csv".
// outputPath - output CSV; blank means "hsntsn.core.csv". Must differ from the input.
//
// Validation problems are reported on stderr and the method returns without writing.
async Task RunMergeCoreMode(HsnTsnClient hsnTsnClient, string? inputPath, string? outputPath)
{
    var inputCsvPath = string.IsNullOrWhiteSpace(inputPath) ? "hsntsn.csv" : inputPath;
    var outputCsvPath = string.IsNullOrWhiteSpace(outputPath) ? "hsntsn.core.csv" : outputPath;

    if (Path.GetFullPath(inputCsvPath).Equals(Path.GetFullPath(outputCsvPath), StringComparison.OrdinalIgnoreCase))
    {
        Console.Error.WriteLine("[error] --input-csv and --output-csv cannot point to the same file.");
        return;
    }

    // Report a missing input like every other validation failure instead of
    // letting File.OpenRead throw an unhandled FileNotFoundException.
    if (!File.Exists(inputCsvPath))
    {
        Console.Error.WriteLine($"[error] Input CSV not found: {inputCsvPath}");
        return;
    }

    Console.Error.WriteLine($"[info] Merge-core mode started. input={inputCsvPath}, output={outputCsvPath}");

    var map = new Dictionary<string, CoreOutputRow>(StringComparer.OrdinalIgnoreCase);

    // The input is fully read and closed before any network work starts.
    await using (var inputStream = File.OpenRead(inputCsvPath))
    using (var inputReader = new StreamReader(inputStream))
    using (var csvReader = new CsvReader(inputReader, new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = ";",
        MissingFieldFound = null,
        HeaderValidated = null
    }))
    {
        await csvReader.ReadAsync();
        csvReader.ReadHeader();

        await foreach (var record in csvReader.GetRecordsAsync<HsnTsnVehicle>())
        {
            var key = record.HsnTsn?.Trim();
            if (string.IsNullOrWhiteSpace(key))
            {
                continue;
            }

            if (!map.TryGetValue(key, out var row))
            {
                // First occurrence of this key: seed the row, using VehicleType when Model is blank.
                row = new CoreOutputRow
                {
                    HsnTsn = key,
                    Hsn = record.Hsn,
                    Tsn = record.Tsn,
                    Brand = record.Brand,
                    Model = !string.IsNullOrWhiteSpace(record.Model) ? record.Model : record.VehicleType,
                    YearFrom = record.YearFrom,
                    YearTo = record.YearTo
                };
                map[key] = row;
            }
            else
            {
                // Duplicate key: only fill fields that are still blank; never overwrite.
                if (string.IsNullOrWhiteSpace(row.Hsn) && !string.IsNullOrWhiteSpace(record.Hsn)) row.Hsn = record.Hsn;
                if (string.IsNullOrWhiteSpace(row.Tsn) && !string.IsNullOrWhiteSpace(record.Tsn)) row.Tsn = record.Tsn;
                if (string.IsNullOrWhiteSpace(row.Brand) && !string.IsNullOrWhiteSpace(record.Brand)) row.Brand = record.Brand;
                if (string.IsNullOrWhiteSpace(row.Model) && !string.IsNullOrWhiteSpace(record.Model)) row.Model = record.Model;
                if (row.YearFrom is null && record.YearFrom is not null) row.YearFrom = record.YearFrom;
                if (row.YearTo is null && record.YearTo is not null) row.YearTo = record.YearTo;
            }
        }
    }

    // Rows without an HSN cannot be looked up remotely; they are still written out below.
    var byHsn = map.Values
        .Where(x => !string.IsNullOrWhiteSpace(x.Hsn))
        .GroupBy(x => x.Hsn, StringComparer.OrdinalIgnoreCase)
        .OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase)
        .ToList();

    var hsnIndex = 0;
    var filledFromHsnTsn = 0;
    var filledFromAutoampel = 0;
    var failed = 0;

    // Built lazily on the first HSN that still has gaps, then reused for all later groups.
    Dictionary<string, HsnTsnVehicle>? autoampelIndex = null;

    foreach (var hsnGroup in byHsn)
    {
        hsnIndex++;
        var hsn = hsnGroup.Key;
        Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: checking hsn-tsn.de");
        var yearMissingBeforeHsnTsn = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null);

        Dictionary<string, HsnTsnVehicle> fromHsnTsn;
        try
        {
            fromHsnTsn = (await hsnTsnClient.GetVehiclesByHsnFromHsnTsnAsync(hsn))
                .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase)
                .Select(g => g.First())
                .ToDictionary(v => v.HsnTsn, v => v, StringComparer.OrdinalIgnoreCase);
        }
        catch (Exception ex)
        {
            // Best effort: a failed lookup is counted and the group continues with no matches.
            failed++;
            Console.Error.WriteLine($"[warn] HSN {hsn} failed on hsn-tsn.de -> {ex.Message}");
            fromHsnTsn = new Dictionary<string, HsnTsnVehicle>(StringComparer.OrdinalIgnoreCase);
        }

        // Matches are counted while filling, avoiding a second lookup pass.
        var matchedHsnTsn = 0;
        foreach (var row in hsnGroup)
        {
            if (fromHsnTsn.TryGetValue(row.HsnTsn, out var sourceRow))
            {
                matchedHsnTsn++;
                filledFromHsnTsn += FillCoreFields(row, sourceRow, fillYears: false);
            }
        }

        var yearMissingAfterHsnTsn = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null);
        var yearFilledByHsnTsn = Math.Max(0, yearMissingBeforeHsnTsn - yearMissingAfterHsnTsn);
        Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: hsn-tsn matched={matchedHsnTsn}, year_filled={yearFilledByHsnTsn}, still_missing={yearMissingAfterHsnTsn}");

        var needsAutoampel = hsnGroup.Any(r => r.YearFrom is null || r.YearTo is null || string.IsNullOrWhiteSpace(r.Brand) || string.IsNullOrWhiteSpace(r.Model));
        if (!needsAutoampel)
        {
            continue;
        }

        Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: checking autoampel.de");
        var yearMissingBeforeAutoampel = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null);

        if (autoampelIndex is null)
        {
            try
            {
                autoampelIndex = await BuildAutoampelIndexByHsnTsnAsync(hsnTsnClient);
            }
            catch (Exception ex)
            {
                // An empty index is stored so the build is not retried for every later group.
                failed++;
                Console.Error.WriteLine($"[warn] autoampel index build failed -> {ex.Message}");
                autoampelIndex = new Dictionary<string, HsnTsnVehicle>(StringComparer.OrdinalIgnoreCase);
            }
        }

        var matchedAutoampel = 0;
        foreach (var row in hsnGroup)
        {
            if (autoampelIndex.TryGetValue(row.HsnTsn, out var sourceRow))
            {
                matchedAutoampel++;
                filledFromAutoampel += FillCoreFields(row, sourceRow, fillYears: true);
            }
        }

        var yearMissingAfterAutoampel = hsnGroup.Count(r => r.YearFrom is null || r.YearTo is null);
        var yearFilledByAutoampel = Math.Max(0, yearMissingBeforeAutoampel - yearMissingAfterAutoampel);
        Console.Error.WriteLine($"[info] [{hsnIndex}/{byHsn.Count}] HSN {hsn}: autoampel matched={matchedAutoampel}, year_filled={yearFilledByAutoampel}, still_missing={yearMissingAfterAutoampel}");
    }

    await using var outputStream = File.Create(outputCsvPath);
    await using var outputWriter = new StreamWriter(outputStream);
    await using var csvWriter = new CsvWriter(outputWriter, new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = ";"
    });

    csvWriter.WriteHeader<CoreOutputRow>();
    await csvWriter.NextRecordAsync();

    // Every merged row is written, ordered by HSN and then TSN.
    foreach (var row in map.Values
        .OrderBy(x => x.Hsn, StringComparer.OrdinalIgnoreCase)
        .ThenBy(x => x.Tsn, StringComparer.OrdinalIgnoreCase))
    {
        csvWriter.WriteRecord(row);
        await csvWriter.NextRecordAsync();
    }

    await csvWriter.FlushAsync();
    Console.Error.WriteLine($"[info] Merge-core finished. rows={map.Count}, filled_hsntsn={filledFromHsnTsn}, filled_autoampel={filledFromAutoampel}, failed={failed}");
}
|
|
|
|
|
|
|
|
|
|
// Merge-core-db mode.
//
// Seeds the PostgreSQL vehicle table (from a CSV when one is given and exists,
// otherwise from a live hsn-tsn.de scrape), enriches the rows the database
// reports as incomplete using hsn-tsn.de, writes them back, and finally upserts
// every autoampel.de list page.
//
// inputPath           - optional seed CSV.
// connectionStringArg - PostgreSQL connection string; falls back to the HSNTSN_PG env var.
// tableName           - target table, optionally schema-qualified.
async Task RunMergeCoreDbMode(HsnTsnClient hsnTsnClient, string? inputPath, string? connectionStringArg, string tableName)
{
    var inputCsvPath = string.IsNullOrWhiteSpace(inputPath) ? string.Empty : inputPath;
    var hasInputPath = !string.IsNullOrWhiteSpace(inputCsvPath);

    var connectionString = !string.IsNullOrWhiteSpace(connectionStringArg)
        ? connectionStringArg
        : Environment.GetEnvironmentVariable("HSNTSN_PG");
    if (string.IsNullOrWhiteSpace(connectionString))
    {
        Console.Error.WriteLine("[error] Missing PostgreSQL connection. Use --pg-connection or HSNTSN_PG env var.");
        return;
    }

    // An empty result means the table name was rejected by validation.
    var quotedTable = QuoteQualifiedTableName(tableName);
    if (string.IsNullOrWhiteSpace(quotedTable))
    {
        Console.Error.WriteLine($"[error] Invalid --pg-table value: {tableName}");
        return;
    }

    var inputLabel = hasInputPath ? inputCsvPath : "<none>";
    Console.Error.WriteLine($"[info] Merge-core-db started. input={inputLabel}, table={tableName}");

    // ---- Seed collection ----
    Dictionary<string, DbVehicleRow> seedRows;
    if (hasInputPath && File.Exists(inputCsvPath))
    {
        seedRows = await ReadDbRowsFromCsvAsync(inputCsvPath);
        Console.Error.WriteLine($"[info] Seed source: CSV ({seedRows.Count} unique HsnTsn)");
    }
    else
    {
        if (hasInputPath)
        {
            Console.Error.WriteLine($"[warn] Input CSV not found: {inputCsvPath}. Falling back to live scrape.");
        }

        seedRows = await BuildSeedRowsFromHsnTsnAsync(hsnTsnClient);
        Console.Error.WriteLine($"[info] Seed source: hsn-tsn.de live scrape ({seedRows.Count} unique HsnTsn)");
    }

    if (seedRows.Count == 0)
    {
        Console.Error.WriteLine("[warn] No seed data collected.");
        return;
    }

    // ---- Seed upsert ----
    await using var conn = new NpgsqlConnection(connectionString);
    await conn.OpenAsync();
    await EnsureVehicleTableAsync(conn, quotedTable);

    await BulkUpsertVehicleRowsAsync(conn, quotedTable, seedRows.Values);
    Console.Error.WriteLine($"[info] Seed upsert finished. rows={seedRows.Count}");

    // ---- Enrichment from hsn-tsn.de, one request per HSN ----
    var rowsToEnrich = await LoadVehicleRowsNeedingEnrichmentAsync(conn, quotedTable);
    var groups = rowsToEnrich
        .Where(r => !string.IsNullOrWhiteSpace(r.Hsn))
        .GroupBy(r => r.Hsn, StringComparer.OrdinalIgnoreCase)
        .OrderBy(g => g.Key, StringComparer.OrdinalIgnoreCase)
        .ToList();

    var filledFromHsnTsn = 0;
    var failed = 0;

    for (var i = 0; i < groups.Count; i++)
    {
        var group = groups[i];
        var position = i + 1;
        var hsn = group.Key;

        Console.Error.WriteLine($"[info] [{position}/{groups.Count}] HSN {hsn}: checking hsn-tsn.de");
        var missingYearsBefore = group.Count(r => r.YearFrom is null || r.YearTo is null);

        Dictionary<string, HsnTsnVehicle> lookup;
        try
        {
            var fetched = await hsnTsnClient.GetVehiclesByHsnFromHsnTsnAsync(hsn);
            lookup = fetched
                .GroupBy(v => v.HsnTsn, StringComparer.OrdinalIgnoreCase)
                .Select(g => g.First())
                .ToDictionary(v => v.HsnTsn, v => v, StringComparer.OrdinalIgnoreCase);
        }
        catch (Exception ex)
        {
            // Best effort: count the failure and continue with an empty lookup.
            failed++;
            Console.Error.WriteLine($"[warn] HSN {hsn} failed on hsn-tsn.de -> {ex.Message}");
            lookup = new Dictionary<string, HsnTsnVehicle>(StringComparer.OrdinalIgnoreCase);
        }

        var matched = 0;
        foreach (var candidate in group)
        {
            if (!lookup.TryGetValue(candidate.HsnTsn, out var scraped))
            {
                continue;
            }

            matched++;
            filledFromHsnTsn += FillDbFields(candidate, scraped);
        }

        var missingYearsAfter = group.Count(r => r.YearFrom is null || r.YearTo is null);
        var yearsFilled = Math.Max(0, missingYearsBefore - missingYearsAfter);
        Console.Error.WriteLine($"[info] [{position}/{groups.Count}] HSN {hsn}: hsn-tsn matched={matched}, year_filled={yearsFilled}, still_missing={missingYearsAfter}");

        // NOTE(review): nothing follows this check inside the loop body, so it
        // currently has no observable effect; kept as found.
        if (!group.Any(NeedsAutoampelLookup))
        {
            continue;
        }
    }

    // ---- Back-fill match keys that are still blank ----
    foreach (var pending in rowsToEnrich)
    {
        if (!string.IsNullOrWhiteSpace(pending.MatchKey))
        {
            continue;
        }

        var probe = new HsnTsnVehicle
        {
            Brand = pending.Brand,
            VehicleType = pending.VehicleType,
            Model = pending.Model,
            OfficialType = pending.OfficialType,
            PowerKw = pending.PowerKw,
            DisplacementCcm = pending.DisplacementCcm,
            FuelType = pending.FuelType
        };
        pending.MatchKey = BuildMatchKey(probe);
    }

    await BulkUpsertVehicleRowsAsync(conn, quotedTable, rowsToEnrich);

    // ---- Autoampel pages go straight into the table ----
    var autoampelUpserted = 0;
    try
    {
        autoampelUpserted = await UpsertAutoampelPagesIntoDbAsync(hsnTsnClient, conn, quotedTable);
    }
    catch (Exception ex)
    {
        failed++;
        Console.Error.WriteLine($"[warn] autoampel page-upsert failed -> {ex.Message}");
    }

    Console.Error.WriteLine($"[info] Merge-core-db finished. checked={rowsToEnrich.Count}, filled_hsntsn={filledFromHsnTsn}, autoampel_upserted={autoampelUpserted}, failed={failed}");
}
|
|
|
|
|
|
|
|
|
|
// Builds the database seed by scraping every brand page from hsn-tsn.de.
//
// Returns one DbVehicleRow per HsnTsn key (case-insensitive). When a key occurs
// more than once, later occurrences only fill fields that are still blank.
// If the brand URL list cannot be fetched, the error is logged and an empty map
// is returned; a failing brand page is logged and skipped.
async Task<Dictionary<string, DbVehicleRow>> BuildSeedRowsFromHsnTsnAsync(HsnTsnClient hsnTsnClient)
{
    var map = new Dictionary<string, DbVehicleRow>(StringComparer.OrdinalIgnoreCase);

    IReadOnlyList<string> brandUrls;
    try
    {
        brandUrls = await hsnTsnClient.GetBrandPageUrls();
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine($"[error] Could not fetch brand urls for DB seed: {ex.Message}");
        return map;
    }

    var pageIndex = 0;
    foreach (var url in brandUrls)
    {
        pageIndex++;
        Console.Error.WriteLine($"[info] [seed {pageIndex}/{brandUrls.Count}] {url}");

        IReadOnlyList<HsnTsnVehicle> vehicles;
        try
        {
            vehicles = await hsnTsnClient.GetVehiclesFromBrandPageAsync(url);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"[warn] Seed page failed: {url} -> {ex.Message}");
            continue;
        }

        foreach (var v in vehicles)
        {
            // A blank key cannot identify a vehicle: a null key would make the
            // dictionary lookup throw and abort the whole seed, and an empty key
            // would create a row with an empty primary key. The CSV seed reader
            // skips such records as well.
            if (string.IsNullOrWhiteSpace(v.HsnTsn))
            {
                continue;
            }

            if (!map.TryGetValue(v.HsnTsn, out var row))
            {
                map[v.HsnTsn] = new DbVehicleRow
                {
                    HsnTsn = v.HsnTsn,
                    Hsn = v.Hsn,
                    Tsn = v.Tsn,
                    Brand = v.Brand,
                    VehicleType = v.VehicleType,
                    Model = v.Model,
                    OfficialType = v.OfficialType,
                    YearFrom = v.YearFrom,
                    YearTo = v.YearTo,
                    PowerPs = v.PowerPs,
                    PowerKw = v.PowerKw,
                    DisplacementCcm = v.DisplacementCcm,
                    FuelType = v.FuelType,
                    MatchKey = v.MatchKey
                };
                continue;
            }

            // Known key: fill gaps only, never overwrite a value that is already present.
            if (string.IsNullOrWhiteSpace(row.Hsn) && !string.IsNullOrWhiteSpace(v.Hsn)) row.Hsn = v.Hsn;
            if (string.IsNullOrWhiteSpace(row.Tsn) && !string.IsNullOrWhiteSpace(v.Tsn)) row.Tsn = v.Tsn;
            if (string.IsNullOrWhiteSpace(row.Brand) && !string.IsNullOrWhiteSpace(v.Brand)) row.Brand = v.Brand;
            if (string.IsNullOrWhiteSpace(row.VehicleType) && !string.IsNullOrWhiteSpace(v.VehicleType)) row.VehicleType = v.VehicleType;
            if (string.IsNullOrWhiteSpace(row.Model) && !string.IsNullOrWhiteSpace(v.Model)) row.Model = v.Model;
            if (string.IsNullOrWhiteSpace(row.OfficialType) && !string.IsNullOrWhiteSpace(v.OfficialType)) row.OfficialType = v.OfficialType;
            if (row.YearFrom is null && v.YearFrom is not null) row.YearFrom = v.YearFrom;
            if (row.YearTo is null && v.YearTo is not null) row.YearTo = v.YearTo;
            if (row.PowerPs is null && v.PowerPs is not null) row.PowerPs = v.PowerPs;
            if (row.PowerKw is null && v.PowerKw is not null) row.PowerKw = v.PowerKw;
            if (row.DisplacementCcm is null && v.DisplacementCcm is not null) row.DisplacementCcm = v.DisplacementCcm;
            if (string.IsNullOrWhiteSpace(row.FuelType) && !string.IsNullOrWhiteSpace(v.FuelType)) row.FuelType = v.FuelType;
            if (string.IsNullOrWhiteSpace(row.MatchKey) && !string.IsNullOrWhiteSpace(v.MatchKey)) row.MatchKey = v.MatchKey;
        }
    }

    return map;
}
|
|
|
|
|
|
|
|
|
|
// Walks the autoampel.de full list page by page, following each page's
// NextPageUrl until it is blank, and returns the vehicles keyed by HsnTsn
// (case-insensitive). The first vehicle seen for a key wins.
async Task<Dictionary<string, HsnTsnVehicle>> BuildAutoampelIndexByHsnTsnAsync(HsnTsnClient hsnTsnClient)
{
    var byKey = new Dictionary<string, HsnTsnVehicle>(StringComparer.OrdinalIgnoreCase);
    var nextUrl = hsnTsnClient.GetAutoampelFullListUrl();

    while (true)
    {
        if (string.IsNullOrWhiteSpace(nextUrl))
        {
            break;
        }

        Console.Error.WriteLine($"[info] Processing: {nextUrl}");
        var listPage = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(nextUrl);

        foreach (var entry in listPage.Vehicles)
        {
            // TryAdd leaves an existing entry untouched, so earlier pages take precedence.
            byKey.TryAdd(entry.HsnTsn, entry);
        }

        nextUrl = listPage.NextPageUrl;
    }

    Console.Error.WriteLine($"[info] Autoampel index ready. unique={byKey.Count}");
    return byKey;
}
|
|
|
|
|
|
|
|
|
|
// Streams the autoampel.de full list into the database one page at a time.
//
// Each page is de-duplicated by HsnTsn (case-insensitive, first entry wins),
// converted to DbVehicleRow and bulk-upserted. Returns the total number of rows
// handed to the upsert across all pages.
async Task<int> UpsertAutoampelPagesIntoDbAsync(HsnTsnClient hsnTsnClient, NpgsqlConnection conn, string quotedTable)
{
    var upsertedSoFar = 0;
    var pageNumber = 0;
    var nextUrl = hsnTsnClient.GetAutoampelFullListUrl();

    while (!string.IsNullOrWhiteSpace(nextUrl))
    {
        pageNumber++;
        Console.Error.WriteLine($"[info] Processing: {nextUrl}");
        var listPage = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(nextUrl);

        var pageRows = new List<DbVehicleRow>();
        foreach (var sameKey in listPage.Vehicles.GroupBy(vehicle => vehicle.HsnTsn, StringComparer.OrdinalIgnoreCase))
        {
            var first = sameKey.First();
            pageRows.Add(new DbVehicleRow
            {
                HsnTsn = first.HsnTsn,
                Hsn = first.Hsn,
                Tsn = first.Tsn,
                Brand = first.Brand,
                VehicleType = first.VehicleType,
                // Derive a model when the scraped one is blank.
                Model = string.IsNullOrWhiteSpace(first.Model) ? DeriveModel(first) : first.Model,
                OfficialType = first.OfficialType,
                YearFrom = first.YearFrom,
                YearTo = first.YearTo,
                PowerPs = first.PowerPs,
                PowerKw = first.PowerKw,
                DisplacementCcm = first.DisplacementCcm,
                FuelType = first.FuelType,
                // Compute a match key when the scraped one is blank.
                MatchKey = string.IsNullOrWhiteSpace(first.MatchKey) ? BuildMatchKey(first) : first.MatchKey
            });
        }

        if (pageRows.Count > 0)
        {
            await BulkUpsertVehicleRowsAsync(conn, quotedTable, pageRows);
            upsertedSoFar += pageRows.Count;
        }

        Console.Error.WriteLine($"[info] Autoampel page {pageNumber} upserted={pageRows.Count}, total={upsertedSoFar}");
        nextUrl = listPage.NextPageUrl;
    }

    return upsertedSoFar;
}
|
|
|
|
|
|
|
|
|
|
// Copies core values from source into target wherever target is still blank.
//
// Brand is copied as-is and Model comes from DeriveModel(source). YearFrom and
// YearTo are only considered when fillYears is true. Values already present on
// target are never replaced. Returns the number of fields that were set.
int FillCoreFields(CoreOutputRow target, HsnTsnVehicle source, bool fillYears)
{
    var filled = 0;

    var brandMissing = string.IsNullOrWhiteSpace(target.Brand);
    if (brandMissing && !string.IsNullOrWhiteSpace(source.Brand))
    {
        target.Brand = source.Brand;
        filled += 1;
    }

    if (string.IsNullOrWhiteSpace(target.Model))
    {
        var derived = DeriveModel(source);
        if (!string.IsNullOrWhiteSpace(derived))
        {
            target.Model = derived;
            filled += 1;
        }
    }

    // Years are opt-in per data source.
    if (!fillYears)
    {
        return filled;
    }

    if (target.YearFrom is null && source.YearFrom is not null)
    {
        target.YearFrom = source.YearFrom;
        filled += 1;
    }

    if (target.YearTo is null && source.YearTo is not null)
    {
        target.YearTo = source.YearTo;
        filled += 1;
    }

    return filled;
}
|
|
|
|
|
|
|
|
|
|
// Returns a model name for the vehicle.
//
// A non-blank Model is returned unchanged. Otherwise the trimmed VehicleType is
// used, with a leading "<Brand> " prefix (case-insensitive) removed when present.
// Returns an empty string when neither Model nor VehicleType has content.
string DeriveModel(HsnTsnVehicle vehicle)
{
    if (!string.IsNullOrWhiteSpace(vehicle.Model))
    {
        return vehicle.Model;
    }

    var typeText = (vehicle.VehicleType ?? string.Empty).Trim();
    if (typeText.Length == 0)
    {
        return string.Empty;
    }

    var brandText = (vehicle.Brand ?? string.Empty).Trim();
    if (brandText.Length == 0)
    {
        return typeText;
    }

    // The prefix must include the separating space, so a bare brand name is kept as-is.
    var prefix = brandText + " ";
    return typeText.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
        ? typeText.Substring(prefix.Length).Trim()
        : typeText;
}
|
|
|
|
|
|
|
|
|
|
// Returns true when at least one of the row's text fields (Brand, VehicleType,
// Model, FuelType, MatchKey) is blank or one of its nullable fields (YearFrom,
// YearTo, PowerPs, PowerKw, DisplacementCcm) is null.
static bool NeedsAutoampelLookup(DbVehicleRow row)
{
    var textComplete =
        !string.IsNullOrWhiteSpace(row.Brand)
        && !string.IsNullOrWhiteSpace(row.VehicleType)
        && !string.IsNullOrWhiteSpace(row.Model)
        && !string.IsNullOrWhiteSpace(row.FuelType)
        && !string.IsNullOrWhiteSpace(row.MatchKey);

    var valuesComplete =
        row.YearFrom is not null
        && row.YearTo is not null
        && row.PowerPs is not null
        && row.PowerKw is not null
        && row.DisplacementCcm is not null;

    return !(textComplete && valuesComplete);
}
|
|
|
|
|
|
|
|
|
|
// Copies values from source into target wherever target is still blank (text)
// or null (years, power, displacement). Model is taken from DeriveModel(source).
// Values already present on target are never replaced.
// Returns the number of fields that were set.
int FillDbFields(DbVehicleRow target, HsnTsnVehicle source)
{
    var filled = 0;

    // ---- Text fields ----
    if (string.IsNullOrWhiteSpace(target.Brand) && !string.IsNullOrWhiteSpace(source.Brand))
    {
        target.Brand = source.Brand;
        filled += 1;
    }

    if (string.IsNullOrWhiteSpace(target.VehicleType) && !string.IsNullOrWhiteSpace(source.VehicleType))
    {
        target.VehicleType = source.VehicleType;
        filled += 1;
    }

    if (string.IsNullOrWhiteSpace(target.Model))
    {
        var derived = DeriveModel(source);
        if (!string.IsNullOrWhiteSpace(derived))
        {
            target.Model = derived;
            filled += 1;
        }
    }

    if (string.IsNullOrWhiteSpace(target.OfficialType) && !string.IsNullOrWhiteSpace(source.OfficialType))
    {
        target.OfficialType = source.OfficialType;
        filled += 1;
    }

    if (string.IsNullOrWhiteSpace(target.FuelType) && !string.IsNullOrWhiteSpace(source.FuelType))
    {
        target.FuelType = source.FuelType;
        filled += 1;
    }

    if (string.IsNullOrWhiteSpace(target.MatchKey) && !string.IsNullOrWhiteSpace(source.MatchKey))
    {
        target.MatchKey = source.MatchKey;
        filled += 1;
    }

    // ---- Nullable fields ----
    if (target.YearFrom is null && source.YearFrom is not null)
    {
        target.YearFrom = source.YearFrom;
        filled += 1;
    }

    if (target.YearTo is null && source.YearTo is not null)
    {
        target.YearTo = source.YearTo;
        filled += 1;
    }

    if (target.PowerPs is null && source.PowerPs is not null)
    {
        target.PowerPs = source.PowerPs;
        filled += 1;
    }

    if (target.PowerKw is null && source.PowerKw is not null)
    {
        target.PowerKw = source.PowerKw;
        filled += 1;
    }

    if (target.DisplacementCcm is null && source.DisplacementCcm is not null)
    {
        target.DisplacementCcm = source.DisplacementCcm;
        filled += 1;
    }

    return filled;
}
|
|
|
|
|
|
|
|
|
|
// Loads a semicolon-delimited vehicle CSV into one DbVehicleRow per HsnTsn key.
//
// Keys are trimmed and compared case-insensitively; records with a blank key are
// skipped. When a key occurs more than once, later records only fill fields that
// are still blank on the first one.
async Task<Dictionary<string, DbVehicleRow>> ReadDbRowsFromCsvAsync(string inputCsvPath)
{
    var rowsByKey = new Dictionary<string, DbVehicleRow>(StringComparer.OrdinalIgnoreCase);

    // Missing columns and header mismatches are tolerated rather than reported.
    var csvOptions = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = ";",
        MissingFieldFound = null,
        HeaderValidated = null
    };

    await using var fileStream = File.OpenRead(inputCsvPath);
    using var textReader = new StreamReader(fileStream);
    using var csv = new CsvReader(textReader, csvOptions);

    await csv.ReadAsync();
    csv.ReadHeader();

    await foreach (var item in csv.GetRecordsAsync<HsnTsnVehicle>())
    {
        var key = item.HsnTsn?.Trim();
        if (string.IsNullOrWhiteSpace(key))
        {
            continue;
        }

        if (!rowsByKey.TryGetValue(key, out var existing))
        {
            // First sighting: take the record's values as they are.
            rowsByKey[key] = new DbVehicleRow
            {
                HsnTsn = key,
                Hsn = item.Hsn,
                Tsn = item.Tsn,
                Brand = item.Brand,
                VehicleType = item.VehicleType,
                Model = item.Model,
                OfficialType = item.OfficialType,
                YearFrom = item.YearFrom,
                YearTo = item.YearTo,
                PowerPs = item.PowerPs,
                PowerKw = item.PowerKw,
                DisplacementCcm = item.DisplacementCcm,
                FuelType = item.FuelType,
                MatchKey = item.MatchKey
            };
            continue;
        }

        // Repeat sighting: fill gaps only.
        if (string.IsNullOrWhiteSpace(existing.Hsn) && !string.IsNullOrWhiteSpace(item.Hsn)) { existing.Hsn = item.Hsn; }
        if (string.IsNullOrWhiteSpace(existing.Tsn) && !string.IsNullOrWhiteSpace(item.Tsn)) { existing.Tsn = item.Tsn; }
        if (string.IsNullOrWhiteSpace(existing.Brand) && !string.IsNullOrWhiteSpace(item.Brand)) { existing.Brand = item.Brand; }
        if (string.IsNullOrWhiteSpace(existing.VehicleType) && !string.IsNullOrWhiteSpace(item.VehicleType)) { existing.VehicleType = item.VehicleType; }
        if (string.IsNullOrWhiteSpace(existing.Model) && !string.IsNullOrWhiteSpace(item.Model)) { existing.Model = item.Model; }
        if (string.IsNullOrWhiteSpace(existing.OfficialType) && !string.IsNullOrWhiteSpace(item.OfficialType)) { existing.OfficialType = item.OfficialType; }
        if (existing.YearFrom is null && item.YearFrom is not null) { existing.YearFrom = item.YearFrom; }
        if (existing.YearTo is null && item.YearTo is not null) { existing.YearTo = item.YearTo; }
        if (existing.PowerPs is null && item.PowerPs is not null) { existing.PowerPs = item.PowerPs; }
        if (existing.PowerKw is null && item.PowerKw is not null) { existing.PowerKw = item.PowerKw; }
        if (existing.DisplacementCcm is null && item.DisplacementCcm is not null) { existing.DisplacementCcm = item.DisplacementCcm; }
        if (string.IsNullOrWhiteSpace(existing.FuelType) && !string.IsNullOrWhiteSpace(item.FuelType)) { existing.FuelType = item.FuelType; }
        if (string.IsNullOrWhiteSpace(existing.MatchKey) && !string.IsNullOrWhiteSpace(item.MatchKey)) { existing.MatchKey = item.MatchKey; }
    }

    return rowsByKey;
}
|
|
|
|
|
|
|
|
|
|
// Validates a table name of the form "table" or "schema.table" and returns it
// with every part wrapped in double quotes, e.g. public.t -> "public"."t".
//
// Parts are trimmed and empty parts are dropped before validation. Each part
// must start with an ASCII letter or underscore and continue with ASCII letters,
// digits or underscores. Returns an empty string when the input is blank, has
// more than two parts, or contains an invalid part.
static string QuoteQualifiedTableName(string raw)
{
    if (string.IsNullOrWhiteSpace(raw))
    {
        return string.Empty;
    }

    var segments = raw.Split('.', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (segments.Length is not (1 or 2))
    {
        return string.Empty;
    }

    // Only plain identifiers are accepted, so the quoted parts need no escaping.
    var allValid = segments.All(segment => Regex.IsMatch(segment, @"^[A-Za-z_][A-Za-z0-9_]*$"));
    if (!allValid)
    {
        return string.Empty;
    }

    var quoted = segments.Select(segment => $"\"{segment}\"");
    return string.Join(".", quoted);
}
|
|
|
|
|
|
|
|
|
|
// Creates the vehicle table when it does not exist yet (CREATE TABLE IF NOT EXISTS).
//
// quotedTable is interpolated into the statement text as-is, so it must already
// be a validated, quoted identifier.
async Task EnsureVehicleTableAsync(NpgsqlConnection conn, string quotedTable)
{
    var ddl = $@"
CREATE TABLE IF NOT EXISTS {quotedTable} (
    hsn_tsn text PRIMARY KEY,
    hsn text NOT NULL,
    tsn text NOT NULL,
    brand text NULL,
    vehicle_type text NULL,
    model text NULL,
    official_type text NULL,
    year_to integer NULL,
    year_from integer NULL,
    power_ps integer NULL,
    power_kw integer NULL,
    displacement_ccm integer NULL,
    fuel_type text NULL,
    match_key text NULL
);";

    await using var createCommand = new NpgsqlCommand(ddl, conn);
    await createCommand.ExecuteNonQueryAsync();
}
|
|
|
|
|
|
|
|
|
|
// Inserts every row into the vehicle table inside a single transaction, one statement per row.
// On a primary-key conflict (hsn_tsn) the existing row is kept and only its missing values
// are filled: text columns that are NULL or '' and integer columns that are NULL take the
// incoming value; everything already present is left untouched.
// conn:        connection the transaction and command run on.
// quotedTable: table name interpolated verbatim into the SQL text; must already be a safe,
//              quoted identifier.
// rows:        rows to upsert; blank text values and missing numbers are sent as NULL,
//              except hsn/tsn which are sent as '' when null.
async Task BulkUpsertVehicleRowsAsync(NpgsqlConnection conn, string quotedTable, IEnumerable<DbVehicleRow> rows)
{
    // Blank or whitespace-only text is stored as NULL.
    static object DbText(string? value) => string.IsNullOrWhiteSpace(value) ? (object)DBNull.Value : value;

    // A missing number is stored as NULL.
    static object DbInt(int? value) => value.HasValue ? (object)value.Value : DBNull.Value;

    await using var transaction = await conn.BeginTransactionAsync();

    var upsertSql = $@"
INSERT INTO {quotedTable} AS t (hsn_tsn, hsn, tsn, brand, vehicle_type, model, official_type, year_to, year_from, power_ps, power_kw, displacement_ccm, fuel_type, match_key)
VALUES (@hsn_tsn, @hsn, @tsn, @brand, @vehicle_type, @model, @official_type, @year_to, @year_from, @power_ps, @power_kw, @displacement_ccm, @fuel_type, @match_key)
ON CONFLICT (hsn_tsn) DO UPDATE
SET hsn = COALESCE(NULLIF(t.hsn, ''), EXCLUDED.hsn),
    tsn = COALESCE(NULLIF(t.tsn, ''), EXCLUDED.tsn),
    brand = COALESCE(NULLIF(t.brand, ''), EXCLUDED.brand),
    vehicle_type = COALESCE(NULLIF(t.vehicle_type, ''), EXCLUDED.vehicle_type),
    model = COALESCE(NULLIF(t.model, ''), EXCLUDED.model),
    official_type = COALESCE(NULLIF(t.official_type, ''), EXCLUDED.official_type),
    year_to = COALESCE(t.year_to, EXCLUDED.year_to),
    year_from = COALESCE(t.year_from, EXCLUDED.year_from),
    power_ps = COALESCE(t.power_ps, EXCLUDED.power_ps),
    power_kw = COALESCE(t.power_kw, EXCLUDED.power_kw),
    displacement_ccm = COALESCE(t.displacement_ccm, EXCLUDED.displacement_ccm),
    fuel_type = COALESCE(NULLIF(t.fuel_type, ''), EXCLUDED.fuel_type),
    match_key = COALESCE(NULLIF(t.match_key, ''), EXCLUDED.match_key);";

    await using var upsertCommand = new NpgsqlCommand(upsertSql, conn, transaction);

    // The command is built once with typed parameters; only the values change per row.
    var textType = NpgsqlTypes.NpgsqlDbType.Text;
    var integerType = NpgsqlTypes.NpgsqlDbType.Integer;
    var parameterSpecs = new (string Name, NpgsqlTypes.NpgsqlDbType DbType)[]
    {
        ("hsn_tsn", textType),
        ("hsn", textType),
        ("tsn", textType),
        ("brand", textType),
        ("vehicle_type", textType),
        ("model", textType),
        ("official_type", textType),
        ("year_to", integerType),
        ("year_from", integerType),
        ("power_ps", integerType),
        ("power_kw", integerType),
        ("displacement_ccm", integerType),
        ("fuel_type", textType),
        ("match_key", textType),
    };

    foreach (var (parameterName, parameterType) in parameterSpecs)
    {
        upsertCommand.Parameters.Add(new NpgsqlParameter(parameterName, parameterType));
    }

    var parameters = upsertCommand.Parameters;
    foreach (var vehicle in rows)
    {
        parameters["hsn_tsn"].Value = vehicle.HsnTsn;
        // hsn and tsn are NOT NULL columns in the table definition, so they fall back to ''.
        parameters["hsn"].Value = vehicle.Hsn ?? string.Empty;
        parameters["tsn"].Value = vehicle.Tsn ?? string.Empty;
        parameters["brand"].Value = DbText(vehicle.Brand);
        parameters["vehicle_type"].Value = DbText(vehicle.VehicleType);
        parameters["model"].Value = DbText(vehicle.Model);
        parameters["official_type"].Value = DbText(vehicle.OfficialType);
        parameters["year_to"].Value = DbInt(vehicle.YearTo);
        parameters["year_from"].Value = DbInt(vehicle.YearFrom);
        parameters["power_ps"].Value = DbInt(vehicle.PowerPs);
        parameters["power_kw"].Value = DbInt(vehicle.PowerKw);
        parameters["displacement_ccm"].Value = DbInt(vehicle.DisplacementCcm);
        parameters["fuel_type"].Value = DbText(vehicle.FuelType);
        parameters["match_key"].Value = DbText(vehicle.MatchKey);

        await upsertCommand.ExecuteNonQueryAsync();
    }

    await transaction.CommitAsync();
}
|
|
|
|
|
|
|
|
|
|
// Reads every row of the vehicle table that has a non-empty hsn and is still missing at
// least one of: brand, vehicle_type, model, power_ps, power_kw, displacement_ccm, fuel_type,
// match_key, year_to, year_from. Rows are returned ordered by hsn, then tsn.
// (official_type is selected but is not part of the "missing" check.)
// conn:        connection the query runs on.
// quotedTable: table name interpolated verbatim into the SQL text; must already be a safe,
//              quoted identifier.
// Returns a list of rows; NULL text columns are mapped to an empty string and NULL integer
// columns to null.
async Task<List<DbVehicleRow>> LoadVehicleRowsNeedingEnrichmentAsync(NpgsqlConnection conn, string quotedTable)
{
    var selectSql = $@"
SELECT hsn_tsn, hsn, tsn, brand, vehicle_type, model, official_type, year_to, year_from, power_ps, power_kw, displacement_ccm, fuel_type, match_key
FROM {quotedTable}
WHERE hsn IS NOT NULL AND hsn <> ''
  AND (
    brand IS NULL OR brand = '' OR
    vehicle_type IS NULL OR vehicle_type = '' OR
    model IS NULL OR model = '' OR
    power_ps IS NULL OR
    power_kw IS NULL OR
    displacement_ccm IS NULL OR
    fuel_type IS NULL OR fuel_type = '' OR
    match_key IS NULL OR match_key = '' OR
    year_to IS NULL OR
    year_from IS NULL
  )
ORDER BY hsn, tsn;";

    var pending = new List<DbVehicleRow>();

    await using var selectCommand = new NpgsqlCommand(selectSql, conn);
    await using var reader = await selectCommand.ExecuteReaderAsync();

    // Column accessors keyed by ordinal in the SELECT list above.
    string ReadText(int ordinal) => reader.IsDBNull(ordinal) ? string.Empty : reader.GetString(ordinal);
    int? ReadInt(int ordinal) => reader.IsDBNull(ordinal) ? (int?)null : reader.GetInt32(ordinal);

    while (await reader.ReadAsync())
    {
        var vehicle = new DbVehicleRow();

        // hsn_tsn is read without a NULL check (it is the primary key in the table definition).
        vehicle.HsnTsn = reader.GetString(0);
        vehicle.Hsn = ReadText(1);
        vehicle.Tsn = ReadText(2);
        vehicle.Brand = ReadText(3);
        vehicle.VehicleType = ReadText(4);
        vehicle.Model = ReadText(5);
        vehicle.OfficialType = ReadText(6);
        vehicle.YearTo = ReadInt(7);
        vehicle.YearFrom = ReadInt(8);
        vehicle.PowerPs = ReadInt(9);
        vehicle.PowerKw = ReadInt(10);
        vehicle.DisplacementCcm = ReadInt(11);
        vehicle.FuelType = ReadText(12);
        vehicle.MatchKey = ReadText(13);

        pending.Add(vehicle);
    }

    return pending;
}
|
|
|
|
|
|
|
|
|
|
async Task RunRepairYearsMode(HsnTsnClient hsnTsnClient, string? inputPath, string? outputPath)
|
|
|
|
|
{
|
|
|
|
|
var inputCsvPath = string.IsNullOrWhiteSpace(inputPath) ? "hsntsn.csv" : inputPath;
|
|
|
|
@@ -113,13 +957,19 @@ async Task RunRepairYearsMode(HsnTsnClient hsnTsnClient, string? inputPath, stri
|
|
|
|
|
Console.Error.WriteLine($"[info] Repair finished. {processed}/{totalRecords}, updated={updated}, failed={failed}");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async Task RunScrapeMode(HsnTsnClient hsnTsnClient, bool includeDetailPages)
|
|
|
|
|
async Task RunScrapeMode(HsnTsnClient hsnTsnClient, bool includeDetailPages, string scrapeSource)
|
|
|
|
|
{
|
|
|
|
|
var written = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
|
|
|
|
var processed = 0;
|
|
|
|
|
var failed = 0;
|
|
|
|
|
|
|
|
|
|
Console.Error.WriteLine($"[info] Scrape started. includeDetails={includeDetailPages}");
|
|
|
|
|
if (scrapeSource is not ("hsntsn" or "autoampel"))
|
|
|
|
|
{
|
|
|
|
|
Console.Error.WriteLine($"[error] Unknown --source value: {scrapeSource}. Use 'hsntsn' or 'autoampel'.");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Console.Error.WriteLine($"[info] Scrape started. source={scrapeSource}, includeDetails={includeDetailPages}");
|
|
|
|
|
|
|
|
|
|
await using var csvWriter = new CsvWriter(Console.Out, new CsvConfiguration(CultureInfo.InvariantCulture)
|
|
|
|
|
{
|
|
|
|
@@ -130,7 +980,39 @@ async Task RunScrapeMode(HsnTsnClient hsnTsnClient, bool includeDetailPages)
|
|
|
|
|
await csvWriter.NextRecordAsync();
|
|
|
|
|
await csvWriter.FlushAsync();
|
|
|
|
|
|
|
|
|
|
if (Console.IsInputRedirected)
|
|
|
|
|
if (scrapeSource == "autoampel")
|
|
|
|
|
{
|
|
|
|
|
if (Console.IsInputRedirected)
|
|
|
|
|
{
|
|
|
|
|
Console.Error.WriteLine("[error] --source autoampel does not support stdin query mode. Run without stdin redirection.");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var pageUrl = hsnTsnClient.GetAutoampelFullListUrl();
|
|
|
|
|
while (!string.IsNullOrWhiteSpace(pageUrl))
|
|
|
|
|
{
|
|
|
|
|
Console.Error.WriteLine($"[info] Processing: {pageUrl}");
|
|
|
|
|
AutoampelPageResult pageResult;
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
pageResult = await hsnTsnClient.GetVehiclesFromAutoampelListPageAsync(pageUrl);
|
|
|
|
|
}
|
|
|
|
|
catch (Exception ex)
|
|
|
|
|
{
|
|
|
|
|
failed++;
|
|
|
|
|
Console.Error.WriteLine($"[warn] Autoampel page failed: {pageUrl} -> {ex.Message}");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
foreach (var vehicle in pageResult.Vehicles)
|
|
|
|
|
{
|
|
|
|
|
await WriteVehicleIfNew(vehicle);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pageUrl = pageResult.NextPageUrl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (Console.IsInputRedirected)
|
|
|
|
|
{
|
|
|
|
|
await foreach (var query in ReadInput())
|
|
|
|
|
{
|
|
|
|
@@ -337,3 +1219,32 @@ string BuildMatchKey(HsnTsnVehicle vehicle)
|
|
|
|
|
normalized = Regex.Replace(normalized, @"[^A-Z0-9]+", " ").Trim();
|
|
|
|
|
return Regex.Replace(normalized, @"\s+", " ");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Reduced vehicle row holding only the identifying keys, brand, model and year range.
/// Text properties default to an empty string; the year properties default to <c>null</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the declaration order (YearTo before YearFrom) is kept as-is; it presumably
/// determines the column order when this type is written through CsvHelper - confirm before reordering.
/// </remarks>
public sealed class CoreOutputRow
{
    /// <summary>Combined HSN/TSN key.</summary>
    public string HsnTsn { get; set; } = "";

    /// <summary>HSN part of the key.</summary>
    public string Hsn { get; set; } = "";

    /// <summary>TSN part of the key.</summary>
    public string Tsn { get; set; } = "";

    /// <summary>Brand name.</summary>
    public string Brand { get; set; } = "";

    /// <summary>Model name.</summary>
    public string Model { get; set; } = "";

    /// <summary>Last year of the year range, or <c>null</c> when unknown.</summary>
    public int? YearTo { get; set; }

    /// <summary>First year of the year range, or <c>null</c> when unknown.</summary>
    public int? YearFrom { get; set; }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// One row of the vehicle database table (columns hsn_tsn, hsn, tsn, brand, vehicle_type,
/// model, official_type, year_from, year_to, power_ps, power_kw, displacement_ccm,
/// fuel_type, match_key).
/// Text properties default to an empty string; numeric properties default to <c>null</c>.
/// </summary>
public sealed class DbVehicleRow
{
    /// <summary>Combined HSN/TSN key; the table's primary key.</summary>
    public string HsnTsn { get; set; } = "";

    /// <summary>HSN part of the key.</summary>
    public string Hsn { get; set; } = "";

    /// <summary>TSN part of the key.</summary>
    public string Tsn { get; set; } = "";

    /// <summary>Brand name.</summary>
    public string Brand { get; set; } = "";

    /// <summary>Vehicle type.</summary>
    public string VehicleType { get; set; } = "";

    /// <summary>Model name.</summary>
    public string Model { get; set; } = "";

    /// <summary>Official type designation.</summary>
    public string OfficialType { get; set; } = "";

    /// <summary>First year of the year range, or <c>null</c> when unknown.</summary>
    public int? YearFrom { get; set; }

    /// <summary>Last year of the year range, or <c>null</c> when unknown.</summary>
    public int? YearTo { get; set; }

    /// <summary>Power in PS, or <c>null</c> when unknown.</summary>
    public int? PowerPs { get; set; }

    /// <summary>Power in kW, or <c>null</c> when unknown.</summary>
    public int? PowerKw { get; set; }

    /// <summary>Displacement in ccm, or <c>null</c> when unknown.</summary>
    public int? DisplacementCcm { get; set; }

    /// <summary>Fuel type.</summary>
    public string FuelType { get; set; } = "";

    /// <summary>Normalized key used for matching records.</summary>
    public string MatchKey { get; set; } = "";
}
|
|
|
|
|