Enhance year parsing logic in HsnTsnClient; implement ParseBestYearRange method for improved accuracy
This commit is contained in:
@@ -127,8 +127,14 @@ public sealed class HsnTsnClient : IDisposable
|
|||||||
CanonicalUrl = doc.DocumentNode.SelectSingleNode("//meta[@property='url']")?.GetAttributeValue("content", string.Empty).Trim() ?? string.Empty
|
CanonicalUrl = doc.DocumentNode.SelectSingleNode("//meta[@property='url']")?.GetAttributeValue("content", string.Empty).Trim() ?? string.Empty
|
||||||
};
|
};
|
||||||
|
|
||||||
var yearText = HtmlEntity.DeEntitize(doc.DocumentNode.SelectSingleNode("//small[@property='vehicleModelDate']")?.InnerText ?? string.Empty).Trim();
|
var yearTexts = new[]
|
||||||
ParseYearRange(yearText, out var fromYear, out var toYear);
|
{
|
||||||
|
HtmlEntity.DeEntitize(doc.DocumentNode.SelectSingleNode("//small[@property='vehicleModelDate']")?.InnerText ?? string.Empty).Trim(),
|
||||||
|
HtmlEntity.DeEntitize(doc.DocumentNode.SelectSingleNode("//title")?.InnerText ?? string.Empty).Trim(),
|
||||||
|
HtmlEntity.DeEntitize(doc.DocumentNode.SelectSingleNode("//h1")?.InnerText ?? string.Empty).Trim()
|
||||||
|
};
|
||||||
|
|
||||||
|
ParseBestYearRange(yearTexts, out var fromYear, out var toYear);
|
||||||
detail.YearFrom = fromYear;
|
detail.YearFrom = fromYear;
|
||||||
detail.YearTo = toYear;
|
detail.YearTo = toYear;
|
||||||
|
|
||||||
@@ -292,6 +298,40 @@ public sealed class HsnTsnClient : IDisposable
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs each candidate text through <c>ParseYearRange</c>, in enumeration order, and selects
/// the year range to report.
/// </summary>
/// <remarks>
/// The first candidate that yields a start year becomes the result. If that result has no end
/// year, it is replaced by the first later candidate that yields both a start and an end year.
/// Blank candidates and candidates without a start year are ignored.
/// </remarks>
/// <param name="candidates">Texts to inspect for a year or year range.</param>
/// <param name="fromYear">Start year of the selected range, or <c>null</c> if no candidate yielded one.</param>
/// <param name="toYear">End year of the selected range, or <c>null</c> if the selected candidate had none.</param>
private static void ParseBestYearRange(IEnumerable<string> candidates, out int? fromYear, out int? toYear)
{
    fromYear = null;
    toYear = null;

    foreach (var candidate in candidates)
    {
        if (string.IsNullOrWhiteSpace(candidate))
        {
            continue;
        }

        ParseYearRange(candidate, out var parsedFrom, out var parsedTo);
        if (parsedFrom is null)
        {
            continue;
        }

        // Accept the very first usable hit unconditionally; afterwards only accept a candidate
        // that upgrades a single-year result to a range with both endpoints.
        var isFirstHit = fromYear is null;
        var upgradesToFullRange = toYear is null && parsedTo is not null;
        if (isFirstHit || upgradesToFullRange)
        {
            fromYear = parsedFrom;
            toYear = parsedTo;
        }
    }
}
|
||||||
|
|
||||||
private static string ExtractBrand(string vehicleType)
|
private static string ExtractBrand(string vehicleType)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrWhiteSpace(vehicleType))
|
if (string.IsNullOrWhiteSpace(vehicleType))
|
||||||
|
|||||||
Reference in New Issue
Block a user