using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Sonex.Data.Records;
using System.Net;
using System.Text;
using System.Text.Json;
using WorkerCore = Sonex.Library.WorkersCore.Worker;
namespace Sonex.Worker.WebSync;
internal sealed class WebSyncProductTaskRunner
{
// Single shared HttpClient used for every product page download in this class.
private static readonly HttpClient ProductHttpClient = new();
// Operation names attached to WebSyncOperationException instances and passed to the run report / logger.
private const string OperationProductTaskExecution = "ProductTaskExecution";
private const string OperationPageDownload = "PageDownload";
private const string OperationProductUpsert = "ProductUpsert";
// Table name used when registering dictionary values on the run report.
private const string DictionaryTableProducts = "products";
// Labels matched (exactly, case-sensitively) against the first <span> of each specification list item.
private const string ArticleNumberKey = "Artikelnummer";
private const string OnlineOnlyKey = "Online Only";
private const string MaterialKey = "Materiaal";
private const string ColorKey = "Kleur";
/// <summary>
/// File extensions (compared case-insensitively) that <c>TryNormalizeImageUrl</c> accepts as product images.
/// </summary>
private static readonly HashSet<string> SupportedImageExtensions = new(StringComparer.OrdinalIgnoreCase)
{
    ".jpg",
    ".jpeg",
    ".png",
    ".webp"
};

/// <summary>
/// Image synchronizer invoked by <c>ExecuteSingleTaskAsync</c> when image updates are requested.
/// </summary>
private readonly WebSyncImageSynchronizer _imageSynchronizer = new();
/// <summary>
/// Runs the complete sync task for one product URL, retrying failed attempts.
/// </summary>
/// <param name="productUrl">URL of the product page to process.</param>
/// <param name="targetImagesPath">Destination path handed to the image synchronizer.</param>
/// <param name="updateImages">When <c>true</c>, images are synchronized after the upsert.</param>
/// <param name="collectionSnapshot">Snapshot used to resolve the trending / nieuw / acties flags.</param>
/// <param name="retryCount">Number of additional attempts after the first one; values below zero behave like zero.</param>
/// <param name="downloadTimeoutSeconds">Time budget, in seconds, applied to each whole attempt.</param>
/// <param name="retryDelaySeconds">Delay, in seconds, between attempts.</param>
/// <param name="runReport">Report that receives counters and the final failure, if any.</param>
/// <param name="waitIfPaused">Callback awaited before each attempt and before each retry delay.</param>
/// <param name="cancellationToken">Caller's cancellation token.</param>
/// <remarks>
/// A failure on the last attempt is written to the console, the run report and the log, and the
/// method then returns normally. Cancellation requested through <paramref name="cancellationToken"/>
/// is never reported as a task failure: it propagates as <see cref="OperationCanceledException"/>.
/// </remarks>
/// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
public async Task ExecuteWithRetryAsync(
    string productUrl,
    string targetImagesPath,
    bool updateImages,
    WebSyncCollectionSnapshot collectionSnapshot,
    int retryCount,
    int downloadTimeoutSeconds,
    int retryDelaySeconds,
    WebSyncRunReport runReport,
    Func<CancellationToken, Task> waitIfPaused,
    CancellationToken cancellationToken)
{
    int attempts = Math.Max(1, retryCount + 1);
    for (int attempt = 1; attempt <= attempts; attempt++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        await waitIfPaused(cancellationToken).ConfigureAwait(false);
        try
        {
            // The linked source cancels on either the caller's token or the per-attempt timeout.
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            timeoutCts.CancelAfter(TimeSpan.FromSeconds(downloadTimeoutSeconds));
            await ExecuteSingleTaskAsync(
                productUrl,
                targetImagesPath,
                updateImages,
                collectionSnapshot,
                runReport,
                waitIfPaused,
                timeoutCts.Token).ConfigureAwait(false);
            return;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Caller-requested cancellation is not a task failure: never retry it and never
            // record it as an error (previously the final attempt logged and swallowed it).
            throw;
        }
        catch (Exception) when (attempt < attempts)
        {
            // Covers both ordinary failures and per-attempt timeouts; wait and try again.
            await waitIfPaused(cancellationToken).ConfigureAwait(false);
            await Task.Delay(TimeSpan.FromSeconds(retryDelaySeconds), cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Last attempt failed: report under the operation named by the exception when available.
            string operation = ex is WebSyncOperationException operationException
                ? operationException.Operation
                : OperationProductTaskExecution;
            string message = $"Task failed for {productUrl} after {attempts} attempts. Details={ex.Message}";
            PrintTaskConsoleError(productUrl, attempt, attempts, ex);
            runReport.RegisterException(operation, ex, message);
            WorkerCore.LogError(message, ex, operation);
            return;
        }
    }
}
/// <summary>
/// Performs one attempt of the product task: download the page, parse it, upsert the record and,
/// when <paramref name="updateImages"/> is set, synchronize the images.
/// </summary>
/// <remarks>
/// <paramref name="waitIfPaused"/> is awaited before every stage. When parsing yields no product
/// the method returns without upserting or touching images.
/// </remarks>
private async Task ExecuteSingleTaskAsync(
    string productUrl,
    string targetImagesPath,
    bool updateImages,
    WebSyncCollectionSnapshot collectionSnapshot,
    WebSyncRunReport runReport,
    Func<CancellationToken, Task> waitIfPaused,
    CancellationToken cancellationToken)
{
    // Stage 1: download.
    await waitIfPaused(cancellationToken).ConfigureAwait(false);
    DownloadPageResult page = await DownloadPageHtmlAsync(productUrl, runReport, cancellationToken)
        .ConfigureAwait(false);

    // Stage 2: parse.
    await waitIfPaused(cancellationToken).ConfigureAwait(false);
    ProductPageData? product = await ExtractProductResultAsync(page.FinalUrl, page.Html, cancellationToken)
        .ConfigureAwait(false);
    if (product is null)
    {
        return;
    }

    // Stage 3: persist.
    await waitIfPaused(cancellationToken).ConfigureAwait(false);
    await UpsertProductAsync(product, collectionSnapshot, runReport, cancellationToken).ConfigureAwait(false);

    if (!updateImages)
    {
        return;
    }

    // Stage 4: images (optional).
    await waitIfPaused(cancellationToken).ConfigureAwait(false);
    await _imageSynchronizer.UpdateAsync(
        product.ArticleNumber,
        product.ImageUrls,
        targetImagesPath,
        runReport,
        waitIfPaused,
        cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Maps the parsed product to a <c>ProductWebInfoRecord</c> and upserts it through the worker's
/// database retry helper.
/// </summary>
/// <remarks>
/// On success the page-updated counter is incremented and the dictionary values are registered.
/// </remarks>
/// <exception cref="OperationCanceledException">
/// The token is cancelled, or the database error looks like a cancellation.
/// </exception>
/// <exception cref="WebSyncOperationException">The upsert failed for any other reason.</exception>
private static async Task UpsertProductAsync(
    ProductPageData product,
    WebSyncCollectionSnapshot collectionSnapshot,
    WebSyncRunReport runReport,
    CancellationToken cancellationToken)
{
    string articleNumber = product.ArticleNumber;
    bool trending = collectionSnapshot.IsTrending(articleNumber);
    bool nieuw = collectionSnapshot.IsNieuw(articleNumber);
    bool acties = collectionSnapshot.IsActies(articleNumber);

    var record = new ProductWebInfoRecord
    {
        Artnr = articleNumber,
        Url = product.Url,
        Title = product.Title,
        Keywords = product.Keywords,
        Notification = product.Notification,
        PromoText = product.PromoText,
        StockMessage = product.StockMessage,
        Category = product.Category,
        RootCategory = product.RootCategory,
        LastCategory = product.LastCategory,
        Description = product.Description,
        Material = product.Material,
        Color = product.Color,
        Series = product.Series,
        MinimumOrderQuantity = product.MinimumOrderQuantity,
        OnlineOnly = product.OnlineOnly,
        CanOrder = product.CanOrder,
        IsTrending = trending,
        IsNieuw = nieuw,
        IsActies = acties,
    };

    var result = await WorkerCore.ExecuteDatabaseSingleWithRetryAsync(
        ct => record.Upsert(ct),
        OperationProductUpsert,
        cancellationToken).ConfigureAwait(false);

    // Happy path first: the call succeeded and the upsert itself reported true.
    if (result.Success && result.Item == true)
    {
        runReport.IncrementPageUpdated();
        RegisterDictionaryValues(product, runReport);
        return;
    }

    bool looksCancelled = cancellationToken.IsCancellationRequested ||
        IsCancellationError(result.ErrorType, result.ErrorMessage);
    if (looksCancelled)
    {
        throw new OperationCanceledException(
            result.ErrorMessage ?? "The operation was canceled.",
            cancellationToken);
    }

    string detail = result.ErrorMessage ?? $"Unknown database error for article {product.ArticleNumber}.";
    string dbErrorType = string.IsNullOrWhiteSpace(result.ErrorType) ? "Unknown" : result.ErrorType;
    throw new WebSyncOperationException(
        OperationProductUpsert,
        $"Product upsert failed for article {product.ArticleNumber}. DbErrorType={dbErrorType}. Message={detail}",
        new InvalidOperationException(detail));
}
/// <summary>
/// Registers the product's text fields on the run report as dictionary values of the
/// <c>products</c> table, one entry per column.
/// </summary>
private static void RegisterDictionaryValues(
    ProductPageData product,
    WebSyncRunReport runReport)
{
    // Column name paired with the value to register; the order matches the registration order.
    (string Column, string Value)[] entries =
    [
        ("title", product.Title),
        ("keywords", product.Keywords),
        ("notification", product.Notification),
        ("promo_text", product.PromoText),
        ("stock_message", product.StockMessage),
        ("category", product.Category),
        ("root_category", product.RootCategory),
        ("last_category", product.LastCategory),
        ("description", product.Description),
        ("material", product.Material),
        ("color", product.Color),
        ("series", product.Series),
    ];

    foreach ((string column, string value) in entries)
    {
        runReport.RegisterDictionaryValue(DictionaryTableProducts, column, value);
    }
}
/// <summary>
/// Downloads the product page and returns its HTML together with the URL reported on the
/// response's request message (falling back to <paramref name="productUrl"/> when absent).
/// </summary>
/// <remarks>Increments the page-downloaded counter after the body has been read.</remarks>
/// <exception cref="WebSyncOperationException">
/// Any failure other than cancellation, tagged with the page-download operation.
/// </exception>
/// <exception cref="OperationCanceledException">Passed through unchanged.</exception>
private static async Task<DownloadPageResult> DownloadPageHtmlAsync(
    string productUrl,
    WebSyncRunReport runReport,
    CancellationToken cancellationToken)
{
    try
    {
        using HttpResponseMessage response = await ProductHttpClient
            .GetAsync(productUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken)
            .ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        string html = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
        string finalUrl = response.RequestMessage?.RequestUri?.AbsoluteUri ?? productUrl;

        runReport.IncrementPageDownloaded();
        return new DownloadPageResult
        {
            Html = html,
            FinalUrl = finalUrl
        };
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Cancellation is left untouched so the retry loop can tell it apart from real failures.
        throw new WebSyncOperationException(
            OperationPageDownload,
            $"Page download failed for {productUrl}.",
            ex);
    }
}
/// <summary>
/// Parses the downloaded HTML and assembles the product data from the visible page elements,
/// using the embedded catalog JSON as fallback for fields the page does not provide.
/// </summary>
/// <returns>
/// The parsed product, or <c>null</c> when neither source yields an article number or a title.
/// </returns>
private static async Task<ProductPageData?> ExtractProductResultAsync(
    string finalUrl,
    string html,
    CancellationToken cancellationToken)
{
    var parser = new HtmlParser();
    IDocument document = await parser.ParseDocumentAsync(html, cancellationToken).ConfigureAwait(false);

    ProductSpecifications specifications = ExtractSpecifications(document);
    CatalogProductData? catalogData = ExtractCatalogProductData(
        document,
        finalUrl,
        specifications.ArticleNumber);

    // The article number and the title are mandatory; the page value wins over the catalog JSON.
    string articleNumber = FirstNotEmpty(specifications.ArticleNumber, catalogData?.ArticleNumber);
    if (string.IsNullOrWhiteSpace(articleNumber))
    {
        return null;
    }

    string title = FirstNotEmpty(
        NormalizeText(document.QuerySelector("div.product__header h1")?.TextContent),
        catalogData?.Title);
    if (string.IsNullOrWhiteSpace(title))
    {
        return null;
    }

    IReadOnlyList<string> categoryNames = catalogData?.CategoryNames ?? [];
    CategoryData categoryData = ExtractCategoryData(document, categoryNames);
    string stockMessage = NormalizeText(document.QuerySelector("div.stock__message span")?.TextContent);
    bool canOrder = ResolveCanOrder(document, stockMessage, catalogData);

    // A specification row, when present, overrides the catalog JSON flag.
    bool onlineOnly = specifications.HasOnlineOnly
        ? specifications.OnlineOnly
        : (catalogData?.OnlineOnly ?? false);
    int? minimumOrderQuantity = ResolveMinimumOrderQuantity(document, catalogData);

    return new ProductPageData
    {
        Url = FirstNotEmpty(finalUrl, catalogData?.Url),
        ArticleNumber = articleNumber,
        Title = title,
        Keywords = FirstNotEmpty(ExtractKeywords(document), catalogData?.Keywords),
        StockMessage = stockMessage,
        Notification = NormalizeText(document.QuerySelector("div.product__notifications p")?.TextContent),
        PromoText = ExtractPromoText(document),
        Category = categoryData.FullCategory,
        RootCategory = categoryData.RootCategory,
        LastCategory = categoryData.LastCategory,
        Description = ResolveDescription(document, catalogData?.DescriptionHtml),
        Material = specifications.Material,
        Color = specifications.Color,
        Series = specifications.Series,
        MinimumOrderQuantity = minimumOrderQuantity,
        OnlineOnly = onlineOnly,
        CanOrder = canOrder,
        ImageUrls = ExtractImageUrls(finalUrl, document, catalogData?.ImageGalleryPlaceholders)
    };
}
/// <summary>
/// Collects every catalog JSON payload embedded in the page and picks the one that best matches
/// the current product.
/// </summary>
/// <remarks>
/// Preference order: article number and URL both match, article number matches, URL matches,
/// otherwise the first payload found.
/// </remarks>
/// <returns>The selected payload, or <c>null</c> when the page embeds none that can be parsed.</returns>
private static CatalogProductData? ExtractCatalogProductData(
    IDocument document,
    string finalUrl,
    string articleNumberFromSpecifications)
{
    var candidates = new List<CatalogProductData>();
    CollectCandidates("catalog-product-configuration", ":product-data");
    CollectCandidates("catalog-product-family-products", ":parent-product");
    if (candidates.Count == 0)
    {
        return null;
    }

    string expectedArticleNumber = NormalizeArticleNumber(articleNumberFromSpecifications);
    string expectedUrlPath = NormalizeComparableUrlPath(finalUrl);

    return candidates.FirstOrDefault(candidate => MatchesArticle(candidate) && MatchesUrl(candidate))
        ?? candidates.FirstOrDefault(MatchesArticle)
        ?? candidates.FirstOrDefault(MatchesUrl)
        ?? candidates[0];

    // Parses the named attribute of every element matching the selector; unparsable payloads are skipped.
    void CollectCandidates(string selector, string attributeName)
    {
        foreach (IElement element in document.QuerySelectorAll(selector))
        {
            CatalogProductData? data = ParseCatalogProductJson(element.GetAttribute(attributeName));
            if (data is not null)
            {
                candidates.Add(data);
            }
        }
    }

    bool MatchesArticle(CatalogProductData candidate) =>
        IsArticleNumberMatch(candidate.ArticleNumber, expectedArticleNumber);

    bool MatchesUrl(CatalogProductData candidate) =>
        IsUrlMatch(candidate.Url, expectedUrlPath);
}
/// <summary>
/// Parses a catalog product JSON payload taken from an element attribute.
/// </summary>
/// <remarks>
/// The raw attribute value is parsed first. Only when that fails is an HTML-decoded copy tried,
/// which covers payloads that are still entity-encoded. Decoding unconditionally would corrupt
/// valid JSON whose string values contain entity text such as <c>&amp;quot;</c>.
/// </remarks>
/// <returns>The parsed payload, or <c>null</c> when the input is blank or cannot be read.</returns>
private static CatalogProductData? ParseCatalogProductJson(string? rawJson)
{
    if (string.IsNullOrWhiteSpace(rawJson))
    {
        return null;
    }

    CatalogProductData? fromRaw = ReadPayload(rawJson);
    if (fromRaw is not null)
    {
        return fromRaw;
    }

    string decodedJson = WebUtility.HtmlDecode(rawJson);
    if (string.IsNullOrWhiteSpace(decodedJson) ||
        string.Equals(decodedJson, rawJson, StringComparison.Ordinal))
    {
        // Decoding changed nothing, so a second parse would fail the same way.
        return null;
    }

    return ReadPayload(decodedJson);

    // Best-effort read: any parse or shape error yields null instead of failing the whole page.
    static CatalogProductData? ReadPayload(string json)
    {
        try
        {
            using JsonDocument parsed = JsonDocument.Parse(json);
            JsonElement root = parsed.RootElement;
            return new CatalogProductData
            {
                ArticleNumber = NormalizeText(GetJsonString(root, "sku")),
                Title = NormalizeText(GetJsonString(root, "name")),
                Url = NormalizeText(GetJsonString(root, "url")),
                Keywords = NormalizeText(GetJsonString(root, "meta_keywords")),
                DescriptionHtml = GetJsonString(root, "description"),
                IsSalable = GetJsonBool(root, "is_salable"),
                OnlineOnly = GetJsonBool(root, "online_only"),
                InStock = GetJsonBool(root, "in_stock"),
                StatusInStock = GetJsonBool(root, "status_in_stock"),
                FrontendStock = GetJsonInt(root, "frontend_stock"),
                NotSaleable = GetJsonBool(root, "not_saleable"),
                XenosMinBestelhoeveelheid = GetJsonInt(root, "xenos_min_bestelhoeveelheid"),
                MinimumQtyAllowedIncrement = GetMinimumQtyAllowedIncrement(root),
                CategoryNames = GetJsonStringArray(root, "category_names"),
                ImageGalleryPlaceholders = GetJsonStringArray(root, "image_gallery_placeholders")
            };
        }
        catch
        {
            return null;
        }
    }
}
/// <summary>
/// Reads a property as text: strings as-is, numbers as their raw JSON text, booleans as
/// <c>"true"</c>/<c>"false"</c>. Missing properties and every other value kind yield an empty string.
/// </summary>
private static string GetJsonString(JsonElement root, string propertyName)
{
    if (!root.TryGetProperty(propertyName, out JsonElement property))
    {
        return string.Empty;
    }

    switch (property.ValueKind)
    {
        case JsonValueKind.String:
            return property.GetString() ?? string.Empty;
        case JsonValueKind.Number:
            return property.GetRawText();
        case JsonValueKind.True:
            return "true";
        case JsonValueKind.False:
            return "false";
        default:
            return string.Empty;
    }
}
/// <summary>
/// Reads a property as a tri-state boolean.
/// </summary>
/// <remarks>
/// Accepts JSON booleans, 32-bit integers (non-zero is true) and strings holding a boolean
/// literal, an integer, or the Dutch words "ja"/"nee". Anything else yields <c>null</c>.
/// </remarks>
private static bool? GetJsonBool(JsonElement root, string propertyName)
{
    if (!root.TryGetProperty(propertyName, out JsonElement property))
    {
        return null;
    }

    switch (property.ValueKind)
    {
        case JsonValueKind.True:
            return true;

        case JsonValueKind.False:
            return false;

        case JsonValueKind.Number:
            if (property.TryGetInt32(out int numeric))
            {
                return numeric != 0;
            }

            return null;

        case JsonValueKind.String:
        {
            string text = NormalizeText(property.GetString());
            if (string.IsNullOrWhiteSpace(text))
            {
                return null;
            }

            if (bool.TryParse(text, out bool literal))
            {
                return literal;
            }

            if (int.TryParse(text, out int asInteger))
            {
                return asInteger != 0;
            }

            if (string.Equals(text, "ja", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            if (string.Equals(text, "nee", StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            return null;
        }

        default:
            return null;
    }
}
/// <summary>
/// Reads a property as a 32-bit integer.
/// </summary>
/// <remarks>
/// Numeric tokens are returned when they fit in an <see cref="int"/>. String tokens are first
/// parsed as a plain invariant-culture integer, so <c>"0"</c> and <c>"-5"</c> give the same result
/// as their numeric form. Only when that fails is the first run of digits inside the text used,
/// via <c>TryParsePositiveInteger</c>.
/// </remarks>
/// <returns>The integer, or <c>null</c> when the property is missing or not convertible.</returns>
private static int? GetJsonInt(JsonElement root, string propertyName)
{
    if (!root.TryGetProperty(propertyName, out JsonElement element))
    {
        return null;
    }

    if (element.ValueKind == JsonValueKind.Number)
    {
        if (element.TryGetInt32(out int numberValue))
        {
            return numberValue;
        }

        return null;
    }

    if (element.ValueKind == JsonValueKind.String)
    {
        string? text = element.GetString();

        // Exact integers (including zero and negatives) must not be dropped or lose their sign.
        if (int.TryParse(
                text,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out int exactValue))
        {
            return exactValue;
        }

        // Fallback for text that merely contains a number.
        if (TryParsePositiveInteger(text, out int embeddedValue))
        {
            return embeddedValue;
        }
    }

    return null;
}
/// <summary>
/// Reads <c>minimum_qty_allowed.increment</c> from the payload as an integer.
/// </summary>
/// <returns>
/// The increment, or <c>null</c> when the container is missing, is not an object, has no
/// <c>increment</c> member, or the value is not convertible.
/// </returns>
private static int? GetMinimumQtyAllowedIncrement(JsonElement root)
{
    if (!root.TryGetProperty("minimum_qty_allowed", out JsonElement container) ||
        container.ValueKind != JsonValueKind.Object ||
        !container.TryGetProperty("increment", out JsonElement increment))
    {
        return null;
    }

    switch (increment.ValueKind)
    {
        case JsonValueKind.Number when increment.TryGetInt32(out int numeric):
            return numeric;
        case JsonValueKind.String when TryParsePositiveInteger(increment.GetString(), out int fromText):
            return fromText;
        default:
            return null;
    }
}
/// <summary>
/// Reads a property as a list of normalized, non-empty strings.
/// </summary>
/// <returns>
/// The string items in document order. Non-string items and items that are empty after
/// normalization are skipped. The list is empty when the property is missing or not an array.
/// </returns>
private static List<string> GetJsonStringArray(JsonElement root, string propertyName)
{
    var values = new List<string>();
    if (!root.TryGetProperty(propertyName, out JsonElement element) ||
        element.ValueKind != JsonValueKind.Array)
    {
        return values;
    }

    foreach (JsonElement item in element.EnumerateArray())
    {
        if (item.ValueKind != JsonValueKind.String)
        {
            continue;
        }

        string value = NormalizeText(item.GetString());
        if (!string.IsNullOrWhiteSpace(value))
        {
            values.Add(value);
        }
    }

    return values;
}
/// <summary>
/// Returns the trimmed <paramref name="first"/> when it has content, otherwise the trimmed
/// <paramref name="second"/>, otherwise an empty string.
/// </summary>
private static string FirstNotEmpty(string? first, string? second)
{
    if (!string.IsNullOrWhiteSpace(first))
    {
        return first.Trim();
    }

    if (!string.IsNullOrWhiteSpace(second))
    {
        return second.Trim();
    }

    return string.Empty;
}
/// <summary>
/// Trims the article number; <c>null</c> and whitespace-only input become an empty string.
/// </summary>
private static string NormalizeArticleNumber(string? articleNumber) =>
    articleNumber?.Trim() ?? string.Empty;
/// <summary>
/// Case-insensitive comparison of a candidate article number against an already normalized
/// expected value. A blank value on either side never matches.
/// </summary>
private static bool IsArticleNumberMatch(string candidateArticleNumber, string normalizedExpectedArticleNumber)
{
    if (string.IsNullOrWhiteSpace(normalizedExpectedArticleNumber))
    {
        return false;
    }

    string candidate = NormalizeArticleNumber(candidateArticleNumber);
    return !string.IsNullOrWhiteSpace(candidate) &&
        string.Equals(candidate, normalizedExpectedArticleNumber, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Case-insensitive comparison of a candidate URL's comparable path against an already
/// normalized expected path. A blank value on either side never matches.
/// </summary>
private static bool IsUrlMatch(string candidateUrl, string normalizedFinalUrlPath)
{
    if (string.IsNullOrWhiteSpace(normalizedFinalUrlPath))
    {
        return false;
    }

    string candidatePath = NormalizeComparableUrlPath(candidateUrl);
    return !string.IsNullOrWhiteSpace(candidatePath) &&
        string.Equals(candidatePath, normalizedFinalUrlPath, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Reduces an absolute, protocol-relative or relative URL to a normalized path suitable for
/// comparison (see <c>NormalizeUrlPath</c>).
/// </summary>
/// <remarks>
/// The input is HTML-decoded and JSON-escaped slashes (<c>\/</c>) are unescaped first.
/// Root-relative input (<c>/path</c>) is never passed to the absolute-URI parser: on Unix that
/// parser accepts it as an implicit <c>file://</c> path and escapes <c>?</c>/<c>#</c>, which would
/// make the result platform dependent. Protocol-relative input (<c>//host/path</c>) is given an
/// <c>https:</c> scheme so only its path is kept on every platform.
/// </remarks>
/// <returns>The normalized path, or an empty string for blank input.</returns>
private static string NormalizeComparableUrlPath(string? url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return string.Empty;
    }

    string decoded = WebUtility.HtmlDecode(url).Replace("\\/", "/", StringComparison.Ordinal).Trim();

    if (decoded.StartsWith("//", StringComparison.Ordinal))
    {
        // The scheme is irrelevant here: only the path component is compared.
        decoded = "https:" + decoded;
    }

    bool isRootRelative = decoded.StartsWith('/');
    if (!isRootRelative && Uri.TryCreate(decoded, UriKind.Absolute, out Uri? absoluteUri))
    {
        return NormalizeUrlPath(absoluteUri.AbsolutePath);
    }

    if (Uri.TryCreate(decoded, UriKind.Relative, out Uri? relativeUri))
    {
        return NormalizeUrlPath(relativeUri.OriginalString);
    }

    return NormalizeUrlPath(decoded);
}
/// <summary>
/// Normalizes a URL path for comparison: drops the query string and fragment, guarantees a
/// leading slash, removes trailing slashes (keeping a lone <c>/</c>) and lower-cases the result.
/// </summary>
/// <returns>The normalized path, or an empty string for blank input.</returns>
private static string NormalizeUrlPath(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    // Everything from the first '?' or '#' onwards is not part of the path.
    int cut = value.IndexOfAny(['?', '#']);
    string path = (cut >= 0 ? value[..cut] : value).Trim();

    if (!path.StartsWith("/", StringComparison.Ordinal))
    {
        path = "/" + path;
    }

    path = path.TrimEnd('/');
    return path.Length == 0 ? "/" : path.ToLowerInvariant();
}
/// <summary>
/// Returns the description found in the page markup, falling back to the text of the catalog
/// JSON description HTML when the page has none.
/// </summary>
private static string ResolveDescription(IDocument document, string? descriptionHtml)
{
    string fromPage = ExtractDescriptionFromDocument(document);
    return string.IsNullOrWhiteSpace(fromPage)
        ? ExtractDescriptionFromHtml(descriptionHtml)
        : fromPage;
}
/// <summary>
/// Converts a description HTML fragment to normalized plain text.
/// </summary>
/// <remarks>
/// The fragment is HTML-decoded and wrapped in a single <c>&lt;div&gt;</c>, so there is a known
/// element to read the text content from once the markup has been parsed.
/// </remarks>
/// <returns>The normalized text, or an empty string for blank input.</returns>
private static string ExtractDescriptionFromHtml(string? descriptionHtml)
{
    if (string.IsNullOrWhiteSpace(descriptionHtml))
    {
        return string.Empty;
    }

    string decoded = WebUtility.HtmlDecode(descriptionHtml);

    // Wrapper element that the "div" selector below resolves to.
    string html = $"<div>{decoded}</div>";

    var parser = new HtmlParser();
    IDocument fragmentDocument = parser.ParseDocument(html);
    IElement? root = fragmentDocument.QuerySelector("div");
    if (root is null)
    {
        return string.Empty;
    }

    return NormalizeText(root.TextContent);
}
/// <summary>
/// Extracts the product description from the page markup.
/// </summary>
/// <remarks>
/// Looks for <c>div.product__description div._content</c>, then <c>div.product__description</c>.
/// Non-empty paragraphs are joined with <see cref="Environment.NewLine"/>. When there are none,
/// the container's whole text is used.
/// </remarks>
/// <returns>The description, or an empty string when the container is absent.</returns>
private static string ExtractDescriptionFromDocument(IDocument document)
{
    IElement? contentNode = document.QuerySelector("div.product__description div._content")
        ?? document.QuerySelector("div.product__description");
    if (contentNode is null)
    {
        return string.Empty;
    }

    List<string> lines = contentNode
        .QuerySelectorAll("p")
        .Select(paragraph => NormalizeText(paragraph.TextContent))
        .Where(text => !string.IsNullOrWhiteSpace(text))
        .ToList();

    return lines.Count > 0
        ? string.Join(Environment.NewLine, lines)
        : NormalizeText(contentNode.TextContent);
}
/// <summary>
/// Decides whether the product can be ordered.
/// </summary>
/// <remarks>
/// An "unavailable" stock message always wins. Otherwise, when catalog data exists, any explicit
/// negative flag (not salable, not in stock, frontend stock of zero or less) blocks ordering and
/// unknown flags are ignored. Without catalog data the product is treated as orderable.
/// </remarks>
private static bool ResolveCanOrder(
    IDocument document,
    string stockMessage,
    CatalogProductData? catalogData)
{
    if (IsUnavailableStockMessage(stockMessage))
    {
        return false;
    }

    if (catalogData is null)
    {
        // NOTE(review): both outcomes below are true, so the add-to-cart probe currently has no
        // effect on the result. Confirm whether a missing button was meant to return false.
        if (HasAddToCartButton(document))
        {
            return true;
        }

        return true;
    }

    bool blockedByCatalog =
        catalogData.IsSalable is false ||
        catalogData.NotSaleable is true ||
        catalogData.StatusInStock is false ||
        catalogData.InStock is false ||
        catalogData.FrontendStock is <= 0;

    return !blockedByCatalog;
}
/// <summary>
/// Tells whether the page contains a button inside <c>div.add-to-cart__holder</c>.
/// </summary>
private static bool HasAddToCartButton(IDocument document) =>
    document.QuerySelector("div.add-to-cart__holder button") is not null;
/// <summary>
/// Tells whether the stock message contains one of the known Dutch "unavailable" phrases.
/// The comparison ignores case; blank messages are never unavailable.
/// </summary>
private static bool IsUnavailableStockMessage(string stockMessage)
{
    if (string.IsNullOrWhiteSpace(stockMessage))
    {
        return false;
    }

    string[] unavailablePhrases =
    [
        "uitverkocht",
        "niet verkoopbaar",
        "niet online",
        "alleen verkrijgbaar in onze winkels",
        "verschijnt binnenkort online",
    ];

    string lowered = stockMessage.ToLowerInvariant();
    foreach (string phrase in unavailablePhrases)
    {
        if (lowered.Contains(phrase, StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Returns the normalized content of the first <c>&lt;meta name="keywords"&gt;</c> element
/// (name compared case-insensitively), or an empty string when there is none.
/// </summary>
private static string ExtractKeywords(IDocument document)
{
    IElement? keywordsMeta = document
        .QuerySelectorAll("meta[name][content]")
        .FirstOrDefault(meta =>
            string.Equals(meta.GetAttribute("name"), "keywords", StringComparison.OrdinalIgnoreCase));

    return keywordsMeta is null
        ? string.Empty
        : NormalizeText(keywordsMeta.GetAttribute("content") ?? string.Empty);
}
/// <summary>
/// Builds the category path of the product.
/// </summary>
/// <remarks>
/// Breadcrumb names are preferred: a leading "Terug naar" is stripped, "Home" is skipped and
/// duplicates (case-insensitive) are dropped. Only when the breadcrumb yields nothing are the
/// category names from the catalog JSON used.
/// </remarks>
/// <returns>
/// The full path joined with <c>" &gt; "</c> plus its first and last segment, or an empty
/// <c>CategoryData</c> when no names are available.
/// </returns>
private static CategoryData ExtractCategoryData(
    IDocument document,
    IReadOnlyList<string> categoryNamesFromProductData)
{
    const string backPrefix = "Terug naar";
    var names = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (IElement element in document.QuerySelectorAll("div.breadcrumbs [property='name']"))
    {
        string name = NormalizeText(element.TextContent);
        if (name.StartsWith(backPrefix, StringComparison.OrdinalIgnoreCase))
        {
            name = NormalizeText(name[backPrefix.Length..]);
        }

        if (string.IsNullOrWhiteSpace(name) ||
            string.Equals(name, "Home", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        if (seen.Add(name))
        {
            names.Add(name);
        }
    }

    if (names.Count == 0)
    {
        foreach (string categoryName in categoryNamesFromProductData)
        {
            // Check after normalizing so a name that collapses to nothing is not added as "".
            string normalized = NormalizeText(categoryName);
            if (string.IsNullOrWhiteSpace(normalized))
            {
                continue;
            }

            if (seen.Add(normalized))
            {
                names.Add(normalized);
            }
        }
    }

    if (names.Count == 0)
    {
        return new CategoryData();
    }

    return new CategoryData
    {
        FullCategory = string.Join(" > ", names),
        RootCategory = names[0],
        LastCategory = names[^1]
    };
}
/// <summary>
/// Returns the normalized text of <c>div.product__promo</c>, or an empty string when the element
/// is absent.
/// </summary>
private static string ExtractPromoText(IDocument document) =>
    NormalizeText(document.QuerySelector("div.product__promo")?.TextContent);
/// <summary>
/// Reads the specification list (<c>div.product__specifications ul li</c>) into a
/// <c>ProductSpecifications</c> instance.
/// </summary>
/// <remarks>
/// Each item needs at least two <c>&lt;span&gt;</c> elements: the first is the label, the second
/// the value. Known labels are matched exactly; any other label containing "serie"
/// (case-insensitive) sets the series. Later items overwrite earlier ones with the same label.
/// </remarks>
private static ProductSpecifications ExtractSpecifications(IDocument document)
{
    var specifications = new ProductSpecifications();

    foreach (IElement item in document.QuerySelectorAll("div.product__specifications ul li"))
    {
        var spans = item.QuerySelectorAll("span");
        if (spans.Length < 2)
        {
            continue;
        }

        string label = NormalizeText(spans[0].TextContent);
        if (string.IsNullOrWhiteSpace(label))
        {
            continue;
        }

        string value = NormalizeText(spans[1].TextContent);

        if (label == ArticleNumberKey)
        {
            specifications.ArticleNumber = value;
        }
        else if (label == OnlineOnlyKey)
        {
            specifications.HasOnlineOnly = true;
            specifications.OnlineOnly = ParseOnlineOnly(value);
        }
        else if (label == MaterialKey)
        {
            specifications.Material = value;
        }
        else if (label == ColorKey)
        {
            specifications.Color = value;
        }
        else if (label.Contains("serie", StringComparison.OrdinalIgnoreCase))
        {
            specifications.Series = value;
        }
    }

    return specifications;
}
/// <summary>
/// Resolves the minimum order quantity from the first source that provides a positive value:
/// the quantity select on the page, the catalog <c>xenos_min_bestelhoeveelheid</c> field, then the
/// catalog minimum-quantity increment.
/// </summary>
/// <returns>The positive quantity, or <c>null</c> when no source provides one.</returns>
private static int? ResolveMinimumOrderQuantity(IDocument document, CatalogProductData? catalogData)
{
    if (ExtractMinimumOrderQuantityFromSelect(document) is int fromSelect and > 0)
    {
        return fromSelect;
    }

    if (catalogData?.XenosMinBestelhoeveelheid is int fromXenosField and > 0)
    {
        return fromXenosField;
    }

    if (catalogData?.MinimumQtyAllowedIncrement is int fromIncrement and > 0)
    {
        return fromIncrement;
    }

    return null;
}
/// <summary>
/// Finds the smallest positive quantity offered by the page's quantity select.
/// </summary>
/// <remarks>
/// For each option the <c>value</c> attribute is tried first, then the option text. Options from
/// which no positive integer can be read are ignored.
/// </remarks>
/// <returns>The smallest quantity, or <c>null</c> when no option yields one.</returns>
private static int? ExtractMinimumOrderQuantityFromSelect(IDocument document)
{
    int? smallest = null;
    IEnumerable<IElement> options =
        document.QuerySelectorAll("div.product__qty select option, select[name='qty'] option");

    foreach (IElement option in options)
    {
        if (!TryParsePositiveInteger(option.GetAttribute("value"), out int quantity) &&
            !TryParsePositiveInteger(option.TextContent, out quantity))
        {
            continue;
        }

        if (smallest is null || quantity < smallest.Value)
        {
            smallest = quantity;
        }
    }

    return smallest;
}
/// <summary>
/// Extracts the first run of digits from the text and parses it as a positive integer.
/// </summary>
/// <param name="rawValue">Text that may contain a number, e.g. an option label.</param>
/// <param name="value">The parsed number; <c>0</c> when the method returns <c>false</c>.</param>
/// <returns>
/// <c>true</c> when a digit run exists, fits in an <see cref="int"/> and is greater than zero.
/// </returns>
private static bool TryParsePositiveInteger(string? rawValue, out int value)
{
    value = 0;
    if (string.IsNullOrWhiteSpace(rawValue))
    {
        return false;
    }

    string text = NormalizeText(rawValue);

    // Locate the first digit.
    int start = 0;
    while (start < text.Length && !char.IsDigit(text[start]))
    {
        start++;
    }

    if (start == text.Length)
    {
        return false;
    }

    // Extend to the end of that digit run.
    int end = start;
    while (end < text.Length && char.IsDigit(text[end]))
    {
        end++;
    }

    return int.TryParse(text[start..end], out value) && value > 0;
}
/// <summary>
/// Interprets the "Online Only" specification value: "Ja"/"Nee" (case-insensitive), a boolean
/// literal, or "1". Anything else is <c>false</c>.
/// </summary>
private static bool ParseOnlineOnly(string value)
{
    string text = NormalizeText(value);

    if (text.Equals("Ja", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (text.Equals("Nee", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    return bool.TryParse(text, out bool literal)
        ? literal
        : string.Equals(text, "1", StringComparison.Ordinal);
}
/// <summary>
/// Collects the distinct product image URLs of the page.
/// </summary>
/// <remarks>
/// The gallery's <c>data-zoom</c> attributes are used first. The placeholders from the catalog
/// JSON are consulted only when the gallery yields no acceptable URL. Every URL goes through
/// <c>TryNormalizeImageUrl</c>; duplicates (case-insensitive) are dropped and order is preserved.
/// </remarks>
private static List<string> ExtractImageUrls(
    string pageUrl,
    IDocument document,
    IReadOnlyList<string>? placeholdersFromProductData)
{
    var urls = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (IElement element in document.QuerySelectorAll("div.product__media-gallery [data-zoom], div[class*='--gallery'] [data-zoom]"))
    {
        AddImageUrl(pageUrl, element.GetAttribute("data-zoom"), urls, seen);
    }

    if (urls.Count == 0 && placeholdersFromProductData is { Count: > 0 })
    {
        foreach (string placeholder in placeholdersFromProductData)
        {
            AddImageUrl(pageUrl, placeholder, urls, seen);
        }
    }

    return urls;
}
/// <summary>
/// Normalizes <paramref name="rawUrl"/> and appends it to <paramref name="urls"/> when it is an
/// acceptable image URL that has not been seen before.
/// </summary>
/// <param name="pageUrl">Page URL used to resolve relative image URLs.</param>
/// <param name="rawUrl">Candidate URL as found in the markup or JSON; may be <c>null</c>.</param>
/// <param name="urls">Ordered result list.</param>
/// <param name="seen">Set used to suppress duplicates.</param>
private static void AddImageUrl(
    string pageUrl,
    string? rawUrl,
    List<string> urls,
    HashSet<string> seen)
{
    if (TryNormalizeImageUrl(pageUrl, rawUrl, out string normalized) && seen.Add(normalized))
    {
        urls.Add(normalized);
    }
}
/// <summary>
/// Resolves a raw image reference against the page URL and validates it.
/// </summary>
/// <remarks>
/// Accepted URLs use http or https, contain <c>/pub/cdn/</c> in their path and end in one of the
/// supported image extensions. The raw value is HTML-decoded and JSON-escaped slashes are
/// unescaped before it is resolved.
/// </remarks>
/// <param name="pageUrl">Absolute URL of the page the reference was found on.</param>
/// <param name="rawUrl">Image reference as found in the markup or JSON.</param>
/// <param name="imageUrl">The absolute image URL on success; otherwise an empty string.</param>
/// <returns><c>true</c> when the reference resolves to an acceptable image URL.</returns>
private static bool TryNormalizeImageUrl(
    string pageUrl,
    string? rawUrl,
    out string imageUrl)
{
    imageUrl = string.Empty;

    if (string.IsNullOrWhiteSpace(rawUrl) ||
        !Uri.TryCreate(pageUrl, UriKind.Absolute, out Uri? baseUri))
    {
        return false;
    }

    string candidate = WebUtility.HtmlDecode(rawUrl.Trim()).Replace("\\/", "/", StringComparison.Ordinal);
    if (!Uri.TryCreate(baseUri, candidate, out Uri? resolved))
    {
        return false;
    }

    bool isHttp = string.Equals(resolved.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase);
    bool isHttps = string.Equals(resolved.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
    if (!isHttp && !isHttps)
    {
        return false;
    }

    string path = resolved.AbsolutePath;
    if (!path.Contains("/pub/cdn/", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    if (!SupportedImageExtensions.Contains(Path.GetExtension(path)))
    {
        return false;
    }

    imageUrl = resolved.AbsoluteUri;
    return true;
}
/// <summary>
/// Produces single-line display text: HTML-decodes the value, turns non-breaking spaces into
/// regular spaces, collapses whitespace runs and trims. Blank input yields an empty string.
/// </summary>
private static string NormalizeText(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    string withPlainSpaces = WebUtility.HtmlDecode(value).Replace('\u00A0', ' ');
    string collapsed = CollapseWhitespace(withPlainSpaces);
    return collapsed.Trim();
}
/// <summary>
/// Replaces every run of whitespace characters with a single space. Leading and trailing runs
/// are collapsed too, not removed.
/// </summary>
private static string CollapseWhitespace(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    var collapsed = new StringBuilder(value.Length);
    bool insideWhitespaceRun = false;

    foreach (char current in value)
    {
        if (!char.IsWhiteSpace(current))
        {
            collapsed.Append(current);
            insideWhitespaceRun = false;
        }
        else if (!insideWhitespaceRun)
        {
            // First whitespace of a run: emit one space and swallow the rest.
            collapsed.Append(' ');
            insideWhitespaceRun = true;
        }
    }

    return collapsed.ToString();
}
/// <summary>
/// Writes a timestamped failure header followed by the full exception text to standard error.
/// </summary>
private static void PrintTaskConsoleError(string productUrl, int attempt, int totalAttempts, Exception exception)
{
    string header =
        $"[{DateTime.Now:yyyy-MM-dd HH:mm:ss}] [ProductTaskExecution] Task failed for {productUrl}. Attempt {attempt}/{totalAttempts}. ExceptionType={exception.GetType().Name}";

    TextWriter stderr = Console.Error;
    stderr.WriteLine(header);
    stderr.WriteLine(exception.ToString());
}
/// <summary>
/// Heuristically decides whether a database error stands for a cancellation.
/// </summary>
/// <remarks>
/// True when the error type names <c>OperationCanceledException</c> or
/// <c>TaskCanceledException</c> (case-sensitive), or when the message contains
/// "operation was canceled" or "operation cancelled" (case-insensitive).
/// </remarks>
private static bool IsCancellationError(string? errorType, string? errorMessage)
{
    bool typeIndicatesCancellation =
        !string.IsNullOrWhiteSpace(errorType) &&
        (errorType.Contains("OperationCanceledException", StringComparison.Ordinal) ||
         errorType.Contains("TaskCanceledException", StringComparison.Ordinal));
    if (typeIndicatesCancellation)
    {
        return true;
    }

    return !string.IsNullOrWhiteSpace(errorMessage) &&
        (errorMessage.Contains("operation was canceled", StringComparison.OrdinalIgnoreCase) ||
         errorMessage.Contains("operation cancelled", StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Immutable result of parsing one product page; the source for the database record, the
/// dictionary values and the image synchronization.
/// </summary>
private sealed class ProductPageData
{
    /// <summary>Final page URL, or the catalog URL when the former is blank.</summary>
    public string Url { get; init; } = string.Empty;

    /// <summary>Article number; always non-empty for a successfully parsed page.</summary>
    public string ArticleNumber { get; init; } = string.Empty;

    /// <summary>Product title; always non-empty for a successfully parsed page.</summary>
    public string Title { get; init; } = string.Empty;

    /// <summary>Content of the keywords meta tag, or the catalog keywords.</summary>
    public string Keywords { get; init; } = string.Empty;

    /// <summary>Text of the stock message element.</summary>
    public string StockMessage { get; init; } = string.Empty;

    /// <summary>Text of the first product notification paragraph.</summary>
    public string Notification { get; init; } = string.Empty;

    /// <summary>Text of the promo element.</summary>
    public string PromoText { get; init; } = string.Empty;

    /// <summary>Full category path joined with " &gt; ".</summary>
    public string Category { get; init; } = string.Empty;

    /// <summary>First segment of the category path.</summary>
    public string RootCategory { get; init; } = string.Empty;

    /// <summary>Last segment of the category path.</summary>
    public string LastCategory { get; init; } = string.Empty;

    /// <summary>Plain-text description.</summary>
    public string Description { get; init; } = string.Empty;

    /// <summary>Value of the material specification row.</summary>
    public string Material { get; init; } = string.Empty;

    /// <summary>Value of the color specification row.</summary>
    public string Color { get; init; } = string.Empty;

    /// <summary>Value of the specification row whose label contains "serie".</summary>
    public string Series { get; init; } = string.Empty;

    /// <summary>Minimum order quantity, or <c>null</c> when no source provides a positive value.</summary>
    public int? MinimumOrderQuantity { get; init; }

    /// <summary>Whether the product is flagged as online only.</summary>
    public bool OnlineOnly { get; init; }

    /// <summary>Whether the product is considered orderable.</summary>
    public bool CanOrder { get; init; }

    /// <summary>Distinct, validated absolute image URLs in discovery order.</summary>
    public List<string> ImageUrls { get; init; } = [];
}
// Result of a successful page download.
private sealed class DownloadPageResult
{
// Response body read as a string.
public string Html { get; init; } = string.Empty;
// Absolute URI taken from the response's request message, or the requested URL when that is unavailable.
public string FinalUrl { get; init; } = string.Empty;
}
// Values read from the page's specification list; filled in row by row by ExtractSpecifications.
private sealed class ProductSpecifications
{
// Value of the "Artikelnummer" row.
public string ArticleNumber { get; set; } = string.Empty;
// True when an "Online Only" row was present, so OnlineOnly carries a value read from the page.
public bool HasOnlineOnly { get; set; }
// Parsed value of the "Online Only" row; only meaningful when HasOnlineOnly is true.
public bool OnlineOnly { get; set; }
// Value of the "Materiaal" row.
public string Material { get; set; } = string.Empty;
// Value of the "Kleur" row.
public string Color { get; set; } = string.Empty;
// Value of the row whose label contains "serie".
public string Series { get; set; } = string.Empty;
}
/// <summary>
/// Product data read from a catalog JSON payload embedded in the page. Nullable members are
/// <c>null</c> when the payload does not provide a usable value.
/// </summary>
private sealed class CatalogProductData
{
    /// <summary>JSON <c>sku</c>.</summary>
    public string ArticleNumber { get; init; } = string.Empty;

    /// <summary>JSON <c>name</c>.</summary>
    public string Title { get; init; } = string.Empty;

    /// <summary>JSON <c>url</c>.</summary>
    public string Url { get; init; } = string.Empty;

    /// <summary>JSON <c>meta_keywords</c>.</summary>
    public string Keywords { get; init; } = string.Empty;

    /// <summary>JSON <c>description</c>, kept as raw (not normalized) HTML.</summary>
    public string DescriptionHtml { get; init; } = string.Empty;

    /// <summary>JSON <c>is_salable</c>.</summary>
    public bool? IsSalable { get; init; }

    /// <summary>JSON <c>online_only</c>.</summary>
    public bool? OnlineOnly { get; init; }

    /// <summary>JSON <c>in_stock</c>.</summary>
    public bool? InStock { get; init; }

    /// <summary>JSON <c>status_in_stock</c>.</summary>
    public bool? StatusInStock { get; init; }

    /// <summary>JSON <c>frontend_stock</c>.</summary>
    public int? FrontendStock { get; init; }

    /// <summary>JSON <c>not_saleable</c>.</summary>
    public bool? NotSaleable { get; init; }

    /// <summary>JSON <c>xenos_min_bestelhoeveelheid</c>.</summary>
    public int? XenosMinBestelhoeveelheid { get; init; }

    /// <summary>JSON <c>minimum_qty_allowed.increment</c>.</summary>
    public int? MinimumQtyAllowedIncrement { get; init; }

    /// <summary>JSON <c>category_names</c>, normalized with empty entries removed.</summary>
    public List<string> CategoryNames { get; init; } = [];

    /// <summary>JSON <c>image_gallery_placeholders</c>, normalized with empty entries removed.</summary>
    public List<string> ImageGalleryPlaceholders { get; init; } = [];
}
// Category path of a product; every member is empty when no category names were found.
private sealed class CategoryData
{
// All category names joined with " > ".
public string FullCategory { get; init; } = string.Empty;
// First name of the path.
public string RootCategory { get; init; } = string.Empty;
// Last name of the path.
public string LastCategory { get; init; } = string.Empty;
}
}