using System.Xml;
using WorkerCore = Sonex.Library.WorkersCore.Worker;

namespace Sonex.Worker.WebSync;

/// <summary>
/// Downloads a sitemap over HTTP and collects the URLs found in its <c>loc</c> elements.
/// </summary>
internal sealed class WebSyncSitemapLoader
{
    // A single shared client is reused for every download instead of creating one per request.
    private static readonly HttpClient SitemapHttpClient = new();

    // Operation name passed to the run report and the error log.
    private const string OperationSitemapDownload = "SitemapDownload";

    /// <summary>
    /// Downloads the sitemap at <paramref name="sitemapUrl"/> and returns the distinct URLs it lists,
    /// retrying failed attempts.
    /// </summary>
    /// <param name="sitemapUrl">Address of the sitemap to download.</param>
    /// <param name="retryCount">Number of retries after the first attempt; at least one attempt is always made.</param>
    /// <param name="downloadTimeoutSeconds">Time budget, in seconds, for a single attempt (download and parsing).</param>
    /// <param name="retryDelaySeconds">Pause, in seconds, between attempts; negative values are treated as zero.</param>
    /// <param name="runReport">Receives the link count on success, or the exception once all attempts have failed.</param>
    /// <param name="waitIfPaused">Callback awaited before each attempt, before each retry delay and while parsing.</param>
    /// <param name="cancellationToken">Token that aborts the whole operation.</param>
    /// <returns>
    /// The links in document order, or an empty list when every attempt failed
    /// (the failure is then reported through <paramref name="runReport"/> and the error log).
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled.
    /// </exception>
    public async Task<List<string>> DownloadProductLinksAsync(
        string sitemapUrl,
        int retryCount,
        int downloadTimeoutSeconds,
        int retryDelaySeconds,
        WebSyncRunReport runReport,
        Func<CancellationToken, Task> waitIfPaused,
        CancellationToken cancellationToken)
    {
        var links = new List<string>();
        int attempts = Math.Max(1, retryCount + 1);

        // Task.Delay rejects negative delays; clamping keeps a bad setting from replacing
        // the real download error with an ArgumentOutOfRangeException thrown inside a catch block.
        TimeSpan retryDelay = TimeSpan.FromSeconds(Math.Max(0, retryDelaySeconds));

        for (int attempt = 1; attempt <= attempts; attempt++)
        {
            cancellationToken.ThrowIfCancellationRequested();
            await waitIfPaused(cancellationToken).ConfigureAwait(false);

            try
            {
                // The linked source fires on caller cancellation or when this attempt's time budget runs out.
                using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
                timeoutCts.CancelAfter(TimeSpan.FromSeconds(downloadTimeoutSeconds));

                // ResponseHeadersRead lets the body be parsed as a stream instead of being buffered first.
                using var response = await SitemapHttpClient.GetAsync(
                    sitemapUrl,
                    HttpCompletionOption.ResponseHeadersRead,
                    timeoutCts.Token).ConfigureAwait(false);
                response.EnsureSuccessStatusCode();

                await using var stream = await response.Content.ReadAsStreamAsync(timeoutCts.Token).ConfigureAwait(false);
                links = await ExtractProductLinksAsync(stream, waitIfPaused, timeoutCts.Token).ConfigureAwait(false);
                runReport.SetSitemapLinksCount(links.Count);
                break;
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller asked to stop: propagate instead of retrying or recording a download failure.
                throw;
            }
            catch (Exception) when (attempt < attempts)
            {
                // Covers per-attempt timeouts as well as HTTP and XML errors while attempts remain.
                await waitIfPaused(cancellationToken).ConfigureAwait(false);
                await Task.Delay(retryDelay, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Last attempt failed: report it and fall through to return the (empty) list.
                PrintSitemapConsoleError(sitemapUrl, attempt, attempts, ex);
                string message = $"Sitemap download failed for {sitemapUrl} after {attempts} attempts.";
                runReport.RegisterException(OperationSitemapDownload, ex, message);
                WorkerCore.LogError(message, ex, OperationSitemapDownload);
                break;
            }
        }

        return links;
    }

    /// <summary>
    /// Writes a timestamped failure line followed by the full exception text to standard error.
    /// </summary>
    /// <param name="sitemapUrl">Address of the sitemap that failed to download.</param>
    /// <param name="attempt">Number of the attempt that failed.</param>
    /// <param name="totalAttempts">Total number of attempts allowed.</param>
    /// <param name="exception">The exception to print.</param>
    private static void PrintSitemapConsoleError(
        string sitemapUrl,
        int attempt,
        int totalAttempts,
        Exception exception)
    {
        Console.Error.WriteLine(
            $"[{DateTime.Now:yyyy-MM-dd HH:mm:ss}] [SitemapDownload] Download failed for {sitemapUrl}. Attempt {attempt}/{totalAttempts}. ExceptionType={exception.GetType().Name}");
        Console.Error.WriteLine(exception.ToString());
    }

    /// <summary>
    /// Streams through the sitemap XML and collects the trimmed, non-empty text of every
    /// <c>loc</c> element, dropping duplicates case-insensitively.
    /// </summary>
    /// <param name="sitemapStream">Stream containing the sitemap XML.</param>
    /// <param name="waitIfPaused">Callback awaited before each node is examined.</param>
    /// <param name="cancellationToken">Token checked before each node is examined.</param>
    /// <returns>The distinct links in document order.</returns>
    private static async Task<List<string>> ExtractProductLinksAsync(
        Stream sitemapStream,
        Func<CancellationToken, Task> waitIfPaused,
        CancellationToken cancellationToken)
    {
        var links = new List<string>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        using var xmlReader = XmlReader.Create(
            sitemapStream,
            new XmlReaderSettings
            {
                Async = true,
                DtdProcessing = DtdProcessing.Prohibit,
                IgnoreWhitespace = true
            });

        // The loop is driven by EOF rather than by ReadAsync(): reading an element's content
        // already leaves the reader on the node after the end tag, so advancing again
        // unconditionally would skip that node (for example a directly following loc element).
        while (!xmlReader.EOF)
        {
            cancellationToken.ThrowIfCancellationRequested();
            await waitIfPaused(cancellationToken).ConfigureAwait(false);

            // Matching is by local name only, so prefixed elements such as image:loc are collected too.
            // NOTE(review): confirm that including namespaced loc elements is intended.
            bool isLocElement =
                xmlReader.NodeType == XmlNodeType.Element &&
                string.Equals(xmlReader.LocalName, "loc", StringComparison.OrdinalIgnoreCase);

            if (!isLocElement)
            {
                await xmlReader.ReadAsync().ConfigureAwait(false);
                continue;
            }

            // Consumes the element and positions the reader on whatever follows its end tag.
            string url = (await xmlReader.ReadElementContentAsStringAsync().ConfigureAwait(false)).Trim();

            if (!string.IsNullOrEmpty(url) && seen.Add(url))
            {
                links.Add(url);
            }
        }

        return links;
    }
}