using CPRNIMS.Infrastructure.Dto.Canvass.Response;
using CPRNIMS.Infrastructure.Dto.Canvass.Result;
using Microsoft.Extensions.Configuration;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;

namespace CPRNIMS.Domain.Services.Canvass
{
    /// <summary>
    /// Finds candidate suppliers for an item in three steps: a Tavily web search,
    /// a best-effort fetch of contact pages on the discovered domains, and a Groq
    /// chat-completion call that extracts structured supplier records from the
    /// combined text.
    /// </summary>
    public class SupplierSearchService
    {
        private const int MaxSearchResults = 10;
        private const int MaxSnippetLength = 300;
        private const int MaxContextLength = 2000;
        private const int MaxContactDomains = 5;
        private const int MaxContactTokens = 50;
        private const int MaxSuppliers = 10;

        private static readonly TimeSpan ContactPageTimeout = TimeSpan.FromSeconds(5);

        // Common contact page suffixes to try, in order.
        private static readonly string[] ContactPaths =
        {
            "/contact",
            "/contact-us",
            "/pages/contact-us",
            "/about/contact",
            "/about"
        };

        // Substrings that mark a token as contact-related (matched case-insensitively).
        private static readonly string[] ContactKeywords = { "email", "phone", "contact", "mobile", "tel" };

        // Patterns are built once instead of on every call.
        private static readonly Regex NonPrintableAscii = new(@"[^\x20-\x7E]", RegexOptions.Compiled);
        private static readonly Regex WhitespaceRun = new(@"\s{3,}", RegexOptions.Compiled);
        private static readonly Regex HtmlTag = new(@"<[^>]+>", RegexOptions.Compiled);
        private static readonly Regex ScriptOrStyleBlock = new(
            @"<(script|style)\b[^>]*>[\s\S]*?</\1\s*>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);
        private static readonly Regex PhoneLike = new(@"\+?[\d\-\(\)]{7,}", RegexOptions.Compiled);
        private static readonly Regex JsonArray = new(@"\[[\s\S]*\]", RegexOptions.Compiled);

        private readonly HttpClient _httpClient;
        private readonly IConfiguration _config;

        /// <summary>Creates the service.</summary>
        /// <param name="httpClient">Client used for the Tavily, Groq and contact-page requests.</param>
        /// <param name="config">
        /// Configuration providing <c>Tavily:SearchUrl</c>, <c>Tavily:ApiKey</c>,
        /// <c>Groq:ApiUrl</c>, <c>Groq:ApiKey</c> and, optionally, <c>Groq:Model</c>.
        /// </param>
        public SupplierSearchService(HttpClient httpClient, IConfiguration config)
        {
            _httpClient = httpClient;
            _config = config;
        }

        /// <summary>
        /// Searches the web for suppliers of the given item and returns up to ten
        /// de-duplicated suppliers that have a contact email.
        /// </summary>
        /// <param name="itemName">Item to search for. A blank value yields an empty list without any API call.</param>
        /// <param name="itemDescription">Extra description appended to the search query; null is treated as empty.</param>
        /// <param name="cancellationToken">Cancels the whole pipeline, including in-flight HTTP requests.</param>
        /// <returns>The extracted suppliers; never null.</returns>
        /// <exception cref="InvalidOperationException">A required configuration value is missing.</exception>
        /// <exception cref="HttpRequestException">Tavily or Groq returned a non-success status code.</exception>
        /// <exception cref="JsonException">Tavily or Groq returned a body that is not valid JSON.</exception>
        public async Task<List<SupplierResponse>> SearchAndFilterSuppliersAsync(
            string itemName,
            string itemDescription,
            CancellationToken cancellationToken = default)
        {
            if (string.IsNullOrWhiteSpace(itemName))
            {
                return new List<SupplierResponse>();
            }

            itemDescription ??= string.Empty;

            // Step 1: Tavily - search text plus the base URLs of the result sites.
            var (searchContent, supplierUrls) =
                await SearchTavilyAsync(itemName, itemDescription, cancellationToken);

            // Step 2: contact pages from the discovered sites (best effort).
            var contactContent = await FetchContactPagesAsync(supplierUrls, cancellationToken);

            // Step 3: combine both sources and let Groq extract the supplier records.
            var combined = searchContent + " CONTACT_PAGES_DATA: " + contactContent;
            return await FilterWithGroqAsync(itemName, itemDescription, combined, cancellationToken);
        }

        // ── Tavily ──────────────────────────────────────────────────────────────

        /// <summary>
        /// Runs the Tavily search and condenses the results into one prompt-sized
        /// string, collecting the distinct base URLs of the result pages.
        /// </summary>
        private async Task<(string content, List<string> urls)> SearchTavilyAsync(
            string itemName,
            string itemDescription,
            CancellationToken cancellationToken)
        {
            var query = $"{itemName} {itemDescription} suppliers Philippines budget price contact email phone";
            var payload = new
            {
                query,
                max_results = MaxSearchResults,
                search_depth = "advanced",
                include_answer = false
            };

            var body = await PostJsonAsync("Tavily:SearchUrl", "Tavily:ApiKey", payload, cancellationToken);

            var summary = new StringBuilder();
            var urls = new List<string>();
            var seenBaseUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            // NOTE(review): responses are read by wire property name via JsonDocument;
            // the project DTOs can be substituted here if that is preferred.
            using var document = JsonDocument.Parse(body);
            if (TryGetProperty(document.RootElement, "results", out var results)
                && results.ValueKind == JsonValueKind.Array)
            {
                var index = 1;
                foreach (var result in results.EnumerateArray())
                {
                    var title = ReadText(result, "title");
                    var url = ReadText(result, "url");

                    var snippet = CleanText(ReadText(result, "content"));
                    if (snippet.Length > MaxSnippetLength)
                    {
                        snippet = snippet[..MaxSnippetLength];
                    }

                    summary.Append($"{index}. Title:{title}|URL:{url}|Content:{snippet}|");

                    // Remember each site's base URL once, in first-seen order, for the contact-page step.
                    if (Uri.TryCreate(url, UriKind.Absolute, out var uri)
                        && (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps))
                    {
                        // Authority keeps a non-default port, which Host alone would drop.
                        var baseUrl = $"{uri.Scheme}://{uri.Authority}";
                        if (seenBaseUrls.Add(baseUrl))
                        {
                            urls.Add(baseUrl);
                        }
                    }

                    index++;
                }
            }

            var content = summary.Length > MaxContextLength
                ? summary.ToString(0, MaxContextLength)
                : summary.ToString();

            return (content, urls);
        }

        // ── Fetch Contact Pages ─────────────────────────────────────────────────

        /// <summary>
        /// Collects contact-looking tokens from the first few domains. Domains are
        /// probed concurrently so one slow site does not delay the others; the
        /// output keeps the input order and is capped in length.
        /// </summary>
        private async Task<string> FetchContactPagesAsync(
            List<string> baseUrls,
            CancellationToken cancellationToken)
        {
            var lookups = baseUrls
                .Take(MaxContactDomains)
                .Select(baseUrl => FetchContactSignalsAsync(baseUrl, cancellationToken));

            var sections = await Task.WhenAll(lookups);
            var combined = string.Concat(sections);

            return combined.Length > MaxContextLength ? combined[..MaxContextLength] : combined;
        }

        /// <summary>
        /// Tries each known contact path on one domain and returns the formatted
        /// tokens from the first page that yields any, or an empty string.
        /// </summary>
        private async Task<string> FetchContactSignalsAsync(string baseUrl, CancellationToken cancellationToken)
        {
            foreach (var path in ContactPaths)
            {
                try
                {
                    // Per-request timeout that still honors the caller's token.
                    using var timeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
                    timeout.CancelAfter(ContactPageTimeout);

                    using var response = await _httpClient.GetAsync(baseUrl + path, timeout.Token);
                    if (!response.IsSuccessStatusCode)
                    {
                        continue;
                    }

                    var html = await response.Content.ReadAsStringAsync(timeout.Token);
                    var signals = ExtractContactSignals(html);
                    if (!string.IsNullOrWhiteSpace(signals))
                    {
                        // Found contact data for this domain; no need to try further paths.
                        return $"[{baseUrl}]: {signals} | ";
                    }
                }
                catch (Exception ex) when (ex is not OperationCanceledException
                                           || !cancellationToken.IsCancellationRequested)
                {
                    // Deliberate best effort: a timeout, unreachable host or unreadable
                    // page just means this path is skipped. Caller cancellation propagates.
                }
            }

            return string.Empty;
        }

        /// <summary>
        /// Strips markup from a page and keeps only the whitespace-separated tokens
        /// that look like contact data (an '@', a phone-like digit run, or a contact keyword).
        /// </summary>
        private static string ExtractContactSignals(string html)
        {
            // Drop script/style bodies first so their code is not mistaken for page text.
            var text = ScriptOrStyleBlock.Replace(html, " ");
            text = HtmlTag.Replace(text, " ");
            text = CleanText(text);

            var tokens = text
                .Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Where(IsContactSignal)
                .Take(MaxContactTokens);

            return string.Join(" ", tokens);
        }

        private static bool IsContactSignal(string token) =>
            token.Contains('@')
            || PhoneLike.IsMatch(token)
            || ContactKeywords.Any(keyword => token.Contains(keyword, StringComparison.OrdinalIgnoreCase));

        // ── Groq ────────────────────────────────────────────────────────────────

        /// <summary>
        /// Asks Groq to extract suppliers from the gathered text and maps the answer
        /// to <see cref="SupplierResponse"/> records, skipping entries without a
        /// name or email and duplicates by name. Returns an empty list when the
        /// model's answer contains no parseable JSON array.
        /// </summary>
        private async Task<List<SupplierResponse>> FilterWithGroqAsync(
            string itemName,
            string itemDescription,
            string searchContent,
            CancellationToken cancellationToken)
        {
            var prompt =
                $"Extract top 10 unique suppliers for: {itemName} {itemDescription}. " +
                "Prioritize Philippines suppliers first. " +
                "IMPORTANT: Look carefully in CONTACT_PAGES_DATA section for real phone numbers and emails. " +
                "Extract exact email addresses and phone numbers found. " +
                "For domains without contact data found, infer email as sales@domain or info@domain. " +
                "Prefer budget-friendly suppliers. No duplicates. " +
                "Return ONLY a raw JSON array: company_name, country, phone_number, contact_email, website, estimated_price_usd, item_specifications. " +
                $"Null for missing. JSON array only. Data: {searchContent}";

            var payload = new
            {
                model = _config["Groq:Model"] ?? "llama-3.1-8b-instant",
                stream = false,
                max_tokens = 2048,
                temperature = 0.1,
                messages = new[]
                {
                    new
                    {
                        role = "system",
                        content = "You are a supplier data extractor. Extract real contact details from provided content. Return ONLY a valid JSON array, no markdown, no explanation."
                    },
                    new { role = "user", content = prompt }
                }
            };

            var body = await PostJsonAsync("Groq:ApiUrl", "Groq:ApiKey", payload, cancellationToken);
            var rawText = ExtractFirstChoiceContent(body);

            var suppliers = new List<SupplierResponse>();

            // The model may wrap the array in prose or markdown; take the outermost [...] span.
            var match = JsonArray.Match(rawText);
            if (!match.Success)
            {
                return suppliers;
            }

            JsonDocument candidates;
            try
            {
                candidates = JsonDocument.Parse(match.Value);
            }
            catch (JsonException)
            {
                // Model output is untrusted text; a malformed array is treated as "nothing found".
                return suppliers;
            }

            using (candidates)
            {
                if (candidates.RootElement.ValueKind != JsonValueKind.Array)
                {
                    return suppliers;
                }

                var seenNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
                foreach (var candidate in candidates.RootElement.EnumerateArray())
                {
                    var name = ReadText(candidate, "company_name").Trim();
                    var email = ReadText(candidate, "contact_email").Trim();

                    // Check the email before recording the name, so an email-less entry
                    // cannot hide a later usable entry for the same company.
                    if (name.Length == 0 || email.Length == 0 || !seenNames.Add(name))
                    {
                        continue;
                    }

                    suppliers.Add(new SupplierResponse
                    {
                        SupplierName = name,
                        EmailAddress = email,
                        ContactNo = ReadText(candidate, "phone_number"),
                        Address = ReadText(candidate, "country"),
                        IsActive = true,
                        VatInc = false,
                        Currency = "PHP",
                        CurrencyId = 1,
                        PaymentTermsId = 1,
                        PaymentTerms = "30 Days",
                        LeadTime = "7-14 Days",
                        TinNo = string.Empty,
                        ContactPerson = string.Empty,
                        Website = ReadText(candidate, "website"),
                    });

                    if (suppliers.Count >= MaxSuppliers)
                    {
                        break;
                    }
                }
            }

            return suppliers;
        }

        /// <summary>
        /// Returns <c>choices[0].message.content</c> from a chat-completion body,
        /// or an empty string when any part of that path is absent.
        /// </summary>
        private static string ExtractFirstChoiceContent(string body)
        {
            using var document = JsonDocument.Parse(body);

            if (!TryGetProperty(document.RootElement, "choices", out var choices)
                || choices.ValueKind != JsonValueKind.Array
                || choices.GetArrayLength() == 0)
            {
                return string.Empty;
            }

            return TryGetProperty(choices[0], "message", out var message)
                ? ReadText(message, "content")
                : string.Empty;
        }

        // ── Shared helpers ──────────────────────────────────────────────────────

        /// <summary>
        /// POSTs <paramref name="payload"/> as JSON with a bearer token and returns
        /// the response body. The URL and API key are read from configuration.
        /// </summary>
        private async Task<string> PostJsonAsync<TPayload>(
            string urlKey,
            string apiKeyKey,
            TPayload payload,
            CancellationToken cancellationToken)
        {
            var url = RequireSetting(urlKey);
            var apiKey = RequireSetting(apiKeyKey);

            using var request = new HttpRequestMessage(HttpMethod.Post, url);
            request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
            request.Content = new StringContent(
                JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json");

            using var response = await _httpClient.SendAsync(request, cancellationToken);
            response.EnsureSuccessStatusCode();

            return await response.Content.ReadAsStringAsync(cancellationToken);
        }

        /// <summary>Reads a configuration value, failing fast with the key name when it is missing.</summary>
        private string RequireSetting(string key)
        {
            var value = _config[key];
            if (string.IsNullOrWhiteSpace(value))
            {
                throw new InvalidOperationException($"Configuration value '{key}' is missing or empty.");
            }

            return value;
        }

        /// <summary>Replaces non-printable/non-ASCII characters with spaces and collapses long whitespace runs.</summary>
        private static string CleanText(string text)
        {
            var cleaned = NonPrintableAscii.Replace(text ?? string.Empty, " ");
            return WhitespaceRun.Replace(cleaned, " ");
        }

        /// <summary>
        /// Looks up a property on a JSON object, ignoring case and underscores so
        /// that <c>company_name</c>, <c>companyName</c> and <c>CompanyName</c> all match.
        /// </summary>
        private static bool TryGetProperty(JsonElement element, string name, out JsonElement value)
        {
            if (element.ValueKind == JsonValueKind.Object)
            {
                var wanted = name.Replace("_", string.Empty);
                foreach (var property in element.EnumerateObject())
                {
                    var actual = property.Name.Replace("_", string.Empty);
                    if (string.Equals(actual, wanted, StringComparison.OrdinalIgnoreCase))
                    {
                        value = property.Value;
                        return true;
                    }
                }
            }

            value = default;
            return false;
        }

        /// <summary>
        /// Returns a property as text: strings as-is, numbers as their raw JSON
        /// text (a model may emit a phone number unquoted), anything else as empty.
        /// </summary>
        private static string ReadText(JsonElement element, string name)
        {
            if (!TryGetProperty(element, name, out var value))
            {
                return string.Empty;
            }

            return value.ValueKind switch
            {
                JsonValueKind.String => value.GetString() ?? string.Empty,
                JsonValueKind.Number => value.GetRawText(),
                _ => string.Empty,
            };
        }
    }
}