Fix get_business_card_info for pooled workers

- Clear cookies and navigate to about:blank before loading URL
  (ensures clean state when reusing pooled driver)
- Simplify the regex patterns for rating/review-count extraction
- Match word stems (e.g. "star", "review") instead of full words, as scrape_reviews does

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-23 18:09:51 +00:00
parent e3136281b8
commit 1bd30c0789

View File

@@ -1177,6 +1177,14 @@ def get_business_card_info(url: str, headless: bool = True, driver=None, return_
except:
pass
# Clear state if reusing a pooled driver (ensures clean page load)
if driver_provided:
try:
driver.delete_all_cookies()
driver.get("about:blank")
except:
pass
# Navigate to URL
driver.get(url)
@@ -1211,19 +1219,20 @@ def get_business_card_info(url: str, headless: bool = True, driver=None, return_
if (h1) result.name = h1.textContent.trim();
// Rating and reviews from span[role="img"] aria-labels
// Handles multiple languages: stars/estrellas/étoiles, reviews/reseñas/avis
// Same pattern as scrape_reviews for consistency
var spans = document.querySelectorAll('span[role="img"]');
for (var i = 0; i < spans.length; i++) {
var label = spans[i].getAttribute('aria-label') || '';
// Rating: "4.8 stars" or "4,8 estrellas" or "4,8 étoiles"
var rMatch = label.match(/^([\\d,.]+)\\s*(stars?|estrellas?|étoiles?|sterne?|stelle)/i);
// Rating: "4.8 stars", "4,8 estrellas", etc (partial match)
var rMatch = label.match(/^([\\d,.]+)\\s*(star|estrella|étoile|stern|stell)/i);
if (rMatch && !result.rating) {
result.rating = parseFloat(rMatch[1].replace(',', '.'));
}
// Reviews: "79 reviews" or "79 reseñas" or "79 avis"
var revMatch = label.match(/^([\\d,\\.]+)\\s*(reviews?|reseñas?|avis|bewertungen|recensioni)/i);
// Reviews: same as scrape_reviews - /^([\d,.]+)\s*review/i
// Plus Spanish "reseña" which doesn't contain "review"
var revMatch = label.match(/^([\\d,\\.]+)\\s*(review|reseña|avis|bewertung|recension)/i);
if (revMatch && !result.total_reviews) {
result.total_reviews = parseInt(revMatch[1].replace(/[,\\.]/g, ''));
}