Fix get_business_card_info for pooled workers
- Clear cookies and navigate to about:blank before loading URL (ensures clean state when reusing pooled driver)
- Simplified regex patterns for rating/reviews extraction
- Uses partial word matching like scrape_reviews

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1177,6 +1177,14 @@ def get_business_card_info(url: str, headless: bool = True, driver=None, return_
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Clear state if reusing a pooled driver (ensures clean page load)
|
||||||
|
if driver_provided:
|
||||||
|
try:
|
||||||
|
driver.delete_all_cookies()
|
||||||
|
driver.get("about:blank")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
# Navigate to URL
|
# Navigate to URL
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
|
|
||||||
@@ -1211,19 +1219,20 @@ def get_business_card_info(url: str, headless: bool = True, driver=None, return_
|
|||||||
if (h1) result.name = h1.textContent.trim();
|
if (h1) result.name = h1.textContent.trim();
|
||||||
|
|
||||||
// Rating and reviews from span[role="img"] aria-labels
|
// Rating and reviews from span[role="img"] aria-labels
|
||||||
// Handles multiple languages: stars/estrellas/étoiles, reviews/reseñas/avis
|
// Same pattern as scrape_reviews for consistency
|
||||||
var spans = document.querySelectorAll('span[role="img"]');
|
var spans = document.querySelectorAll('span[role="img"]');
|
||||||
for (var i = 0; i < spans.length; i++) {
|
for (var i = 0; i < spans.length; i++) {
|
||||||
var label = spans[i].getAttribute('aria-label') || '';
|
var label = spans[i].getAttribute('aria-label') || '';
|
||||||
|
|
||||||
// Rating: "4.8 stars" or "4,8 estrellas" or "4,8 étoiles"
|
// Rating: "4.8 stars", "4,8 estrellas", etc (partial match)
|
||||||
var rMatch = label.match(/^([\\d,.]+)\\s*(stars?|estrellas?|étoiles?|sterne?|stelle)/i);
|
var rMatch = label.match(/^([\\d,.]+)\\s*(star|estrella|étoile|stern|stell)/i);
|
||||||
if (rMatch && !result.rating) {
|
if (rMatch && !result.rating) {
|
||||||
result.rating = parseFloat(rMatch[1].replace(',', '.'));
|
result.rating = parseFloat(rMatch[1].replace(',', '.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reviews: "79 reviews" or "79 reseñas" or "79 avis"
|
// Reviews: same as scrape_reviews - /^([\d,.]+)\s*review/i
|
||||||
var revMatch = label.match(/^([\\d,\\.]+)\\s*(reviews?|reseñas?|avis|bewertungen|recensioni)/i);
|
// Plus Spanish "reseña" which doesn't contain "review"
|
||||||
|
var revMatch = label.match(/^([\\d,\\.]+)\\s*(review|reseña|avis|bewertung|recension)/i);
|
||||||
if (revMatch && !result.total_reviews) {
|
if (revMatch && !result.total_reviews) {
|
||||||
result.total_reviews = parseInt(revMatch[1].replace(/[,\\.]/g, ''));
|
result.total_reviews = parseInt(revMatch[1].replace(/[,\\.]/g, ''));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user