Add polling for total count detection on page load
- Poll for up to 5s waiting for span[role="img"][aria-label*="review"]
- Element may not be present immediately after consent handling
- Tested: Soho Club 247/247 reviews in 31.4s with correct total

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -236,25 +236,29 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
|
|||||||
|
|
||||||
# Extract total review count BEFORE clicking reviews tab (it's on Overview)
|
# Extract total review count BEFORE clicking reviews tab (it's on Overview)
|
||||||
# ROBUST: Use aria-label="X reviews" on span[role="img"]
|
# ROBUST: Use aria-label="X reviews" on span[role="img"]
|
||||||
|
# Poll for up to 5s since page might still be loading after consent
|
||||||
total_reviews = None
|
total_reviews = None
|
||||||
try:
|
start = time.time()
|
||||||
total_reviews = driver.execute_script("""
|
while time.time() - start < 5:
|
||||||
// ROBUST: Find span[role="img"][aria-label*="review"] - contains "(X)" text
|
try:
|
||||||
// aria-label format: "260 reviews" or "1,234 reviews"
|
total_reviews = driver.execute_script("""
|
||||||
var reviewSpans = document.querySelectorAll('span[role="img"][aria-label*="review"]');
|
// ROBUST: Find span[role="img"] with aria-label starting with number + "review"
|
||||||
for (var i = 0; i < reviewSpans.length; i++) {
|
var reviewSpans = document.querySelectorAll('span[role="img"]');
|
||||||
var label = reviewSpans[i].getAttribute('aria-label') || '';
|
for (var i = 0; i < reviewSpans.length; i++) {
|
||||||
var match = label.match(/^([\\d,\\.]+)\\s*review/i);
|
var label = reviewSpans[i].getAttribute('aria-label') || '';
|
||||||
if (match) {
|
var match = label.match(/^([\\d,\\.]+)\\s*review/i);
|
||||||
return parseInt(match[1].replace(/[,\\.]/g, ''));
|
if (match) {
|
||||||
|
return parseInt(match[1].replace(/[,\\.]/g, ''));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
return null;
|
||||||
return null;
|
""")
|
||||||
""")
|
if total_reviews:
|
||||||
if total_reviews:
|
print(f"📊 Total reviews on page: {total_reviews}")
|
||||||
print(f"📊 Total reviews on page: {total_reviews}")
|
break
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
# Click reviews tab - poll until found
|
# Click reviews tab - poll until found
|
||||||
review_keywords = ["review", "reseña", "avis", "bewertung", "recensione", "opiniones"]
|
review_keywords = ["review", "reseña", "avis", "bewertung", "recensione", "opiniones"]
|
||||||
|
|||||||
Reference in New Issue
Block a user