diff --git a/modules/scraper_clean.py b/modules/scraper_clean.py index 3dacea2..4abef09 100644 --- a/modules/scraper_clean.py +++ b/modules/scraper_clean.py @@ -236,25 +236,29 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in # Extract total review count BEFORE clicking reviews tab (it's on Overview) # ROBUST: Use aria-label="X reviews" on span[role="img"] + # Poll for up to 5s since page might still be loading after consent total_reviews = None - try: - total_reviews = driver.execute_script(""" - // ROBUST: Find span[role="img"][aria-label*="review"] - contains "(X)" text - // aria-label format: "260 reviews" or "1,234 reviews" - var reviewSpans = document.querySelectorAll('span[role="img"][aria-label*="review"]'); - for (var i = 0; i < reviewSpans.length; i++) { - var label = reviewSpans[i].getAttribute('aria-label') || ''; - var match = label.match(/^([\\d,\\.]+)\\s*review/i); - if (match) { - return parseInt(match[1].replace(/[,\\.]/g, '')); + start = time.time() + while time.time() - start < 5: + try: + total_reviews = driver.execute_script(""" + // ROBUST: Find span[role="img"] with aria-label starting with number + "review" + var reviewSpans = document.querySelectorAll('span[role="img"]'); + for (var i = 0; i < reviewSpans.length; i++) { + var label = reviewSpans[i].getAttribute('aria-label') || ''; + var match = label.match(/^([\\d,\\.]+)\\s*review/i); + if (match) { + return parseInt(match[1].replace(/[,\\.]/g, '')); + } } - } - return null; - """) - if total_reviews: - print(f"📊 Total reviews on page: {total_reviews}") - except: - pass + return null; + """) + if total_reviews: + print(f"📊 Total reviews on page: {total_reviews}") + break + except: + pass + time.sleep(0.1) # Click reviews tab - poll until found review_keywords = ["review", "reseña", "avis", "bewertung", "recensione", "opiniones"]