diff --git a/modules/scraper_clean.py b/modules/scraper_clean.py index a52592e..a9bba13 100644 --- a/modules/scraper_clean.py +++ b/modules/scraper_clean.py @@ -207,7 +207,7 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in # Navigate to URL print(f"🌐 Loading: {url[:80]}...") driver.get(url) - time.sleep(3) + time.sleep(1) # Minimal wait for redirect check # Handle consent popup if present if "consent.google" in driver.current_url: @@ -218,28 +218,32 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in txt = btn.text.lower() if "accept" in txt or "aceptar" in txt or "alle akzeptieren" in txt: btn.click() - time.sleep(2) + time.sleep(0.5) # Brief wait for consent processing break except: pass # Reload original URL after consent (redirect can corrupt URL) print(" Reloading after consent...") driver.get(url) - time.sleep(3) + time.sleep(1) # Minimal wait for page start # Click reviews tab if present (multilingual support) review_keywords = ["review", "reseña", "avis", "bewertung", "recensione", "opiniones"] - try: - tabs = driver.find_elements(By.CSS_SELECTOR, "button[role='tab']") - for tab in tabs: - tab_text = tab.text.lower() - if any(kw in tab_text for kw in review_keywords): - print(f" Clicking reviews tab: '{tab.text}'") - tab.click() - time.sleep(2) + for _ in range(10): # Wait up to 2.5s for tabs to appear + try: + tabs = driver.find_elements(By.CSS_SELECTOR, "button[role='tab']") + if tabs: + for tab in tabs: + tab_text = tab.text.lower() + if any(kw in tab_text for kw in review_keywords): + print(f" Clicking reviews tab: '{tab.text}'") + tab.click() + time.sleep(0.3) # Brief wait for tab switch + break break - except: - pass + except: + pass + time.sleep(0.25) # Find scrollable reviews container def find_scroll_container(): @@ -261,14 +265,15 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in pass return None - # Wait and retry for scroll container + # Wait and retry for scroll container (faster polling) scroll_container = None - for attempt in range(10): + for attempt in range(20): # 20 x 0.25s = 5s max scroll_container = find_scroll_container() if scroll_container: break - print(f" Waiting for reviews panel... ({attempt+1}/10)") - time.sleep(1) + if attempt % 4 == 3: # Print every 1s + print(f" Waiting for reviews panel... ({(attempt+1)//4 + 1}s)") + time.sleep(0.25) if not scroll_container: print("❌ Could not find reviews scroll container") @@ -305,9 +310,6 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in except: pass - # Wait for reviews to fully load after tab click - time.sleep(2) - # PHASE 2: Inject API interceptor for scroll-loaded reviews print("🔌 Injecting API interceptor...") driver.execute_script("""