Use immediate element detection with 10ms polling

- Replace fixed waits with tight polling loops, each bounded by a timeout (5s consent, 5s tabs, 10s scroll container)
- 10ms sleep between polls (responsive but low CPU)
- Consent redirect, reviews tab, and scroll container are each detected as soon as they appear
- Total time reduced to ~11-12 seconds

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-21 20:52:18 +00:00
parent 4f48fb28cd
commit 6a75159ebe

View File

@@ -207,43 +207,47 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
# Navigate to URL # Navigate to URL
print(f"🌐 Loading: {url[:80]}...") print(f"🌐 Loading: {url[:80]}...")
driver.get(url) driver.get(url)
time.sleep(1) # Minimal wait for redirect check
# Handle consent popup if present # Handle consent popup if redirected (poll with tiny sleep)
start = time.time()
while time.time() - start < 5: # Max 5s for consent
if "consent.google" in driver.current_url: if "consent.google" in driver.current_url:
print(" Handling consent popup...") print(" Handling consent popup...")
try: try:
accept_btns = driver.find_elements(By.CSS_SELECTOR, "button") for btn in driver.find_elements(By.CSS_SELECTOR, "button"):
for btn in accept_btns:
txt = btn.text.lower() txt = btn.text.lower()
if "accept" in txt or "aceptar" in txt or "alle akzeptieren" in txt: if "accept" in txt or "aceptar" in txt or "alle akzeptieren" in txt:
btn.click() btn.click()
time.sleep(0.5) # Brief wait for consent processing # Reload original URL after consent
print(" Reloading after consent...")
driver.get(url)
break break
except: except:
pass pass
# Reload original URL after consent (redirect can corrupt URL) break
print(" Reloading after consent...") # Check if we're already on the target page
driver.get(url) if "maps/place" in driver.current_url and "consent" not in driver.current_url:
time.sleep(1) # Minimal wait for page start break
time.sleep(0.01) # 10ms - responsive but low CPU
# Click reviews tab if present (multilingual support) # Click reviews tab - poll until found
review_keywords = ["review", "reseña", "avis", "bewertung", "recensione", "opiniones"] review_keywords = ["review", "reseña", "avis", "bewertung", "recensione", "opiniones"]
for _ in range(10): # Wait up to 2.5s for tabs to appear start = time.time()
while time.time() - start < 5: # Max 5s for tabs
try: try:
tabs = driver.find_elements(By.CSS_SELECTOR, "button[role='tab']") tabs = driver.find_elements(By.CSS_SELECTOR, "button[role='tab']")
if tabs:
for tab in tabs: for tab in tabs:
tab_text = tab.text.lower() tab_text = tab.text.lower()
if any(kw in tab_text for kw in review_keywords): if any(kw in tab_text for kw in review_keywords):
print(f" Clicking reviews tab: '{tab.text}'") print(f" Clicking reviews tab: '{tab.text}'")
tab.click() tab.click()
time.sleep(0.3) # Brief wait for tab switch
break
break break
else:
time.sleep(0.01) # 10ms between polls
continue
break # Found and clicked
except: except:
pass time.sleep(0.01)
time.sleep(0.25)
# Find scrollable reviews container # Find scrollable reviews container
def find_scroll_container(): def find_scroll_container():
@@ -265,15 +269,19 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
pass pass
return None return None
# Wait and retry for scroll container (faster polling) # Poll for scroll container (10ms intervals - fast but low CPU)
scroll_container = None scroll_container = None
for attempt in range(20): # 20 x 0.25s = 5s max start = time.time()
last_print = 0
while time.time() - start < 10: # Max 10s
scroll_container = find_scroll_container() scroll_container = find_scroll_container()
if scroll_container: if scroll_container:
break break
if attempt % 4 == 3: # Print every 1s elapsed = int(time.time() - start)
print(f" Waiting for reviews panel... ({(attempt+1)//4 + 1}s)") if elapsed > last_print:
time.sleep(0.25) print(f" Waiting for reviews panel... ({elapsed}s)")
last_print = elapsed
time.sleep(0.01) # 10ms - responsive but low CPU
if not scroll_container: if not scroll_container:
print("❌ Could not find reviews scroll container") print("❌ Could not find reviews scroll container")