Stop immediately when all reviews collected
- Check total_reviews before recovery attempts
- Exit loop as soon as current_count >= total_reviews
- Reduces scrape time significantly (13s vs 56s for 247 reviews)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -785,18 +785,24 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
|
|||||||
else:
|
else:
|
||||||
print(f" 📊 {current_count} reviews | idle: {elapsed:.1f}s/{timeout_no_new}s", flush=True)
|
print(f" 📊 {current_count} reviews | idle: {elapsed:.1f}s/{timeout_no_new}s", flush=True)
|
||||||
|
|
||||||
# STUCK DETECTION: If no new reviews for 3s+, try to unstick
|
# Stop conditions - check BEFORE recovery attempts
|
||||||
# Trigger at 3s, 6s, 9s... (every 3 seconds while stuck)
|
|
||||||
if elapsed >= 3 and int(elapsed) % 3 == 0:
|
|
||||||
print(f" 🔧 Recovery attempt #{recovery_count[0] + 1}...", flush=True)
|
|
||||||
unstick_scroll()
|
|
||||||
|
|
||||||
# Stop conditions
|
|
||||||
if current_count >= max_reviews:
|
if current_count >= max_reviews:
|
||||||
print(f"✅ Reached max: {current_count}")
|
print(f"✅ Reached max: {current_count}")
|
||||||
stop_scrolling.set()
|
stop_scrolling.set()
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Also stop if we have all reviews from the page
|
||||||
|
if total_reviews and current_count >= total_reviews:
|
||||||
|
print(f"✅ All {current_count} reviews collected")
|
||||||
|
stop_scrolling.set()
|
||||||
|
break
|
||||||
|
|
||||||
|
# STUCK DETECTION: If no new reviews for 3s+, try to unstick
|
||||||
|
# Only if we haven't collected all reviews yet
|
||||||
|
if elapsed >= 3 and int(elapsed) % 3 == 0:
|
||||||
|
print(f" 🔧 Recovery attempt #{recovery_count[0] + 1}...", flush=True)
|
||||||
|
unstick_scroll()
|
||||||
|
|
||||||
# Check scroll state - track if content is still being added
|
# Check scroll state - track if content is still being added
|
||||||
try:
|
try:
|
||||||
scroll_state = driver.execute_script("""
|
scroll_state = driver.execute_script("""
|
||||||
|
|||||||
Reference in New Issue
Block a user