Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has a different structure (no tabs found)
- Needs separate investigation - may use a different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
71
test_lithuanian_hospital.py
Normal file
71
test_lithuanian_hospital.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for Lithuanian hospital to verify structural pattern matching works.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from modules.fast_scraper import fast_scrape_reviews
|
||||
|
||||
# Configure logging to see what's happening
|
||||
# Send INFO-and-above records to the root handler so the scraper's progress
# messages are visible while this script runs.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything below.
log = logging.getLogger(__name__)
|
||||
|
||||
def test_lithuanian_hospital():
    """Scrape the Lithuanian hospital listing and log a summary of the result.

    Calls ``fast_scrape_reviews`` with a visible (non-headless) browser and an
    effectively unlimited scroll budget, logs the returned fields and the
    first three reviews, then classifies the run by review count: more than
    200 is logged as success, exactly 0 as failure, anything else as partial.

    The function only logs; it returns ``None`` and makes no assertions.
    """
    # NOTE(review): the query spells "respubliikine" with a doubled "i",
    # unlike the hospital name logged below - confirm whether that is
    # intentional before relying on this URL.
    url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

    separator = "=" * 80

    log.info(separator)
    log.info("Testing Lithuanian Hospital: Panevėžio respublikinė ligoninė")
    log.info("Expected: 271 reviews")
    log.info("Previous result: 0 reviews (selector mismatch)")
    log.info(separator)

    # Run the scraper with headless mode OFF so we can see what's happening
    result = fast_scrape_reviews(
        url=url,
        headless=False,  # Show browser for debugging
        max_scrolls=999999,  # Unlimited - use idle detection
    )

    # Read every field defensively: a failure result may omit some keys, and
    # a KeyError here would hide the 'error' / 'message' logged further down.
    count = result.get('count') or 0
    elapsed = result.get('time') or 0.0
    reviews = result.get('reviews') or []

    log.info(separator)
    log.info("RESULTS:")
    log.info(f"Success: {result.get('success')}")
    log.info(f"Reviews found: {count}")
    log.info(f"Total reviews on page: {result.get('total_reviews', 'Unknown')}")
    log.info(f"Time taken: {elapsed:.2f}s")

    if result.get('message'):
        log.info(f"Message: {result['message']}")

    if result.get('error'):
        log.error(f"Error: {result['error']}")

    log.info(separator)

    # Show first few reviews if found
    if count > 0:
        log.info("\nFirst 3 reviews:")
        for i, review in enumerate(reviews[:3], 1):
            # A review may carry 'text': None; the .get default only covers a
            # missing key, so guard explicitly before slicing.
            text = review.get('text', 'N/A')
            if text is None:
                text = 'N/A'
            log.info(f"\n  Review {i}:")
            log.info(f"    Author: {review.get('author', 'N/A')}")
            log.info(f"    Rating: {review.get('rating', 'N/A')}")
            log.info(f"    Date: {review.get('date_text', 'N/A')}")
            log.info(f"    Text: {text[:100]}...")

    # Verify the fix worked
    if count > 200:
        log.info("\n✅ SUCCESS! Structural pattern matching found reviews!")
        log.info(f"   Got {count} reviews (expected ~271)")
    elif count == 0:
        log.error("\n❌ FAILED! Still getting 0 reviews - selector issue not fixed")
    else:
        log.warning(f"\n⚠️ PARTIAL: Got {count} reviews (expected ~271)")
        log.warning("   May need to increase idle detection patience")
|
||||
# Run the scrape check only when this file is executed directly as a script.
if __name__ == "__main__":
    test_lithuanian_hospital()
|
||||
Reference in New Issue
Block a user