Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
72 lines
2.5 KiB
Python
72 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script for Lithuanian hospital to verify structural pattern matching works.
|
|
"""
|
|
|
|
import logging
|
|
from modules.fast_scraper import fast_scrape_reviews
|
|
|
|
# Route INFO-and-above records through the root logger, stamped with the
# time, logger name and level, so progress is visible while the script runs.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by the test function below.
log = logging.getLogger(__name__)
|
|
|
|
def test_lithuanian_hospital():
    """Scrape the Lithuanian hospital that was getting 0 reviews and log the outcome.

    Calls ``fast_scrape_reviews`` on a Google Maps search URL with the browser
    window visible, then logs:

    * the summary fields of the returned dict (``success``, ``count``,
      ``total_reviews``, ``time`` and, when present, ``message`` / ``error``);
    * a preview of up to the first three entries of ``result['reviews']``;
    * a verdict derived from ``result['count']``: more than 200 is reported as
      success, exactly 0 as failure, anything else as a partial result.

    Nothing is asserted and nothing is returned; the outcome is reported
    through the module logger only.
    """
    expected_reviews = 271    # review count the listing is expected to show
    success_threshold = 200   # counts above this are treated as "fix worked"
    preview_chars = 100       # max characters of review text to log
    separator = "=" * 80

    # NOTE(review): the query spells "respubliikine" with a doubled "i", unlike
    # the hospital name logged below. Left unchanged because this may be the
    # exact query that reproduced the 0-review result -- confirm before fixing.
    url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

    log.info(separator)
    log.info("Testing Lithuanian Hospital: Panevėžio respublikinė ligoninė")
    log.info(f"Expected: {expected_reviews} reviews")
    log.info("Previous result: 0 reviews (selector mismatch)")
    log.info(separator)

    # Run the scraper with headless mode OFF so we can see what's happening.
    result = fast_scrape_reviews(
        url=url,
        headless=False,  # show the browser for debugging
        # Effectively unlimited; per the original author the scraper is
        # expected to stop on its own via idle detection.
        max_scrolls=999999,
    )

    log.info(separator)
    log.info("RESULTS:")
    log.info(f"Success: {result['success']}")
    count = result['count']
    log.info(f"Reviews found: {count}")
    log.info(f"Total reviews on page: {result.get('total_reviews', 'Unknown')}")
    log.info(f"Time taken: {result['time']:.2f}s")

    if result.get('message'):
        log.info(f"Message: {result['message']}")

    if result.get('error'):
        log.error(f"Error: {result['error']}")

    log.info(separator)

    # Show first few reviews if found.
    if count > 0:
        log.info("\nFirst 3 reviews:")
        for i, review in enumerate(result['reviews'][:3], 1):
            log.info(f"\n Review {i}:")
            log.info(f" Author: {review.get('author', 'N/A')}")
            log.info(f" Rating: {review.get('rating', 'N/A')}")
            log.info(f" Date: {review.get('date_text', 'N/A')}")

            # A review with no body may carry 'text': None; the .get default
            # only covers a missing key, so guard None before slicing.
            text = review.get('text', 'N/A')
            if text is None:
                text = 'N/A'
            # Only mark the preview as truncated when it really was.
            suffix = "..." if len(text) > preview_chars else ""
            log.info(f" Text: {text[:preview_chars]}{suffix}")

    # Verify the fix worked.
    if count > success_threshold:
        log.info("\n✅ SUCCESS! Structural pattern matching found reviews!")
        log.info(f" Got {count} reviews (expected ~{expected_reviews})")
    elif count == 0:
        log.error("\n❌ FAILED! Still getting 0 reviews - selector issue not fixed")
    else:
        log.warning(f"\n⚠️ PARTIAL: Got {count} reviews (expected ~{expected_reviews})")
        log.warning(" May need to increase idle detection patience")
|
|
|
|
# Run the check only when this file is executed directly, not when imported.
if __name__ == "__main__":
    test_lithuanian_hospital()
|