Files
whyrating-engine-legacy/test_lithuanian_hospital.py
Alejandro Gutiérrez e98da314a5 Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 20:14:04 +00:00

72 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""
Test script for Lithuanian hospital to verify structural pattern matching works.
"""
import logging
from modules.fast_scraper import fast_scrape_reviews
# Module-level logger for this script, named after the module.
log = logging.getLogger(__name__)

# Root logging setup: INFO and above, each record prefixed with timestamp,
# logger name and level so the scraper's progress can be followed.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def test_lithuanian_hospital():
    """Scrape the Lithuanian hospital listing and log how many reviews came back.

    Manual diagnostic for the listing that was previously returning 0 reviews.
    It calls ``fast_scrape_reviews`` with a visible browser, logs the summary
    fields of the returned dict, previews up to three reviews, and logs a
    verdict based on the review count (more than 200 counts as success
    against the ~271 expected). Nothing is asserted and nothing is returned;
    the outcome is read from the log output.

    Result fields are read defensively so that a failed or partial result
    still produces a readable report instead of raising ``KeyError`` or
    ``TypeError`` half-way through.
    """
    # NOTE(review): the query spells "respubliikine" with a double "i" while
    # the name logged below is "respublikinė" — confirm whether this is the
    # exact URL of the failing job before correcting it.
    url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"
    separator = "=" * 80

    log.info(separator)
    log.info("Testing Lithuanian Hospital: Panevėžio respublikinė ligoninė")
    log.info("Expected: 271 reviews")
    log.info("Previous result: 0 reviews (selector mismatch)")
    log.info(separator)

    # Run the scraper with headless mode OFF so we can see what's happening
    result = fast_scrape_reviews(
        url=url,
        headless=False,  # Show browser for debugging
        max_scrolls=999999,  # Unlimited - use idle detection
    )

    # A failed run may omit some keys; fall back instead of crashing so the
    # error message below is still reported.
    reviews = result.get('reviews') or []
    count = result.get('count')
    if count is None:
        count = len(reviews)
    elapsed = result.get('time')

    log.info(separator)
    log.info("RESULTS:")
    log.info("Success: %s", result.get('success'))
    log.info("Reviews found: %s", count)
    log.info("Total reviews on page: %s", result.get('total_reviews', 'Unknown'))
    if elapsed is None:
        log.info("Time taken: Unknown")
    else:
        log.info("Time taken: %.2fs", elapsed)
    if result.get('message'):
        log.info("Message: %s", result['message'])
    if result.get('error'):
        log.error("Error: %s", result['error'])
    log.info(separator)

    # Show first few reviews if found
    if count > 0:
        log.info("\nFirst 3 reviews:")
        for index, review in enumerate(reviews[:3], 1):
            # Rating-only reviews may carry text=None; slicing None would
            # raise, so substitute the placeholder first.
            text = str(review.get('text') or 'N/A')
            # Only mark the preview with an ellipsis when it was truncated.
            preview = text if len(text) <= 100 else text[:100] + "..."
            log.info("\n Review %d:", index)
            log.info(" Author: %s", review.get('author', 'N/A'))
            log.info(" Rating: %s", review.get('rating', 'N/A'))
            log.info(" Date: %s", review.get('date_text', 'N/A'))
            log.info(" Text: %s", preview)

    # Verify the fix worked
    if count > 200:
        log.info("\n✅ SUCCESS! Structural pattern matching found reviews!")
        log.info(" Got %s reviews (expected ~271)", count)
    elif count == 0:
        log.error("\n❌ FAILED! Still getting 0 reviews - selector issue not fixed")
    else:
        log.warning("\n⚠️ PARTIAL: Got %s reviews (expected ~271)", count)
        log.warning(" May need to increase idle detection patience")
if __name__ == "__main__":
    # Executed as a script (not imported): run the manual check once.
    test_lithuanian_hospital()