#!/usr/bin/env python3
"""
Test if English locale exposes better date formats.

Scrapes the same Google Maps place once per UI language (the ``hl`` query
parameter), prints the date information found on the first few reviews,
saves the samples as JSON and finally prints the Spanish and English date
strings side by side.
"""
import json

from modules.fast_scraper import fast_scrape_reviews

# Where the per-language samples are written (also echoed in the output).
OUTPUT_PATH = '/tmp/date_format_comparison.json'
SEPARATOR = '=' * 80
# Number of leading reviews inspected per language.
MAX_SAMPLES = 5

# Same place in both cases; only the ``hl`` (interface language) value differs.
PLACE_URL_TEMPLATE = (
    "https://www.google.com/maps/place/Soho+Club/data=!4m7!3m6!"
    "1s0x46dd947294b213bf:0x864c7a232527adb4!8m2!3d54.67869!4d25.2667181!"
    "16s%2Fg%2F1thhj5ml!19sChIJvxOylHKU3UYRtK0nJSN6TIY"
    "?authuser=0&hl={hl}&rclk=1"
)

# Try both Spanish and English URLs
urls = {
    'spanish': PLACE_URL_TEMPLATE.format(hl='es'),
    'english': PLACE_URL_TEMPLATE.format(hl='en'),
}

results = {}

for lang, url in urls.items():
    print(f"\n{SEPARATOR}")
    print(f"Testing: {lang.upper()}")
    print(SEPARATOR)

    # NOTE(review): assumes the scraper returns a dict-like object with a
    # 'reviews' list of dicts -- confirm against modules.fast_scraper.
    result = fast_scrape_reviews(url, headless=True)
    reviews = result.get('reviews') or []
    print(f"Extracted {len(reviews)} reviews")

    if not reviews:
        # Nothing to sample; the language is left out of ``results`` so the
        # side-by-side comparison below is skipped for incomplete runs.
        continue

    # Show first 5 review dates
    sample = []
    for i, rev in enumerate(reviews[:MAX_SAMPLES], 1):
        date_info = {
            'author': rev.get('author'),
            'date_text': rev.get('date_text'),
            'debug_date_info': rev.get('debug_date_info'),
        }
        sample.append(date_info)

        print(f"\nReview {i}:")
        print(f" Author: {date_info['author']}")
        print(f" Date: {date_info['date_text']}")
        if date_info.get('debug_date_info'):
            date_attrs = date_info['debug_date_info'].get('date_elem_attrs', {})
            print(f" Date element attributes: {date_attrs}")

    results[lang] = {
        'count': len(reviews),
        'sample': sample,
    }

# Save comparison
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

print(f"\n{SEPARATOR}")
print(f"COMPARISON SAVED TO: {OUTPUT_PATH}")
print(SEPARATOR)

# Quick comparison
if 'spanish' in results and 'english' in results:
    # Emoji are written as escapes so they survive any source re-encoding:
    # U+1F4CA is the bar-chart symbol, U+1F504 the "changed" arrows.
    print("\n\U0001F4CA SPANISH vs ENGLISH DATE FORMATS:")
    print("-" * 80)

    sp_sample = results['spanish'].get('sample', [])
    en_sample = results['english'].get('sample', [])

    # zip() stops at the shorter sample, so only paired reviews are compared.
    for position, (sp_entry, en_entry) in enumerate(zip(sp_sample, en_sample), 1):
        # The key is always present (possibly None), so a .get() default
        # would never apply; ``or`` supplies the placeholder instead.
        sp_date = sp_entry.get('date_text') or 'N/A'
        en_date = en_entry.get('date_text') or 'N/A'

        # Check if formats are different
        marker = "\U0001F504" if sp_date != en_date else "="
        print(f" {marker} Review {position}:")
        print(f" ES: '{sp_date}'")
        print(f" EN: '{en_date}'")
        print()