Changes: - Early detection for "no reviews" messages in 11 languages - Checks for disabled reviews tabs and 0-review indicators - Returns early (saves 30-40s) when no reviews exist - Frontend hides analytics/export buttons when reviews_count = 0 - Structural pattern matching improvements (work in progress) Known issue: - Lithuanian hospital page has different structure (no tabs found) - Needs separate investigation - may use different Google Maps layout Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
127 lines
4.1 KiB
Python
127 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Diagnostic script to find the correct selector for Lithuanian hospital reviews.
|
|
Opens the browser and pauses so we can inspect the page manually.
|
|
"""
|
|
|
|
import time
|
|
from seleniumbase import Driver
|
|
|
|
# Google Maps search URL for the hospital page under investigation.
# NOTE(review): "respubliikine" looks like a typo of "respublikine" - confirm
# before changing, since it alters the query sent to Google Maps.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

print("Opening browser...")
# Visible (non-headless) undetected-Chrome session so the page can be
# inspected by hand with DevTools while the script is paused.
driver = Driver(uc=True, headless=False)

try:
    # Force the English UI so the text matching below ("accept all",
    # "review") sees the labels it expects.
    url += '&hl=en' if '?' in url else '?hl=en'

    driver.get(url)
    print(f"Loaded: {url}")

    # Wait for page to load
    time.sleep(5)

    # Handle the GDPR consent form on a best-effort basis: a failure here must
    # not abort the diagnostic run, but it is reported rather than hidden, and
    # Ctrl-C / SystemExit are no longer swallowed.
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            btn_text = (btn.text or '').lower()
            if 'accept all' in btn_text or 'aceptar todo' in btn_text:
                print(f"Clicking GDPR consent: {btn.text}")
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"GDPR consent handling skipped: {exc}")

    # Click the reviews tab: match on visible text or aria-label.
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    for tab in tabs:
        text = (tab.text or '').lower()
        aria = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in text or 'review' in aria:
            print(f"Clicking reviews tab: {tab.text or aria[:30]}")
            # JS click avoids "element not interactable" problems when the
            # tab is covered by an overlay.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(3)
            break
    else:
        # Make the "no tabs found" case visible instead of silently
        # continuing with selectors that cannot match anything.
        print(f"No reviews tab found ({len(tabs)} tab element(s) on page).")

    # Candidate CSS selectors for review cards, each with a label for the
    # report. Each selector appears once so no query is run or printed twice.
    selectors_to_try = [
        ('div.jftiEf.fontBodyMedium', 'Known selector 1'),
        ('div.jftiEf', 'Known selector 2'),
        ('div[data-review-id]', 'Known selector 3'),
        ('div[jsaction*="review"]', 'jsaction with review'),
        ('[role="article"]', 'role=article'),
        ('div.fontBodyMedium', 'Just fontBodyMedium class'),
        ('div[class*="review"]', 'Class containing "review"'),
    ]

    print("\n" + "="*80)
    print("TESTING SELECTORS:")
    print("="*80)

    for selector, description in selectors_to_try:
        # Pass the selector as a script argument rather than splicing it into
        # the JavaScript source, so quotes inside a selector cannot break it.
        count = driver.execute_script(
            "return document.querySelectorAll(arguments[0]).length;",
            selector,
        )
        print(f"{description:30} | {selector:40} | Found: {count}")

    # Show sample HTML of the first element matching the most promising selector
    print("\n" + "="*80)
    print("SAMPLE HTML FROM FIRST MATCH:")
    print("="*80)

    # Returns a summary object for the first selector that matches anything,
    # or null (None in Python) when none of them match.
    sample_html = driver.execute_script("""
        const selectors = [
            'div.jftiEf.fontBodyMedium',
            'div.jftiEf',
            '[role="article"]',
            'div[jsaction*="review"]'
        ];

        for (let selector of selectors) {
            const elements = document.querySelectorAll(selector);
            if (elements.length > 0) {
                const first = elements[0];
                return {
                    selector: selector,
                    count: elements.length,
                    outerHTML: first.outerHTML.substring(0, 500),
                    classes: first.className,
                    hasRating: !!first.querySelector('[aria-label*="star" i]'),
                    hasAuthor: !!first.querySelector('img'),
                    textLength: first.textContent.length
                };
            }
        }
        return null;
    """)

    if sample_html:
        print(f"Selector: {sample_html['selector']}")
        print(f"Total found: {sample_html['count']}")
        print(f"Classes: {sample_html['classes']}")
        print(f"Has rating: {sample_html['hasRating']}")
        print(f"Has author img: {sample_html['hasAuthor']}")
        print(f"Text length: {sample_html['textLength']}")
        print("\nSample HTML (first 500 chars):")
        print(sample_html['outerHTML'])
    else:
        print("No elements matched any of the sample selectors.")

    print("\n" + "="*80)
    print("Browser will stay open for 60 seconds so you can inspect manually...")
    print("Use DevTools to find the correct selector!")
    print("="*80)

    # Keep browser open for inspection
    time.sleep(60)

finally:
    # Always release the browser, including on Ctrl-C during the pause.
    driver.quit()
    print("\nBrowser closed.")