Files
whyrating-engine-legacy/diagnose_selectors.py
Alejandro Gutiérrez e98da314a5 Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 20:14:04 +00:00

127 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""
Diagnostic script to find the correct selector for Lithuanian hospital reviews.
Opens the browser and pauses so we can inspect the page manually.
"""
import time
from seleniumbase import Driver
# Google Maps search URL for the page whose review markup is being diagnosed.
# NOTE(review): "respubliikine" looks like a misspelling of "respublikine".
# The string is left untouched because it may deliberately reproduce the
# query used elsewhere - confirm before changing it.
TARGET_URL = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

# Lower-case fragments of the consent dialog's "accept" button caption.
CONSENT_CAPTIONS = ('accept all', 'aceptar todo')

# (CSS selector, label) pairs whose match counts are printed for the page.
# Each selector appears exactly once so no count is reported twice.
SELECTORS_TO_TRY = [
    ('div.jftiEf.fontBodyMedium', 'Known selector 1'),
    ('div.jftiEf', 'Known selector 2'),
    ('div[data-review-id]', 'Known selector 3'),
    ('div[jsaction*="review"]', 'jsaction with review'),
    ('[role="article"]', 'role=article'),
    ('div.fontBodyMedium', 'Just fontBodyMedium class'),
    ('div[class*="review"]', 'Class containing "review"'),
]

# Selectors probed, in order, when dumping a sample of the first match.
PROMISING_SELECTORS = [
    'div.jftiEf.fontBodyMedium',
    'div.jftiEf',
    '[role="article"]',
    'div[jsaction*="review"]',
]

# The selector is handed over as a script argument instead of being pasted
# into the JavaScript source, so quotes inside it cannot break the script.
COUNT_SCRIPT = "return document.querySelectorAll(arguments[0]).length;"

# Returns a summary of the first element matching the first selector (from
# the list passed as arguments[0]) that matches anything, or null.
SAMPLE_SCRIPT = """
const selectors = arguments[0];
for (const selector of selectors) {
    const elements = document.querySelectorAll(selector);
    if (elements.length > 0) {
        const first = elements[0];
        return {
            selector: selector,
            count: elements.length,
            outerHTML: first.outerHTML.substring(0, 500),
            classes: first.className,
            hasRating: !!first.querySelector('[aria-label*="star" i]'),
            hasAuthor: !!first.querySelector('img'),
            textLength: first.textContent.length
        };
    }
}
return null;
"""

SEPARATOR = "=" * 80


def _with_locale(url, lang='en'):
    """Return *url* with an ``hl=<lang>`` query parameter appended."""
    joiner = '&' if '?' in url else '?'
    return f"{url}{joiner}hl={lang}"


def _dismiss_consent(driver):
    """Click the GDPR consent button if one is present; return True if clicked.

    This step is best effort: any error raised while looking for or clicking
    the button is reported and then ignored so the diagnosis can continue.
    """
    try:
        for button in driver.find_elements('css selector', 'form button'):
            caption = (button.text or '').lower()
            if any(fragment in caption for fragment in CONSENT_CAPTIONS):
                print(f"Clicking GDPR consent: {button.text}")
                button.click()
                time.sleep(2)
                return True
    except Exception as exc:
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # through while still reporting what went wrong.
        print(f"GDPR consent handling skipped: {exc}")
    return False


def _open_reviews_tab(driver):
    """Click the first tab whose text or aria-label mentions "review".

    Returns True when a tab was clicked. When nothing matches, the number of
    tab buttons found is printed so a page without tabs is easy to spot.
    """
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    for tab in tabs:
        text = (tab.text or '').lower()
        aria = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in text or 'review' in aria:
            print(f"Clicking reviews tab: {tab.text or aria[:30]}")
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(3)
            return True
    print(f"No reviews tab found ({len(tabs)} tab button(s) on page)")
    return False


def _report_selector_counts(driver):
    """Print how many elements each entry of SELECTORS_TO_TRY matches."""
    print("\n" + SEPARATOR)
    print("TESTING SELECTORS:")
    print(SEPARATOR)
    for selector, description in SELECTORS_TO_TRY:
        count = driver.execute_script(COUNT_SCRIPT, selector)
        print(f"{description:30} | {selector:40} | Found: {count}")


def _report_sample(driver):
    """Print details of the first element matched by PROMISING_SELECTORS."""
    print("\n" + SEPARATOR)
    print("SAMPLE HTML FROM FIRST MATCH:")
    print(SEPARATOR)
    sample = driver.execute_script(SAMPLE_SCRIPT, PROMISING_SELECTORS)
    if not sample:
        print("No element matched any of the promising selectors.")
        return
    print(f"Selector: {sample['selector']}")
    print(f"Total found: {sample['count']}")
    print(f"Classes: {sample['classes']}")
    print(f"Has rating: {sample['hasRating']}")
    print(f"Has author img: {sample['hasAuthor']}")
    print(f"Text length: {sample['textLength']}")
    print("\nSample HTML (first 500 chars):")
    print(sample['outerHTML'])


def main():
    """Open the target page, probe candidate selectors, then pause.

    The browser is opened non-headless and kept alive for 60 seconds after
    the report so the page can be inspected by hand; it is always closed in
    the ``finally`` clause.
    """
    print("Opening browser...")
    driver = Driver(uc=True, headless=False)
    try:
        # Add English locale for consistency
        url = _with_locale(TARGET_URL)
        driver.get(url)
        print(f"Loaded: {url}")

        # Wait for page to load
        time.sleep(5)

        _dismiss_consent(driver)

        time.sleep(2)
        _open_reviews_tab(driver)

        _report_selector_counts(driver)
        _report_sample(driver)

        print("\n" + SEPARATOR)
        print("Browser will stay open for 60 seconds so you can inspect manually...")
        print("Use DevTools to find the correct selector!")
        print(SEPARATOR)

        # Keep browser open for inspection
        time.sleep(60)
    finally:
        driver.quit()
        print("\nBrowser closed.")


if __name__ == "__main__":
    main()