Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has a different structure (no tabs found)
- Needs separate investigation - may use a different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
126
diagnose_selectors.py
Normal file
126
diagnose_selectors.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
"""
Diagnostic script to find the correct selector for Lithuanian hospital reviews.

Opens the browser, reports how many elements each candidate CSS selector
matches, prints a sample of the first match, and then pauses so the page can
be inspected manually.
"""

import time

# NOTE(review): the query spells "respubliikine" with a double "i" - confirm
# whether that is intentional before drawing conclusions from the page layout
# Google Maps returns for it.
SEARCH_URL = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

# Lower-cased fragments that identify the "accept all" consent button.
CONSENT_PHRASES = ('accept all', 'aceptar todo')

# (css selector, human-readable description) pairs to count on the page.
SELECTORS_TO_TRY = [
    ('div.jftiEf.fontBodyMedium', 'Known selector 1'),
    ('div.jftiEf', 'Known selector 2'),
    ('div[data-review-id]', 'Known selector 3'),
    ('div[jsaction*="review"]', 'jsaction with review'),
    ('[role="article"]', 'role=article'),
    ('div.fontBodyMedium', 'Just fontBodyMedium class'),
    ('div[class*="review"]', 'Class containing "review"'),
]

# Returns details about the first element matched by the first selector that
# matches anything, or null when none of them match.
SAMPLE_HTML_JS = """
    const selectors = [
        'div.jftiEf.fontBodyMedium',
        'div.jftiEf',
        '[role="article"]',
        'div[jsaction*="review"]'
    ];

    for (let selector of selectors) {
        const elements = document.querySelectorAll(selector);
        if (elements.length > 0) {
            const first = elements[0];
            return {
                selector: selector,
                count: elements.length,
                outerHTML: first.outerHTML.substring(0, 500),
                classes: first.className,
                hasRating: !!first.querySelector('[aria-label*="star" i]'),
                hasAuthor: !!first.querySelector('img'),
                textLength: first.textContent.length
            };
        }
    }
    return null;
"""

SEPARATOR = "=" * 80


def add_english_locale(page_url):
    """Return *page_url* with ``hl=en`` appended as a query parameter."""
    joiner = '&' if '?' in page_url else '?'
    return f"{page_url}{joiner}hl=en"


def is_consent_label(label):
    """Return True when a button label looks like an "accept all" consent."""
    lowered = (label or '').lower()
    return any(phrase in lowered for phrase in CONSENT_PHRASES)


def dismiss_gdpr_consent(driver):
    """Click the consent button if one is shown; return True when clicked.

    This is best-effort: a failure here is reported and must not abort the
    rest of the diagnosis.
    """
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            if is_consent_label(btn.text):
                print(f"Clicking GDPR consent: {btn.text}")
                btn.click()
                time.sleep(2)
                return True
    except Exception as exc:  # report instead of silently swallowing
        print(f"GDPR consent handling skipped: {exc}")
    return False


def open_reviews_tab(driver):
    """Click the first tab whose text or aria-label mentions "review".

    Returns True when a tab was clicked. Prints a diagnostic when none of the
    tab buttons matched, since a missing tab is itself a useful finding.
    """
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    for tab in tabs:
        text = (tab.text or '').lower()
        aria = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in text or 'review' in aria:
            print(f"Clicking reviews tab: {tab.text or aria[:30]}")
            # JS click avoids "element not interactable" on overlaid tabs.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(3)
            return True
    print(f"No reviews tab found among {len(tabs)} tab button(s).")
    return False


def report_selector_counts(driver):
    """Print how many elements each candidate selector matches."""
    print("\n" + SEPARATOR)
    print("TESTING SELECTORS:")
    print(SEPARATOR)

    for selector, description in SELECTORS_TO_TRY:
        # Pass the selector as an argument rather than splicing it into the
        # JavaScript source, so quotes inside it cannot break the script.
        count = driver.execute_script(
            "return document.querySelectorAll(arguments[0]).length;",
            selector,
        )
        print(f"{description:30} | {selector:40} | Found: {count}")


def report_first_match(driver):
    """Print a summary and HTML sample of the first matching element."""
    print("\n" + SEPARATOR)
    print("SAMPLE HTML FROM FIRST MATCH:")
    print(SEPARATOR)

    sample_html = driver.execute_script(SAMPLE_HTML_JS)
    if not sample_html:
        print("No elements matched any of the sample selectors.")
        return

    print(f"Selector: {sample_html['selector']}")
    print(f"Total found: {sample_html['count']}")
    print(f"Classes: {sample_html['classes']}")
    print(f"Has rating: {sample_html['hasRating']}")
    print(f"Has author img: {sample_html['hasAuthor']}")
    print(f"Text length: {sample_html['textLength']}")
    print("\nSample HTML (first 500 chars):")
    print(sample_html['outerHTML'])


def main():
    """Run the diagnosis against SEARCH_URL in a visible browser window."""
    # Imported here so the helpers above stay importable without the
    # browser automation stack installed.
    from seleniumbase import Driver

    print("Opening browser...")
    driver = Driver(uc=True, headless=False)

    try:
        # Add English locale for consistency
        page_url = add_english_locale(SEARCH_URL)

        driver.get(page_url)
        print(f"Loaded: {page_url}")

        # Wait for page to load
        time.sleep(5)

        # Handle GDPR
        dismiss_gdpr_consent(driver)

        # Click reviews tab
        time.sleep(2)
        open_reviews_tab(driver)

        # Try different selectors and show what we find
        report_selector_counts(driver)

        # Show sample HTML of the first element matching a promising selector
        report_first_match(driver)

        print("\n" + SEPARATOR)
        print("Browser will stay open for 60 seconds so you can inspect manually...")
        print("Use DevTools to find the correct selector!")
        print(SEPARATOR)

        # Keep browser open for inspection
        time.sleep(60)

    finally:
        driver.quit()
        print("\nBrowser closed.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user