Fix: Add early no-reviews detection and hide analytics for empty jobs

Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- The Lithuanian hospital page has a different structure (no tabs found)
- Needs separate investigation - it may use a different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-18 20:14:04 +00:00
parent c8c24ae483
commit e98da314a5
9 changed files with 1107 additions and 0 deletions

106
check_page_structure.py Normal file
View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""
Check the actual page structure - maybe reviews are already visible without clicking a tab!
"""
import time
from seleniumbase import Driver
# Diagnostic script: opens a Google Maps search result in a visible browser,
# dumps counts for several candidate review selectors, lists elements whose
# aria-label mentions a star rating, then keeps the browser open so the page
# can be inspected by hand.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"
driver = Driver(uc=True, headless=False)
try:
    driver.get(url)
    print(f"Initial URL: {url}")
    time.sleep(5)

    # GDPR consent dialog: best effort only. A failure here must not abort
    # the analysis, but it is reported instead of being silently swallowed,
    # and KeyboardInterrupt/SystemExit are no longer caught.
    try:
        form_btns = driver.find_elements('css selector', 'form button')
        for btn in form_btns:
            if 'accept' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"GDPR consent handling skipped: {exc!r}")

    # Check final URL
    final_url = driver.current_url
    print(f"Final URL after redirect: {final_url}")

    # Wait a bit more for dynamic content
    time.sleep(3)

    # Check page structure
    print("\n" + "=" * 80)
    print("PAGE STRUCTURE ANALYSIS")
    print("=" * 80)
    # innerText is read once so every text-based field describes the same
    # snapshot of the page.
    page_info = driver.execute_script("""
        const pageText = document.body.innerText;
        const lowerText = pageText.toLowerCase();
        return {
            tabs_found: document.querySelectorAll('button[role="tab"]').length,
            reviews_with_standard_selector: document.querySelectorAll('div.jftiEf.fontBodyMedium').length,
            reviews_with_jftiEf: document.querySelectorAll('div.jftiEf').length,
            divs_with_ratings: document.querySelectorAll('[aria-label*="star" i]').length,
            review_containers: document.querySelectorAll('div.fontBodyMedium').length,
            page_text_sample: pageText.substring(0, 500),
            has_review_text: lowerText.includes('review'),
            has_atsiliepimai_text: lowerText.includes('atsiliepimai')
        };
    """)
    print(f"\nTabs with role='tab': {page_info['tabs_found']}")
    print(f"div.jftiEf.fontBodyMedium: {page_info['reviews_with_standard_selector']}")
    print(f"div.jftiEf: {page_info['reviews_with_jftiEf']}")
    print(f"Elements with star ratings: {page_info['divs_with_ratings']}")
    print(f"div.fontBodyMedium: {page_info['review_containers']}")
    print(f"Contains 'review': {page_info['has_review_text']}")
    print(f"Contains 'atsiliepimai' (Lithuanian): {page_info['has_atsiliepimai_text']}")
    print("\nPage text sample (first 500 chars):")
    print(page_info['page_text_sample'])

    # Try to find ANY element with rating
    print("\n" + "=" * 80)
    print("SEARCHING FOR RATING ELEMENTS")
    print("=" * 80)
    # The class list is read with getAttribute('class') rather than
    # className: on SVG elements className is an SVGAnimatedString without
    # substring(), which would throw and abort the whole scan.
    # The total match count is returned separately from the sample so the
    # summary line reports the real number of matches.
    rating_search = driver.execute_script("""
        const classOf = (node) => ((node && node.getAttribute('class')) || '').substring(0, 100);
        const withRatings = [];
        for (const elem of document.querySelectorAll('*')) {
            const ariaLabel = elem.getAttribute('aria-label') || '';
            const lowered = ariaLabel.toLowerCase();
            if (lowered.includes('star') || lowered.includes('žvaigžd')) {
                const parent = elem.parentElement;
                withRatings.push({
                    tag: elem.tagName,
                    ariaLabel: ariaLabel.substring(0, 100),
                    classes: classOf(elem),
                    parentTag: parent ? parent.tagName : null,
                    parentClasses: parent ? classOf(parent) : null
                });
            }
        }
        return {total: withRatings.length, samples: withRatings.slice(0, 5)};
    """)
    samples = rating_search['samples']
    print(
        f"\nFound {rating_search['total']} elements with 'star' in aria-label "
        f"(showing first {len(samples)}):"
    )
    for i, elem in enumerate(samples, 1):
        print(f"\n Element {i}:")
        print(f" Tag: {elem['tag']}")
        print(f" Aria-label: {elem['ariaLabel']}")
        print(f" Classes: {elem['classes']}")
        print(f" Parent tag: {elem['parentTag']}")
        print(f" Parent classes: {elem['parentClasses']}")

    print(f"\n{'=' * 80}")
    print("Browser open for manual inspection...")
    print("LOOK AT THE PAGE - Are reviews visible? What's their structure?")
    print(f"{'=' * 80}")
    time.sleep(180)  # 3 minutes
finally:
    # Always close the browser, even if the analysis fails or is interrupted.
    driver.quit()