Fix: Add early no-reviews detection and hide analytics for empty jobs

Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-18 20:14:04 +00:00
parent c8c24ae483
commit e98da314a5
9 changed files with 1107 additions and 0 deletions

166
brute_force_selector.py Normal file
View File

@@ -0,0 +1,166 @@
#!/usr/bin/env python3
"""
Brute force approach: Try every possible div class combination and see which gives us reviews.
"""
import time
from seleniumbase import Driver
# Google Maps search URL for the place under investigation (English UI).
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# CSS selector of the scrollable reviews pane. It is used for scrolling and is
# also handed to the in-page script as the root of the candidate search.
REVIEWS_PANE_SELECTOR = 'div.m6QErb.WNBkOb.XiKgde'

# In-page script: scores every classed <div> inside the reviews pane on how
# review-like it looks and returns the ten class-based selectors that match
# the most elements. Expects the pane selector as arguments[0].
# Returns {error: ...} when the pane is missing, else {top_selectors: [...]}.
FIND_CANDIDATES_JS = """
    const pane = document.querySelector(arguments[0]);
    if (!pane) return {error: 'Pane not found'};

    const selectorCounts = {};
    for (const div of pane.querySelectorAll('div')) {
        // Skip divs without any class token (classList also ignores
        // whitespace-only class attributes, which would otherwise yield
        // the invalid selector 'div.').
        if (div.classList.length === 0) continue;

        // Review indicators
        const textLength = div.textContent.length;
        const hasRating = !!div.querySelector('[aria-label*="star" i]');
        const hasText = textLength > 50 && textLength < 1000;  // Individual review size
        const hasAuthor = !!div.querySelector('button[aria-label*="photo" i], img');

        // rating = 3, text = 2, author = 1; a score of 4 or more therefore
        // means a rating plus at least one of text / author.
        const score = (hasRating ? 3 : 0) + (hasText ? 2 : 0) + (hasAuthor ? 1 : 0);
        if (score < 4) continue;

        // Escape each class token so unusual class names cannot produce a
        // selector that makes querySelectorAll throw.
        const selector = 'div.' + Array.from(div.classList, c => CSS.escape(c)).join('.');

        // Only the first div seen for a selector contributes its details;
        // counting is done once per distinct selector.
        if (selectorCounts[selector]) continue;
        selectorCounts[selector] = {
            count: pane.querySelectorAll(selector).length,
            score: score,
            text_length: textLength,
            sample: div.textContent.substring(0, 100)
        };
    }

    // Sort by count (we want selectors that match many reviews)
    const sorted = Object.entries(selectorCounts)
        .sort((a, b) => b[1].count - a[1].count)
        .slice(0, 10);

    return {
        top_selectors: sorted.map(([selector, info]) => ({
            selector: selector,
            count: info.count,
            score: info.score,
            text_length: info.text_length,
            sample: info.sample
        }))
    };
"""

# In-page script: inspects the first three elements matching the selector
# passed as arguments[0] and reports which review features each one has.
# The selector is passed as an argument instead of being spliced into the
# script text, so quotes or backslashes in it cannot break the JavaScript.
TEST_SELECTOR_JS = r"""
    const elements = document.querySelectorAll(arguments[0]);
    const reviews = [];
    for (let i = 0; i < Math.min(3, elements.length); i++) {
        const elem = elements[i];
        reviews.push({
            has_author: !!elem.querySelector('button, img'),
            has_rating: !!elem.querySelector('[aria-label*="star" i]'),
            has_date: !!elem.textContent.match(/\d+\s*(day|week|month|year|ago)/i),
            text_length: elem.textContent.length,
            text_sample: elem.textContent.substring(0, 150)
        });
    }
    return reviews;
"""

driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR consent dialog: best effort, the dialog is not always shown.
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        # Keep going: failing to dismiss the dialog must not abort the run,
        # but do not hide the reason (and do not swallow KeyboardInterrupt).
        print(f"GDPR consent step skipped ({type(exc).__name__})")

    # Click reviews tab (matched on visible text or aria-label)
    time.sleep(2)
    for tab in driver.find_elements('css selector', 'button[role="tab"]'):
        tab_text = (tab.text or '').lower()
        tab_label = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in tab_text or 'review' in tab_label:
            # JS click avoids "element not interactable" on overlaid tabs.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(5)
            break

    # Scroll to load reviews: best effort, the pane may not exist.
    try:
        pane = driver.find_element('css selector', REVIEWS_PANE_SELECTOR)
        for _ in range(10):
            driver.execute_script("arguments[0].scrollBy(0, 400);", pane)
            time.sleep(0.3)
    except Exception as exc:
        print(f"Scrolling the reviews pane skipped ({type(exc).__name__})")

    print("\n" + "="*80)
    print("BRUTE FORCE SELECTOR SEARCH")
    print("="*80)

    # Get ALL unique class combinations from divs inside the reviews pane
    candidates = driver.execute_script(FIND_CANDIDATES_JS, REVIEWS_PANE_SELECTOR)

    if 'error' in candidates:
        print(f"ERROR: {candidates['error']}")
    else:
        top_selectors = candidates['top_selectors']

        print("\nTop 10 candidate selectors (sorted by count):\n")
        for i, candidate in enumerate(top_selectors, 1):
            print(f"{i}. {candidate['selector']}")
            print(f" Count: {candidate['count']} | Score: {candidate['score']} | Text length: {candidate['text_length']}")
            print(f" Sample: {candidate['sample'][:80]}...")
            print()

        # Test the top selector
        if top_selectors:
            top_selector = top_selectors[0]['selector']
            print(f"\n{'='*80}")
            print(f"TESTING TOP SELECTOR: {top_selector}")
            print(f"{'='*80}")

            test_result = driver.execute_script(TEST_SELECTOR_JS, top_selector)

            print(f"\nFirst 3 elements using {top_selector}:")
            for i, rev in enumerate(test_result, 1):
                print(f"\n Element {i}:")
                for key, value in rev.items():
                    print(f" {key}: {value}")

    # Leave the window up so the page can be inspected by hand.
    print(f"\n{'='*80}")
    print("Browser staying open for 60 seconds...")
    print(f"{'='*80}")
    time.sleep(60)
finally:
    # Always release the browser, even if a step above raised.
    driver.quit()