Files
whyrating-engine-legacy/find_actual_reviews.py
Alejandro Gutiérrez e98da314a5 Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 20:14:04 +00:00

157 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""
Find the ACTUAL selector for reviews by looking for elements with review structure.
"""
import time
from seleniumbase import Driver
# Google Maps search used as the debugging target.
# NOTE(review): "respubliikine" looks like a typo for "respublikine"; it is left
# unchanged because it is the exact query being investigated -- confirm.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# Width of the "=" rule printed above and below each section title.
BANNER_WIDTH = 80

# JavaScript prologue shared by both probes so they always agree on what a
# "review-like" element is: a <div> that contains an element whose aria-label
# mentions "star" or "rating" and that has at least 50 characters of text
# (short blocks are buttons/labels, not reviews).
COLLECT_CANDIDATES_JS = """
    const candidates = Array.from(document.querySelectorAll('div')).filter(div => {
        if (!div.querySelector('[aria-label*="star" i], [aria-label*="rating" i]')) {
            return false;
        }
        return div.textContent.length >= 50;
    });
"""

# Describes the first five candidates (classes, structural hints, attributes).
DESCRIBE_CANDIDATES_JS = COLLECT_CANDIDATES_JS + """
    return {
        total_found: candidates.length,
        first_5: candidates.slice(0, 5).map(div => ({
            classes: div.className,
            has_author: !!div.querySelector('button, [aria-label*="photo" i]'),
            has_avatar: !!div.querySelector('img'),
            has_date: !!div.textContent.match(/\\d+\\s*(day|week|month|year|ago)/i),
            text_length: div.textContent.length,
            sample_text: div.textContent.substring(0, 150),
            tag_name: div.tagName,
            jslog: div.getAttribute('jslog'),
            data_review_id: div.getAttribute('data-review-id'),
            jsaction: div.getAttribute('jsaction')
        }))
    };
"""

# Computes the classes shared by every candidate and a selector built from
# them. Class names go through CSS.escape so unusual characters cannot yield
# an invalid selector; classList is used instead of splitting className on a
# single space so any whitespace separator is handled.
COMMON_SELECTOR_JS = COLLECT_CANDIDATES_JS + """
    if (candidates.length === 0) return null;
    const firstClasses = Array.from(candidates[0].classList);
    const commonClasses = firstClasses.filter(
        cls => candidates.every(el => el.classList.contains(cls))
    );
    return {
        total_reviews: candidates.length,
        common_classes: commonClasses,
        suggested_selector: commonClasses.length > 0
            ? 'div.' + commonClasses.map(cls => CSS.escape(cls)).join('.')
            : null,
        first_review_classes: candidates[0].className
    };
"""


def mentions(keyword, *texts):
    """Return True if *keyword* occurs in any of *texts*, ignoring case.

    *keyword* must already be lowercase. ``None`` entries in *texts* are
    treated as empty strings (element text / attributes may be missing).
    """
    return any(keyword in (text or '').lower() for text in texts)


def describe_candidate(index, rev):
    """Return the console lines describing one review-like element.

    *rev* is one entry of the ``first_5`` list produced by
    ``DESCRIBE_CANDIDATES_JS``; *index* is its 1-based position.
    """
    return [
        f"\n Review {index}:",
        f" Classes: {rev['classes']}",
        f" Has author: {rev['has_author']}",
        f" Has avatar: {rev['has_avatar']}",
        f" Has date: {rev['has_date']}",
        f" Text length: {rev['text_length']}",
        f" jslog: {rev['jslog']}",
        f" data-review-id: {rev['data_review_id']}",
        f" Sample: {rev['sample_text'][:80]}...",
    ]


def banner(title):
    """Print *title* framed by two rules of ``BANNER_WIDTH`` "=" characters."""
    print("\n" + "=" * BANNER_WIDTH)
    print(title)
    print("=" * BANNER_WIDTH)


def dismiss_consent(driver):
    """Click the first form button labelled "accept all", if there is one.

    Best-effort: a failure here must not abort the run, but it is reported
    instead of being silently swallowed, and Ctrl-C is no longer caught.
    """
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            if mentions('accept all', btn.text):
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"Consent dialog handling skipped: {exc}")


def open_reviews_tab(driver):
    """Click the first tab whose text or aria-label mentions "review".

    Returns True if such a tab was found and clicked, False otherwise.
    """
    for tab in driver.find_elements('css selector', 'button[role="tab"]'):
        if mentions('review', tab.text, tab.get_attribute('aria-label')):
            # JS click: avoids native-click failures on obscured elements.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(5)
            return True
    return False


def scroll_reviews_pane(driver):
    """Scroll the reviews pane three times so more reviews get loaded.

    Best-effort: if the pane selector does not match, the problem is
    reported and the run continues.
    """
    try:
        pane = driver.find_element('css selector', 'div.m6QErb.WNBkOb.XiKgde')
        for _ in range(3):
            driver.execute_script("arguments[0].scrollBy(0, 500);", pane)
            time.sleep(1)
    except Exception as exc:
        print(f"Could not scroll reviews pane: {exc}")


def report_candidates(driver):
    """Print the review-like elements found on the page; return the JS result."""
    banner("FINDING ACTUAL REVIEW ELEMENTS BY STRUCTURE:")
    review_info = driver.execute_script(DESCRIBE_CANDIDATES_JS)
    print(f"\nFound {review_info['total_found']} elements with review structure")
    print("\nFirst 5 review-like elements:")
    for i, rev in enumerate(review_info['first_5'], 1):
        for line in describe_candidate(i, rev):
            print(line)
    return review_info


def report_common_selector(driver):
    """Print the classes shared by all candidates and test the derived selector."""
    banner("FINDING COMMON SELECTOR:")
    common_selector = driver.execute_script(COMMON_SELECTOR_JS)
    if not common_selector:
        return
    print(f"Total review elements: {common_selector['total_reviews']}")
    print(f"Common classes: {common_selector['common_classes']}")
    print(f"Suggested selector: {common_selector['suggested_selector']}")
    print(f"First review full classes: {common_selector['first_review_classes']}")
    selector = common_selector['suggested_selector']
    if selector:
        # Pass the selector as a script argument rather than splicing it into
        # the JavaScript source, so quotes/backslashes cannot break the script.
        test_count = driver.execute_script(
            "return document.querySelectorAll(arguments[0]).length;", selector
        )
        print(f"\nTesting suggested selector: Found {test_count} elements")


def main():
    """Open the target page, probe it for review elements, then pause.

    The browser is always closed on exit, including on errors or Ctrl-C.
    """
    driver = Driver(uc=True, headless=False)
    try:
        driver.get(url)
        time.sleep(5)
        dismiss_consent(driver)
        time.sleep(2)
        if not open_reviews_tab(driver):
            print("No reviews tab found; probing the page as loaded.")
        scroll_reviews_pane(driver)
        review_info = report_candidates(driver)
        if review_info['total_found'] > 0:
            report_common_selector(driver)
        banner("Browser staying open for manual inspection (60s)...")
        time.sleep(60)
    finally:
        driver.quit()


if __name__ == "__main__":
    main()