Files
whyrating-engine-legacy/inspect_pane_content.py
Alejandro Gutiérrez e98da314a5 Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 20:14:04 +00:00

158 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""
Check what's actually inside the reviews pane after scrolling.
"""
import time
from seleniumbase import Driver
# Google Maps search URL for the place under inspection.
# NOTE(review): hl=en presumably requests the English UI, which the
# English-only text matching below ('accept all', 'review', 'star', 'ago')
# depends on -- confirm if the target locale ever changes.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# Visible (non-headless) browser so the page can be inspected by hand at the
# end of the run.
driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR: best-effort dismissal of the consent form. A failure here must not
    # abort the run, but only ordinary errors are absorbed -- a bare `except:`
    # would also swallow KeyboardInterrupt/SystemExit raised during the sleep.
    try:
        form_btns = driver.find_elements('css selector', 'form button')
        for btn in form_btns:
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"Consent dialog handling skipped ({type(exc).__name__})")

    # Click reviews tab: first tab whose visible text or aria-label mentions
    # "review". Every tab is printed so a missing tab can be diagnosed.
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    review_tab_found = False
    for tab in tabs:
        text = (tab.text or '').lower()
        aria = (tab.get_attribute('aria-label') or '').lower()
        print(f"Tab: text='{tab.text}', aria='{tab.get_attribute('aria-label')}'")
        if 'review' in text or 'review' in aria:
            print(" -> Clicking this tab!")
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(6)  # Wait longer
            review_tab_found = True
            break
    if not review_tab_found:
        print("WARNING: Reviews tab not found!")

    # Find and scroll the pane: try the specific selector first, then fall
    # back to the broader class.
    print("\nLooking for scrollable pane...")
    pane = None
    try:
        pane = driver.find_element('css selector', 'div.m6QErb.WNBkOb.XiKgde')
        print("Found pane: div.m6QErb.WNBkOb.XiKgde")
    except Exception:
        print("Pane not found with standard selector!")
        try:
            pane = driver.find_element('css selector', 'div.m6QErb')
            print("Found pane: div.m6QErb")
        except Exception:
            print("No pane found at all!")

    if pane is not None:
        print("\nScrolling pane to load reviews...")
        for i in range(15):
            driver.execute_script("arguments[0].scrollBy(0, 400);", pane)
            time.sleep(0.4)
            if (i + 1) % 5 == 0:
                print(f" Scrolled {i+1} times...")

    # Now check what's in the pane. The script below locates the pane again on
    # its own and returns {error: ...} when there is none, so it runs whether
    # or not a pane was found above.
    print("\n" + "="*80)
    print("ANALYZING PANE CONTENT")
    print("="*80)
    content_info = driver.execute_script("""
        const pane = document.querySelector('div.m6QErb.WNBkOb.XiKgde') || document.querySelector('div.m6QErb');
        if (!pane) return {error: 'No pane found'};

        // One relative-date pattern shared by both date checks, so that
        // "N minutes ago" is treated the same way in each of them.
        const datePattern = /\\d+\\s*(day|week|month|year|hour|minute|ago)/i;

        // Get all child divs (direct and nested)
        const allDivs = Array.from(pane.querySelectorAll('div'));

        // Get all unique class names used
        const classNames = new Set();
        allDivs.forEach(div => {
            if (div.className) {
                div.className.split(' ').forEach(cls => {
                    if (cls.trim()) classNames.add(cls.trim());
                });
            }
        });

        // Find divs with ratings
        const divsWithRatings = allDivs.filter(div => {
            return !!div.querySelector('[aria-label*="star" i]');
        });

        // Find divs with author photos
        const divsWithPhotos = allDivs.filter(div => {
            return !!div.querySelector('img[src*="photo"], img[src*="avatar"]');
        });

        // Find divs with date patterns
        const divsWithDates = allDivs.filter(div => {
            return datePattern.test(div.textContent);
        });

        // Find divs with ALL three (any <img> counts as a photo here), and a
        // text length that excludes tiny fragments and whole-pane containers
        const reviewLikeDivs = allDivs.filter(div => {
            const hasRating = !!div.querySelector('[aria-label*="star" i]');
            const hasPhoto = !!div.querySelector('img');
            const hasDate = datePattern.test(div.textContent);
            const textLen = div.textContent.length;
            return hasRating && hasPhoto && hasDate && textLen > 50 && textLen < 2000;
        });

        return {
            total_divs: allDivs.length,
            unique_classes: Array.from(classNames).sort(),
            divs_with_ratings: divsWithRatings.length,
            divs_with_photos: divsWithPhotos.length,
            divs_with_dates: divsWithDates.length,
            review_like_divs: reviewLikeDivs.length,
            review_like_classes: reviewLikeDivs.slice(0, 5).map(d => ({
                classes: d.className,
                text_length: d.textContent.length,
                sample: d.textContent.substring(0, 100)
            }))
        };
    """)

    if 'error' in content_info:
        print(f"ERROR: {content_info['error']}")
    else:
        print(f"\nTotal divs in pane: {content_info['total_divs']}")
        print(f"Divs with ratings: {content_info['divs_with_ratings']}")
        print(f"Divs with photos: {content_info['divs_with_photos']}")
        print(f"Divs with dates: {content_info['divs_with_dates']}")
        print(f"Divs matching ALL criteria (review-like): {content_info['review_like_divs']}")

        print("\nFirst 20 unique classes found in pane:")
        for cls in content_info['unique_classes'][:20]:
            print(f" {cls}")

        if content_info['review_like_divs'] > 0:
            print("\nFirst 5 review-like divs:")
            for i, div_info in enumerate(content_info['review_like_classes'], 1):
                print(f"\n Div {i}:")
                print(f" Classes: {div_info['classes']}")
                print(f" Text length: {div_info['text_length']}")
                print(f" Sample: {div_info['sample'][:80]}...")

    # Keep the window open so the DOM can be examined by hand.
    print(f"\n{'='*80}")
    print("Browser staying open for manual inspection (120 seconds)...")
    print("Look at the DevTools to see the actual review elements!")
    print(f"{'='*80}")
    time.sleep(120)
finally:
    # Always release the browser, including on errors or Ctrl-C.
    driver.quit()