Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
158 lines
5.6 KiB
Python
158 lines
5.6 KiB
Python
#!/usr/bin/env python3
"""
Check what's actually inside the reviews pane after scrolling.

Diagnostic script: opens a Google Maps search URL in a visible browser,
accepts the consent dialog if one is shown, clicks the first tab whose text
or aria-label mentions "review", scrolls the results pane, and then prints
statistics about the <div> elements found inside that pane (ratings, photos,
relative dates, and divs that look like complete reviews).

The browser is kept open for two minutes at the end so the DOM can be
inspected by hand, and is always closed afterwards.
"""

import time
from seleniumbase import Driver

url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# CSS selectors for the scrollable pane. These are minified Google Maps class
# names taken as-is; NOTE(review): presumably they can change between Maps
# releases - confirm against the live page when the script stops finding a pane.
PANE_SELECTOR = 'div.m6QErb.WNBkOb.XiKgde'
PANE_FALLBACK_SELECTOR = 'div.m6QErb'

driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR: best-effort click on the "Accept all" consent button.
    # A failure here must not abort the diagnostics, but it is reported
    # instead of being swallowed silently (and Ctrl+C is no longer caught).
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"Consent dialog handling skipped: {exc}")

    # Click reviews tab
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    review_tab_found = False
    for tab in tabs:
        # Read each property once; the raw values are printed, the
        # lower-cased ones are used for matching.
        raw_text = tab.text
        raw_aria = tab.get_attribute('aria-label')
        text = (raw_text or '').lower()
        aria = (raw_aria or '').lower()
        print(f"Tab: text='{raw_text}', aria='{raw_aria}'")
        if 'review' in text or 'review' in aria:
            print("  -> Clicking this tab!")
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(6)  # Wait longer
            review_tab_found = True
            break

    if not review_tab_found:
        print("WARNING: Reviews tab not found!")

    # Find and scroll the pane: try the specific selector first, then the
    # broader one.
    print("\nLooking for scrollable pane...")
    pane = None
    try:
        pane = driver.find_element('css selector', PANE_SELECTOR)
        print("Found pane: div.m6QErb.WNBkOb.XiKgde")
    except Exception:
        print("Pane not found with standard selector!")
        try:
            pane = driver.find_element('css selector', PANE_FALLBACK_SELECTOR)
            print("Found pane: div.m6QErb")
        except Exception:
            print("No pane found at all!")

    if pane:
        print("\nScrolling pane to load reviews...")
        for i in range(15):
            driver.execute_script("arguments[0].scrollBy(0, 400);", pane)
            time.sleep(0.4)
            if (i + 1) % 5 == 0:
                print(f"  Scrolled {i+1} times...")

    # Now check what's in the pane
    print("\n" + "="*80)
    print("ANALYZING PANE CONTENT")
    print("="*80)

    # The probe runs in the page and returns a plain object. Backslashes are
    # doubled because this is a regular (non-raw) Python string.
    content_info = driver.execute_script("""
        const pane = document.querySelector('div.m6QErb.WNBkOb.XiKgde') || document.querySelector('div.m6QErb');
        if (!pane) return {error: 'No pane found'};

        // One shared pattern so the "dates" count and the "review-like"
        // filter agree on what counts as a relative date.
        const datePattern = /\\d+\\s*(day|week|month|year|hour|minute|ago)/i;
        const hasRating = div => !!div.querySelector('[aria-label*="star" i]');
        const hasDate = div => datePattern.test(div.textContent);

        // Get all child divs (direct and nested)
        const allDivs = Array.from(pane.querySelectorAll('div'));

        // Get all unique class names used
        const classNames = new Set();
        allDivs.forEach(div => {
            if (div.className) {
                div.className.split(' ').forEach(cls => {
                    if (cls.trim()) classNames.add(cls.trim());
                });
            }
        });

        // Find divs with ratings
        const divsWithRatings = allDivs.filter(hasRating);

        // Find divs with author photos
        const divsWithPhotos = allDivs.filter(div => {
            return !!div.querySelector('img[src*="photo"], img[src*="avatar"]');
        });

        // Find divs with date patterns
        const divsWithDates = allDivs.filter(hasDate);

        // Find divs with ALL three (any <img> counts as a photo here,
        // which is looser than the photo/avatar count above)
        const reviewLikeDivs = allDivs.filter(div => {
            const hasPhoto = !!div.querySelector('img');
            const textLen = div.textContent.length;
            return hasRating(div) && hasPhoto && hasDate(div) && textLen > 50 && textLen < 2000;
        });

        return {
            total_divs: allDivs.length,
            unique_classes: Array.from(classNames).sort(),
            divs_with_ratings: divsWithRatings.length,
            divs_with_photos: divsWithPhotos.length,
            divs_with_dates: divsWithDates.length,
            review_like_divs: reviewLikeDivs.length,
            review_like_classes: reviewLikeDivs.slice(0, 5).map(d => ({
                classes: d.className,
                text_length: d.textContent.length,
                sample: d.textContent.substring(0, 100)
            }))
        };
    """) or {'error': 'Script returned no data'}

    if 'error' in content_info:
        print(f"ERROR: {content_info['error']}")
    else:
        print(f"\nTotal divs in pane: {content_info['total_divs']}")
        print(f"Divs with ratings: {content_info['divs_with_ratings']}")
        print(f"Divs with photos: {content_info['divs_with_photos']}")
        print(f"Divs with dates: {content_info['divs_with_dates']}")
        print(f"Divs matching ALL criteria (review-like): {content_info['review_like_divs']}")

        print("\nFirst 20 unique classes found in pane:")
        for cls in content_info['unique_classes'][:20]:
            print(f"  {cls}")

        if content_info['review_like_divs'] > 0:
            print("\nFirst 5 review-like divs:")
            for i, div_info in enumerate(content_info['review_like_classes'], 1):
                print(f"\n  Div {i}:")
                print(f"    Classes: {div_info['classes']}")
                print(f"    Text length: {div_info['text_length']}")
                print(f"    Sample: {div_info['sample'][:80]}...")

    # Keep the window open so the DOM can be examined manually.
    print(f"\n{'='*80}")
    print("Browser staying open for manual inspection (120 seconds)...")
    print("Look at the DevTools to see the actual review elements!")
    print(f"{'='*80}")
    time.sleep(120)

finally:
    # Always close the browser, even on error or Ctrl+C.
    driver.quit()