Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
157 lines
5.6 KiB
Python
157 lines
5.6 KiB
Python
#!/usr/bin/env python3
"""
Find the ACTUAL selector for reviews by looking for elements with review structure.

Diagnostic script. It opens a Google Maps search result in a visible
(non-headless) browser, tries to dismiss the consent dialog, clicks the
reviews tab, scrolls the reviews pane, and then inspects the DOM from
JavaScript to report which ``div`` elements look like reviews and which CSS
classes they all share. The browser is left open for 60 seconds at the end so
the page can be inspected by hand, and is always closed afterwards.
"""

import time

from seleniumbase import Driver

# NOTE(review): the button/tab text matching below looks for English strings
# ('accept all', 'review'); the URL passes hl=en, presumably for that reason.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# JavaScript prelude shared by both DOM scans so that the "review-like" set
# that is reported and the set used to derive the common selector are the
# same. A div qualifies when it contains a descendant whose aria-label
# mentions "star" or "rating" and its text is at least 50 characters long
# (short texts are skipped to avoid matching buttons).
_COLLECT_REVIEW_DIVS_JS = r"""
    const reviewDivs = Array.from(document.querySelectorAll('div')).filter(div =>
        div.querySelector('[aria-label*="star" i], [aria-label*="rating" i]') !== null
        && div.textContent.length >= 50
    );
"""

# Describes every review-like div; returns the total count and the first five.
_REVIEW_INFO_JS = _COLLECT_REVIEW_DIVS_JS + r"""
    const reviews = reviewDivs.map(div => ({
        classes: div.className,
        has_author: !!div.querySelector('button, [aria-label*="photo" i]'),
        has_avatar: !!div.querySelector('img'),
        has_date: /\d+\s*(day|week|month|year|ago)/i.test(div.textContent),
        text_length: div.textContent.length,
        sample_text: div.textContent.substring(0, 150),
        tag_name: div.tagName,
        jslog: div.getAttribute('jslog'),
        data_review_id: div.getAttribute('data-review-id'),
        jsaction: div.getAttribute('jsaction')
    }));

    return {
        total_found: reviews.length,
        first_5: reviews.slice(0, 5)
    };
"""

# Computes the classes present on ALL review-like divs and builds a selector
# from them. Class names are passed through CSS.escape so that names containing
# CSS-special characters still produce a valid selector.
_COMMON_SELECTOR_JS = _COLLECT_REVIEW_DIVS_JS + r"""
    if (reviewDivs.length === 0) return null;

    const commonClasses = Array.from(reviewDivs[0].classList).filter(cls =>
        reviewDivs.every(div => div.classList.contains(cls))
    );

    return {
        total_reviews: reviewDivs.length,
        common_classes: commonClasses,
        suggested_selector: commonClasses.length > 0
            ? 'div.' + commonClasses.map(cls => CSS.escape(cls)).join('.')
            : null,
        first_review_classes: reviewDivs[0].className
    };
"""


def _print_banner(title):
    """Print *title* between two 80-character ``=`` rules, preceded by a blank line."""
    print("\n" + "=" * 80)
    print(title)
    print("=" * 80)


def _accept_consent(browser):
    """Click the first form button whose text contains 'accept all', if any.

    Best-effort: any error while looking for or clicking the button is
    reported and otherwise ignored, because the dialog is not always shown.
    """
    try:
        for btn in browser.find_elements('css selector', 'form button'):
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:  # best-effort step; keep going but leave a trace
        print(f"Consent step skipped: {exc!r}")


def _open_reviews_tab(browser):
    """Click the first tab whose text or aria-label mentions 'review'.

    Returns True when a tab was clicked, False when no matching tab exists.
    """
    for tab in browser.find_elements('css selector', 'button[role="tab"]'):
        text = (tab.text or '').lower()
        label = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in text or 'review' in label:
            # Click through JavaScript rather than WebElement.click().
            browser.execute_script("arguments[0].click();", tab)
            time.sleep(5)
            return True
    return False


def _scroll_reviews_pane(browser, steps=3):
    """Scroll the reviews pane down *steps* times by 500px, pausing 1s each time.

    Best-effort: if the pane is not found or scrolling fails, the error is
    reported and the script continues with whatever is already loaded.
    """
    try:
        pane = browser.find_element('css selector', 'div.m6QErb.WNBkOb.XiKgde')
        for _ in range(steps):
            browser.execute_script("arguments[0].scrollBy(0, 500);", pane)
            time.sleep(1)
    except Exception as exc:  # best-effort step; keep going but leave a trace
        print(f"Scroll step skipped: {exc!r}")


driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR
    _accept_consent(driver)

    # Click reviews tab
    time.sleep(2)
    if not _open_reviews_tab(driver):
        # Make the "no tabs found" case visible instead of silently scanning
        # whatever page happens to be displayed.
        print("No reviews tab found - scanning the page as-is")

    # Scroll to load reviews
    _scroll_reviews_pane(driver)

    # Use JavaScript to find ALL elements that look like reviews
    _print_banner("FINDING ACTUAL REVIEW ELEMENTS BY STRUCTURE:")

    review_info = driver.execute_script(_REVIEW_INFO_JS)

    print(f"\nFound {review_info['total_found']} elements with review structure")
    print("\nFirst 5 review-like elements:")
    for i, rev in enumerate(review_info['first_5'], 1):
        print(f"\n Review {i}:")
        print(f" Classes: {rev['classes']}")
        print(f" Has author: {rev['has_author']}")
        print(f" Has avatar: {rev['has_avatar']}")
        print(f" Has date: {rev['has_date']}")
        print(f" Text length: {rev['text_length']}")
        print(f" jslog: {rev['jslog']}")
        print(f" data-review-id: {rev['data_review_id']}")
        print(f" Sample: {rev['sample_text'][:80]}...")

    # Try to find a common class among review elements
    if review_info['total_found'] > 0:
        _print_banner("FINDING COMMON SELECTOR:")

        common_selector = driver.execute_script(_COMMON_SELECTOR_JS)

        if common_selector:
            print(f"Total review elements: {common_selector['total_reviews']}")
            print(f"Common classes: {common_selector['common_classes']}")
            print(f"Suggested selector: {common_selector['suggested_selector']}")
            print(f"First review full classes: {common_selector['first_review_classes']}")

            # Test the suggested selector
            if common_selector['suggested_selector']:
                # Pass the selector as a script argument instead of splicing
                # it into the JavaScript source, so quotes or backslashes in
                # it cannot break the script.
                test_count = driver.execute_script(
                    "return document.querySelectorAll(arguments[0]).length;",
                    common_selector['suggested_selector'],
                )
                print(f"\nTesting suggested selector: Found {test_count} elements")

    _print_banner("Browser staying open for manual inspection (60s)...")
    time.sleep(60)

finally:
    # Always close the browser, including on errors or Ctrl-C during the wait.
    driver.quit()