Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- The Lithuanian hospital page has a different structure (no tabs found)
- Needs separate investigation; it may use a different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
164 lines
5.7 KiB
Python
164 lines
5.7 KiB
Python
#!/usr/bin/env python3
"""
Better diagnostic: Actually wait for reviews panel to load and find correct selector.

Flow of the script:
  1. Open a Google Maps search URL (with ``hl=en`` appended) in a visible browser.
  2. Click an "Accept all" consent button if one is present.
  3. Click the first tab whose text or aria-label mentions "review".
  4. Scroll the first matching results pane twice to trigger lazy loading.
  5. Print how many elements each candidate review selector matches.
  6. Print a structural analysis of the first selector matching more than 5 elements.
  7. Keep the browser open for 120 seconds for manual inspection, then quit.
"""

import time
from seleniumbase import Driver

url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

print("Opening browser...")
driver = Driver(uc=True, headless=False)

try:
    # Add English locale
    if '?' in url:
        url += '&hl=en'
    else:
        url += '?hl=en'

    driver.get(url)
    print(f"Loaded: {url}")
    time.sleep(5)

    # Handle GDPR
    try:
        form_btns = driver.find_elements('css selector', 'form button')
        for btn in form_btns:
            btn_text = (btn.text or '').lower()
            if 'accept all' in btn_text:
                print(f"Clicking GDPR: {btn.text}")
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        # Best effort only: a failure here must not abort the diagnostic.
        # Catch Exception (not a bare except) so Ctrl+C still stops the script.
        print(f"GDPR handling skipped: {exc}")

    # Click reviews tab and WAIT for panel to load
    print("\nClicking reviews tab...")
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    for tab in tabs:
        text = (tab.text or '').lower()
        aria = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in text or 'review' in aria:
            print(f"Found reviews tab: {tab.text or aria[:50]}")
            # Click through JavaScript instead of WebElement.click()
            driver.execute_script("arguments[0].click();", tab)
            print("Clicked! Waiting for reviews panel to load...")
            time.sleep(5)  # Wait longer for reviews to actually load
            break
    else:
        # Make the "no tabs" case visible instead of silently continuing
        print(f"No reviews tab found ({len(tabs)} tab(s) inspected)")

    # Try scrolling the reviews pane to load more
    print("\nTrying to find and scroll reviews pane...")
    pane_selectors = [
        'div.m6QErb.WNBkOb.XiKgde',
        'div.m6QErb',
        'div[role="main"]'
    ]

    for selector in pane_selectors:
        try:
            pane = driver.find_element('css selector', selector)
            print(f"Found pane: {selector}")
            driver.execute_script("arguments[0].scrollBy(0, 500);", pane)
            time.sleep(2)
            driver.execute_script("arguments[0].scrollBy(0, 500);", pane)
            time.sleep(2)
            break
        except Exception:
            # Selector did not match (or scrolling failed); try the next candidate
            continue
    else:
        print("No reviews pane matched any selector")

    # NOW check for review selectors
    print("\n" + "="*80)
    print("CHECKING REVIEW SELECTORS AFTER PANEL LOADED:")
    print("="*80)

    # (CSS selector, human-readable description) pairs to count on the page
    selectors_to_try = [
        ('div.jftiEf.fontBodyMedium', 'Standard Google Maps reviews'),
        ('div.jftiEf', 'Just jftiEf class'),
        ('div.fontBodyMedium', 'Just fontBodyMedium'),
        ('div[data-review-id]', 'data-review-id attribute'),
        ('div[jsaction*="review"]', 'jsaction with review'),
        ('[data-review]', 'data-review attribute'),
        ('div[class*="review" i]', 'Class containing review'),
        ('[role="article"]', 'role=article'),
        ('div[jslog]', 'Elements with jslog (Google tracking)'),
    ]

    for selector, description in selectors_to_try:
        # Pass the selector as a script argument rather than splicing it into
        # the JavaScript source, so quotes inside a selector cannot break it.
        count = driver.execute_script(
            "return document.querySelectorAll(arguments[0]).length;",
            selector,
        )
        print(f"{description:35} | {selector:40} | Found: {count}")

    # Get detailed info about most promising selector
    print("\n" + "="*80)
    print("ANALYZING MOST PROMISING SELECTOR:")
    print("="*80)

    # The script returns either an analysis object for the first selector that
    # matches more than 5 elements, or an object with a single 'error' key.
    analysis = driver.execute_script("""
        // Try selectors in order of likelihood
        const selectors = [
            'div.jftiEf.fontBodyMedium',
            'div.jftiEf',
            'div.fontBodyMedium',
            'div[jslog*="impression"]',
            '[role="article"]'
        ];

        for (let selector of selectors) {
            const elements = document.querySelectorAll(selector);
            if (elements.length > 5) {  // Need at least a few to be reviews
                // Analyze first element
                const first = elements[0];
                const analysis = {
                    selector: selector,
                    total_found: elements.length,
                    first_element: {
                        tag: first.tagName,
                        classes: first.className,
                        has_rating: !!first.querySelector('[aria-label*="star" i]'),
                        has_author: !!first.querySelector('button, a, div[aria-label]'),
                        has_avatar: !!first.querySelector('img'),
                        has_date: !!first.textContent.match(/\\d+\\s*(day|week|month|year|hour|minute)/i),
                        text_length: first.textContent.length,
                        sample_text: first.textContent.substring(0, 100)
                    }
                };

                // Check if multiple elements have review characteristics
                let reviewLikeCount = 0;
                for (let i = 0; i < Math.min(10, elements.length); i++) {
                    const elem = elements[i];
                    const hasRating = !!elem.querySelector('[aria-label*="star" i]');
                    const hasText = elem.textContent.length > 30;
                    if (hasRating && hasText) reviewLikeCount++;
                }
                analysis.review_like_count_in_first_10 = reviewLikeCount;

                return analysis;
            }
        }

        return {error: 'No selector found with >5 elements'};
    """)

    if 'error' in analysis:
        print(f"ERROR: {analysis['error']}")
    else:
        print(f"Best selector: {analysis['selector']}")
        print(f"Total found: {analysis['total_found']}")
        print(f"Review-like in first 10: {analysis['review_like_count_in_first_10']}")
        print("\nFirst element analysis:")
        for key, value in analysis['first_element'].items():
            print(f"  {key}: {value}")

    print("\n" + "="*80)
    print("Keeping browser open for 120 seconds for manual inspection...")
    print("="*80)
    time.sleep(120)

finally:
    # Always close the browser, including on errors or Ctrl+C
    driver.quit()