Changes: - Early detection for "no reviews" messages in 11 languages - Checks for disabled reviews tabs and 0-review indicators - Returns early (saves 30-40s) when no reviews exist - Frontend hides analytics/export buttons when reviews_count = 0 - Structural pattern matching improvements (work in progress) Known issue: - Lithuanian hospital page has different structure (no tabs found) - Needs separate investigation - may use different Google Maps layout Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
107 lines
3.9 KiB
Python
107 lines
3.9 KiB
Python
#!/usr/bin/env python3
"""
Check the actual page structure - maybe reviews are already visible without clicking a tab!

Debug helper: opens a Google Maps search URL in a visible (non-headless)
browser, tries to accept the consent dialog, prints counts for several
review-related CSS selectors, lists elements whose aria-label mentions a
star rating, and then keeps the browser open for manual inspection.
"""

import time

from seleniumbase import Driver

URL = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

# Fixed waits (seconds). The script uses plain sleeps rather than explicit
# waits because it is a manual diagnostic tool, not part of the scraper.
PAGE_LOAD_WAIT_S = 5
CONSENT_SETTLE_WAIT_S = 2
DYNAMIC_CONTENT_WAIT_S = 3
MANUAL_INSPECTION_WAIT_S = 180  # 3 minutes

# How many matching rating elements are printed in detail.
RATING_SAMPLE_LIMIT = 5

SEPARATOR = "=" * 80

# Collects selector counts and a text sample in one round trip.
# innerText is read once because every access forces a layout pass.
PAGE_INFO_SCRIPT = """
    const bodyText = document.body.innerText || '';
    const lowerText = bodyText.toLowerCase();
    return {
        tabs_found: document.querySelectorAll('button[role="tab"]').length,
        reviews_with_standard_selector: document.querySelectorAll('div.jftiEf.fontBodyMedium').length,
        reviews_with_jftiEf: document.querySelectorAll('div.jftiEf').length,
        divs_with_ratings: document.querySelectorAll('[aria-label*="star" i]').length,
        review_containers: document.querySelectorAll('div.fontBodyMedium').length,
        page_text_sample: bodyText.substring(0, 500),
        has_review_text: lowerText.includes('review'),
        has_atsiliepimai_text: lowerText.includes('atsiliepimai')
    };
"""

# Finds elements whose aria-label mentions a star rating (English or
# Lithuanian stem). The class is read via getAttribute('class') because
# `className` on SVG elements is an SVGAnimatedString, which has no
# substring() and would abort the whole script.
RATING_SEARCH_SCRIPT = """
    const classOf = (el) => el ? (el.getAttribute('class') || '').substring(0, 100) : null;
    const withRatings = [];

    for (const elem of document.querySelectorAll('[aria-label]')) {
        const ariaLabel = elem.getAttribute('aria-label') || '';
        const lowered = ariaLabel.toLowerCase();
        if (lowered.includes('star') || lowered.includes('žvaigžd')) {
            withRatings.push({
                tag: elem.tagName,
                ariaLabel: ariaLabel.substring(0, 100),
                classes: classOf(elem),
                parentTag: elem.parentElement ? elem.parentElement.tagName : null,
                parentClasses: classOf(elem.parentElement)
            });
        }
    }

    // Return the real total together with a bounded sample (first 10).
    return {total: withRatings.length, sample: withRatings.slice(0, 10)};
"""


def _dismiss_consent_dialog(driver) -> bool:
    """Click the first form button whose text contains 'accept'.

    Best-effort: any error while locating or clicking the button is reported
    and otherwise ignored so the rest of the analysis still runs.

    Returns:
        True if a button was clicked, False otherwise.
    """
    try:
        for button in driver.find_elements("css selector", "form button"):
            if "accept" in (button.text or "").lower():
                button.click()
                time.sleep(CONSENT_SETTLE_WAIT_S)
                return True
    except Exception as exc:
        # Deliberately non-fatal, but no longer silent, and no longer
        # swallowing KeyboardInterrupt/SystemExit like a bare `except:`.
        print(f"GDPR consent handling skipped: {type(exc).__name__}: {exc}")
    return False


def _report_page_structure(driver) -> None:
    """Print selector counts and the first 500 characters of page text."""
    print("\n" + SEPARATOR)
    print("PAGE STRUCTURE ANALYSIS")
    print(SEPARATOR)

    page_info = driver.execute_script(PAGE_INFO_SCRIPT)

    print(f"\nTabs with role='tab': {page_info['tabs_found']}")
    print(f"div.jftiEf.fontBodyMedium: {page_info['reviews_with_standard_selector']}")
    print(f"div.jftiEf: {page_info['reviews_with_jftiEf']}")
    print(f"Elements with star ratings: {page_info['divs_with_ratings']}")
    print(f"div.fontBodyMedium: {page_info['review_containers']}")
    print(f"Contains 'review': {page_info['has_review_text']}")
    print(f"Contains 'atsiliepimai' (Lithuanian): {page_info['has_atsiliepimai_text']}")

    print("\nPage text sample (first 500 chars):")
    print(page_info["page_text_sample"])


def _report_rating_elements(driver) -> None:
    """Print the total number of star-labelled elements and a short sample."""
    print("\n" + SEPARATOR)
    print("SEARCHING FOR RATING ELEMENTS")
    print(SEPARATOR)

    rating_search = driver.execute_script(RATING_SEARCH_SCRIPT)
    total = rating_search["total"]
    sample = rating_search["sample"][:RATING_SAMPLE_LIMIT]

    print(
        f"\nFound {total} elements with 'star' in aria-label "
        f"(showing first {len(sample)}):"
    )
    for index, elem in enumerate(sample, 1):
        print(f"\n  Element {index}:")
        print(f"    Tag: {elem['tag']}")
        print(f"    Aria-label: {elem['ariaLabel']}")
        print(f"    Classes: {elem['classes']}")
        print(f"    Parent tag: {elem['parentTag']}")
        print(f"    Parent classes: {elem['parentClasses']}")


def main() -> None:
    """Run the page-structure diagnostic and leave the browser open briefly."""
    driver = Driver(uc=True, headless=False)
    try:
        driver.get(URL)
        print(f"Initial URL: {URL}")
        time.sleep(PAGE_LOAD_WAIT_S)

        # GDPR
        _dismiss_consent_dialog(driver)

        # Check final URL
        final_url = driver.current_url
        print(f"Final URL after redirect: {final_url}")

        # Wait a bit more for dynamic content
        time.sleep(DYNAMIC_CONTENT_WAIT_S)

        _report_page_structure(driver)
        _report_rating_elements(driver)

        print(f"\n{SEPARATOR}")
        print("Browser open for manual inspection...")
        print("LOOK AT THE PAGE - Are reviews visible? What's their structure?")
        print(SEPARATOR)
        time.sleep(MANUAL_INSPECTION_WAIT_S)
    finally:
        # Always close the browser, even on error or Ctrl+C.
        driver.quit()


if __name__ == "__main__":
    main()