Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has different structure (no tabs found)
- Needs separate investigation - may use different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
156
find_actual_reviews.py
Normal file
156
find_actual_reviews.py
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Find the ACTUAL selector for reviews by looking for elements with review structure.
|
||||
"""
|
||||
|
||||
import time
|
||||
from seleniumbase import Driver
|
||||
|
||||
# Diagnostic script: open a Google Maps place page, try to open its reviews
# tab, then inspect the DOM for <div> elements that structurally look like
# reviews (a star/rating aria-label plus a reasonable amount of text) and
# report which CSS classes those elements share.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# Headed, undetected-chromedriver mode so the page can be inspected manually.
driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR: best-effort dismissal of the consent form. A failure here must not
    # abort the run, but it is reported instead of being silently swallowed.
    try:
        form_btns = driver.find_elements('css selector', 'form button')
        for btn in form_btns:
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"Consent dialog handling skipped: {exc}")

    # Click reviews tab (matched on visible text or aria-label).
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    for tab in tabs:
        tab_text = (tab.text or '').lower()
        tab_label = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in tab_text or 'review' in tab_label:
            # JS click avoids "element not interactable" on overlaid tabs.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(5)
            break
    else:
        # Runs only when the loop did not break, i.e. nothing was clicked.
        print(f"No reviews tab found among {len(tabs)} tab button(s); continuing anyway")

    # Scroll to load reviews. The pane selector is a guess at the current
    # layout, so a miss is reported and the script carries on.
    try:
        pane = driver.find_element('css selector', 'div.m6QErb.WNBkOb.XiKgde')
        for _ in range(3):
            driver.execute_script("arguments[0].scrollBy(0, 500);", pane)
            time.sleep(1)
    except Exception as exc:
        print(f"Could not scroll reviews pane: {exc}")

    # Use JavaScript to find ALL elements that look like reviews
    print("\n" + "="*80)
    print("FINDING ACTUAL REVIEW ELEMENTS BY STRUCTURE:")
    print("="*80)

    review_info = driver.execute_script("""
        // Find all elements that have BOTH a rating AND substantial text
        const allDivs = Array.from(document.querySelectorAll('div'));

        const reviews = [];

        for (let div of allDivs) {
            // Must have a rating (star aria-label)
            const ratingElem = div.querySelector('[aria-label*="star" i], [aria-label*="rating" i]');
            if (!ratingElem) continue;

            // Must have decent text content (>50 chars to avoid buttons)
            if (div.textContent.length < 50) continue;

            // Get the classes and attributes
            const info = {
                classes: div.className,
                has_author: !!div.querySelector('button, [aria-label*="photo" i]'),
                has_avatar: !!div.querySelector('img'),
                has_date: !!div.textContent.match(/\\d+\\s*(day|week|month|year|ago)/i),
                text_length: div.textContent.length,
                sample_text: div.textContent.substring(0, 150),
                tag_name: div.tagName,
                jslog: div.getAttribute('jslog'),
                data_review_id: div.getAttribute('data-review-id'),
                jsaction: div.getAttribute('jsaction')
            };

            reviews.push(info);
        }

        return {
            total_found: reviews.length,
            first_5: reviews.slice(0, 5)
        };
    """)

    print(f"\nFound {review_info['total_found']} elements with review structure")
    print("\nFirst 5 review-like elements:")
    for i, rev in enumerate(review_info['first_5'], 1):
        print(f"\n  Review {i}:")
        print(f"    Classes: {rev['classes']}")
        print(f"    Has author: {rev['has_author']}")
        print(f"    Has avatar: {rev['has_avatar']}")
        print(f"    Has date: {rev['has_date']}")
        print(f"    Text length: {rev['text_length']}")
        print(f"    jslog: {rev['jslog']}")
        print(f"    data-review-id: {rev['data_review_id']}")
        print(f"    Sample: {rev['sample_text'][:80]}...")

    # Try to find a common class among review elements
    if review_info['total_found'] > 0:
        print("\n" + "="*80)
        print("FINDING COMMON SELECTOR:")
        print("="*80)

        # NOTE(review): this pass matches only "star" labels and uses a
        # strict > 50 length test, whereas the pass above also accepts
        # "rating" labels and lengths of exactly 50 - confirm whether the
        # difference is intentional.
        common_selector = driver.execute_script("""
            // Find common classes among review elements
            const reviews = [];
            const allDivs = Array.from(document.querySelectorAll('div'));

            for (let div of allDivs) {
                const ratingElem = div.querySelector('[aria-label*="star" i]');
                if (ratingElem && div.textContent.length > 50) {
                    reviews.push(div);
                }
            }

            if (reviews.length === 0) return null;

            // Get classes from first review
            const firstClasses = reviews[0].className.split(' ').filter(c => c.length > 0);

            // Find classes that appear in ALL reviews
            const commonClasses = firstClasses.filter(cls => {
                return reviews.every(rev => rev.classList.contains(cls));
            });

            return {
                total_reviews: reviews.length,
                common_classes: commonClasses,
                suggested_selector: commonClasses.length > 0 ? 'div.' + commonClasses.join('.') : null,
                first_review_classes: reviews[0].className
            };
        """)

        if common_selector:
            print(f"Total review elements: {common_selector['total_reviews']}")
            print(f"Common classes: {common_selector['common_classes']}")
            print(f"Suggested selector: {common_selector['suggested_selector']}")
            print(f"First review full classes: {common_selector['first_review_classes']}")

            # Test the suggested selector
            suggested = common_selector['suggested_selector']
            if suggested:
                # Pass the selector as a script argument rather than splicing
                # it into the JS source, so quotes or backslashes in a class
                # name cannot break (or inject into) the script.
                test_count = driver.execute_script(
                    "return document.querySelectorAll(arguments[0]).length;",
                    suggested,
                )
                print(f"\nTesting suggested selector: Found {test_count} elements")

    print("\n" + "="*80)
    print("Browser staying open for manual inspection (60s)...")
    print("="*80)
    time.sleep(60)

finally:
    # Always release the browser, even if a step above raised.
    driver.quit()
|
||||
Reference in New Issue
Block a user