Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has a different structure (no tabs found)
- Needs separate investigation; it may use a different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
157
inspect_pane_content.py
Normal file
157
inspect_pane_content.py
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env python3
"""
Check what's actually inside the reviews pane after scrolling.

Diagnostic script. It opens a Google Maps search in a visible browser,
dismisses the consent form if one is shown, clicks the reviews tab,
scrolls the reviews pane, and prints counts of divs that look like
reviews (star rating, image, relative date). The browser is then left
open for 120 seconds so the DOM can be inspected by hand in DevTools.
"""

import time

from seleniumbase import Driver

# NOTE(review): the query spells "respubliikine" with a doubled "i". If that
# is unintentional, Maps may show a result list instead of a place page,
# which could relate to the "no tabs found" known issue -- confirm before
# changing the string.
url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR consent form: best-effort, the page may not show one at all.
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl+C still stops the run,
        # and report the failure instead of silently discarding it.
        print(f"Consent dialog handling skipped: {exc}")

    # Click reviews tab
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    review_tab_found = False
    for tab in tabs:
        # Read each attribute once; every access is a WebDriver round-trip.
        raw_text = tab.text
        raw_aria = tab.get_attribute('aria-label')
        text = (raw_text or '').lower()
        aria = (raw_aria or '').lower()
        print(f"Tab: text='{raw_text}', aria='{raw_aria}'")
        if 'review' in text or 'review' in aria:
            print("  -> Clicking this tab!")
            # JS click avoids "element not interactable" on overlapped tabs.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(6)  # Wait longer
            review_tab_found = True
            break

    if not review_tab_found:
        print("WARNING: Reviews tab not found!")

    # Find and scroll the pane
    print("\nLooking for scrollable pane...")
    pane = None
    try:
        pane = driver.find_element('css selector', 'div.m6QErb.WNBkOb.XiKgde')
        print("Found pane: div.m6QErb.WNBkOb.XiKgde")
    except Exception:
        print("Pane not found with standard selector!")
        # Fall back to the least specific class of the standard selector.
        try:
            pane = driver.find_element('css selector', 'div.m6QErb')
            print("Found pane: div.m6QErb")
        except Exception:
            print("No pane found at all!")

    if pane:
        print("\nScrolling pane to load reviews...")
        for i in range(15):
            driver.execute_script("arguments[0].scrollBy(0, 400);", pane)
            time.sleep(0.4)
            if (i + 1) % 5 == 0:
                print(f"  Scrolled {i+1} times...")

    # Now check what's in the pane
    print("\n" + "="*80)
    print("ANALYZING PANE CONTENT")
    print("="*80)

    # The script re-queries the pane itself and returns either
    # {error: ...} or a summary object with counts and samples.
    content_info = driver.execute_script("""
        const pane = document.querySelector('div.m6QErb.WNBkOb.XiKgde') || document.querySelector('div.m6QErb');
        if (!pane) return {error: 'No pane found'};

        // One shared relative-date pattern so both date checks below agree
        // (previously the review-like check omitted "minute").
        const DATE_RE = /\\d+\\s*(day|week|month|year|hour|minute|ago)/i;

        // Get all child divs (direct and nested)
        const allDivs = Array.from(pane.querySelectorAll('div'));

        // Get all unique class names used
        const classNames = new Set();
        allDivs.forEach(div => {
            if (div.className) {
                div.className.split(' ').forEach(cls => {
                    if (cls.trim()) classNames.add(cls.trim());
                });
            }
        });

        // Find divs with ratings
        const divsWithRatings = allDivs.filter(div => {
            return !!div.querySelector('[aria-label*="star" i]');
        });

        // Find divs with author photos
        const divsWithPhotos = allDivs.filter(div => {
            return !!div.querySelector('img[src*="photo"], img[src*="avatar"]');
        });

        // Find divs with date patterns
        const divsWithDates = allDivs.filter(div => {
            return DATE_RE.test(div.textContent);
        });

        // Find divs with ALL three (any <img> counts as a photo here)
        const reviewLikeDivs = allDivs.filter(div => {
            const hasRating = !!div.querySelector('[aria-label*="star" i]');
            const hasPhoto = !!div.querySelector('img');
            const hasDate = DATE_RE.test(div.textContent);
            const textLen = div.textContent.length;
            return hasRating && hasPhoto && hasDate && textLen > 50 && textLen < 2000;
        });

        return {
            total_divs: allDivs.length,
            unique_classes: Array.from(classNames).sort(),
            divs_with_ratings: divsWithRatings.length,
            divs_with_photos: divsWithPhotos.length,
            divs_with_dates: divsWithDates.length,
            review_like_divs: reviewLikeDivs.length,
            review_like_classes: reviewLikeDivs.slice(0, 5).map(d => ({
                classes: d.className,
                text_length: d.textContent.length,
                sample: d.textContent.substring(0, 100)
            }))
        };
    """)

    if 'error' in content_info:
        print(f"ERROR: {content_info['error']}")
    else:
        print(f"\nTotal divs in pane: {content_info['total_divs']}")
        print(f"Divs with ratings: {content_info['divs_with_ratings']}")
        print(f"Divs with photos: {content_info['divs_with_photos']}")
        print(f"Divs with dates: {content_info['divs_with_dates']}")
        print(f"Divs matching ALL criteria (review-like): {content_info['review_like_divs']}")

        print("\nFirst 20 unique classes found in pane:")
        for cls in content_info['unique_classes'][:20]:
            print(f"  {cls}")

        if content_info['review_like_divs'] > 0:
            print("\nFirst 5 review-like divs:")
            for i, div_info in enumerate(content_info['review_like_classes'], 1):
                print(f"\n  Div {i}:")
                print(f"    Classes: {div_info['classes']}")
                print(f"    Text length: {div_info['text_length']}")
                print(f"    Sample: {div_info['sample'][:80]}...")

    print(f"\n{'='*80}")
    print("Browser staying open for manual inspection (120 seconds)...")
    print("Look at the DevTools to see the actual review elements!")
    print(f"{'='*80}")
    time.sleep(120)

finally:
    # Always close the browser, even if a step above raised.
    driver.quit()
|
||||
Reference in New Issue
Block a user