Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes:
- Early detection for "no reviews" messages in 11 languages
- Checks for disabled reviews tabs and 0-review indicators
- Returns early (saves 30-40s) when no reviews exist
- Frontend hides analytics/export buttons when reviews_count = 0
- Structural pattern matching improvements (work in progress)

Known issue:
- Lithuanian hospital page has a different structure (no tabs found)
- Needs separate investigation - may use a different Google Maps layout

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
106
check_page_structure.py
Normal file
106
check_page_structure.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python3
"""
Check the actual page structure - maybe reviews are already visible without clicking a tab!

Manual diagnostic script. It opens a fixed Google Maps search URL in a visible
browser, tries to dismiss a consent form, prints counts for several candidate
review/rating selectors, lists elements whose aria-label mentions a star
rating, and then keeps the browser open for three minutes so the page can be
inspected by hand. The browser is always closed on exit.
"""

import time

from seleniumbase import Driver

url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine"

driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    print(f"Initial URL: {url}")
    time.sleep(5)

    # GDPR: best-effort click on the first form button whose text contains
    # "accept". A failure here must not abort the diagnosis, but it is
    # reported instead of being silently swallowed.
    try:
        form_btns = driver.find_elements('css selector', 'form button')
        for btn in form_btns:
            if 'accept' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"GDPR consent handling skipped: {exc}")

    # Check final URL
    final_url = driver.current_url
    print(f"Final URL after redirect: {final_url}")

    # Wait a bit more for dynamic content
    time.sleep(3)

    # Check page structure
    print("\n" + "="*80)
    print("PAGE STRUCTURE ANALYSIS")
    print("="*80)

    # Count matches for each candidate selector in a single round trip.
    page_info = driver.execute_script("""
        return {
            tabs_found: document.querySelectorAll('button[role="tab"]').length,
            reviews_with_standard_selector: document.querySelectorAll('div.jftiEf.fontBodyMedium').length,
            reviews_with_jftiEf: document.querySelectorAll('div.jftiEf').length,
            divs_with_ratings: document.querySelectorAll('[aria-label*="star" i]').length,
            review_containers: document.querySelectorAll('div.fontBodyMedium').length,
            page_text_sample: document.body.innerText.substring(0, 500),
            has_review_text: document.body.innerText.toLowerCase().includes('review'),
            has_atsiliepimai_text: document.body.innerText.toLowerCase().includes('atsiliepimai')
        };
    """)

    print(f"\nTabs with role='tab': {page_info['tabs_found']}")
    print(f"div.jftiEf.fontBodyMedium: {page_info['reviews_with_standard_selector']}")
    print(f"div.jftiEf: {page_info['reviews_with_jftiEf']}")
    print(f"Elements with star ratings: {page_info['divs_with_ratings']}")
    print(f"div.fontBodyMedium: {page_info['review_containers']}")
    print(f"Contains 'review': {page_info['has_review_text']}")
    print(f"Contains 'atsiliepimai' (Lithuanian): {page_info['has_atsiliepimai_text']}")

    print("\nPage text sample (first 500 chars):")
    print(page_info['page_text_sample'])

    # Try to find ANY element with rating
    print("\n" + "="*80)
    print("SEARCHING FOR RATING ELEMENTS")
    print("="*80)

    # The class list is read via getAttribute('class') rather than
    # elem.className: on SVG elements className is an SVGAnimatedString,
    # which has no substring() and would make the whole script throw.
    # The true match count is returned next to the samples so the summary
    # line is not capped by the sample size.
    rating_search = driver.execute_script("""
        const classOf = (node) => node ? (node.getAttribute('class') || '') : null;
        const elements = Array.from(document.querySelectorAll('*'));
        const withRatings = [];

        for (let elem of elements) {
            const ariaLabel = elem.getAttribute('aria-label') || '';
            const lowered = ariaLabel.toLowerCase();
            if (lowered.includes('star') || lowered.includes('žvaigžd')) {
                const parent = elem.parentElement;
                withRatings.push({
                    tag: elem.tagName,
                    ariaLabel: ariaLabel.substring(0, 100),
                    classes: classOf(elem).substring(0, 100),
                    parentTag: parent ? parent.tagName : null,
                    parentClasses: parent ? classOf(parent).substring(0, 100) : null
                });
            }
        }

        return {
            total: withRatings.length,
            samples: withRatings.slice(0, 10)  // First 10
        };
    """)

    print(f"\nFound {rating_search['total']} elements with 'star' in aria-label:")
    # Only the first five samples are printed to keep the output readable.
    for i, elem in enumerate(rating_search['samples'][:5], 1):
        print(f"\n  Element {i}:")
        print(f"    Tag: {elem['tag']}")
        print(f"    Aria-label: {elem['ariaLabel']}")
        print(f"    Classes: {elem['classes']}")
        print(f"    Parent tag: {elem['parentTag']}")
        print(f"    Parent classes: {elem['parentClasses']}")

    print(f"\n{'='*80}")
    print("Browser open for manual inspection...")
    print("LOOK AT THE PAGE - Are reviews visible? What's their structure?")
    print(f"{'='*80}")
    time.sleep(180)  # 3 minutes

finally:
    # Always release the browser, even if a step above raised.
    driver.quit()
|
||||
Reference in New Issue
Block a user