Fix: Add early no-reviews detection and hide analytics for empty jobs
Changes: - Early detection of "no reviews" messages in 11 languages. - Checks for disabled reviews tabs and 0-review indicators. - Returns early (saves 30-40 s) when no reviews exist. - Frontend hides analytics/export buttons when reviews_count = 0. - Structural pattern matching improvements (work in progress). Known issue: - The Lithuanian hospital page has a different structure (no tabs found). - Needs separate investigation; it may use a different Google Maps layout. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
166
brute_force_selector.py
Normal file
166
brute_force_selector.py
Normal file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
"""
Brute force approach: Try every possible div class combination and see which gives us reviews.

Debugging aid. The script opens a Google Maps search in a visible browser,
tries to open the reviews tab, scrolls the reviews pane, and then scores every
classed <div> inside that pane with a simple heuristic (star rating, text
length, author image). The CSS selectors matching the most elements are
printed, and the best one is sampled on its first three matches.
"""

import time

from seleniumbase import Driver

url = "https://www.google.com/maps/search/?api=1&query=panevezio%20respubliikine%20ligonine&hl=en"

# Selector of the pane that is scrolled and searched. Kept in one place and
# handed to the in-page scripts as an argument so Python and JS cannot drift.
REVIEWS_PANE_SELECTOR = 'div.m6QErb.WNBkOb.XiKgde'

# In-page script: arguments[0] is the pane selector.
# Returns {error: str} when the pane is missing, otherwise
# {top_selectors: [{selector, count, score, text_length, sample}, ...]}.
FIND_CANDIDATES_JS = """
    // Find the reviews pane
    const pane = document.querySelector(arguments[0]);
    if (!pane) return {error: 'Pane not found'};

    // selector -> info for the first div that produced that selector
    const bySelector = new Map();

    for (const div of pane.querySelectorAll('div')) {
        // Skip if no classes (also covers a whitespace-only class attribute)
        if (div.classList.length === 0) continue;

        // Check for review indicators
        const textLength = div.textContent.length;
        const hasRating = !!div.querySelector('[aria-label*="star" i]');
        const hasText = textLength > 50 && textLength < 1000;  // Individual review size
        const hasAuthor = !!div.querySelector('button[aria-label*="photo" i], img');

        // Rating is worth 3, text 2, author 1. A threshold of 4 therefore
        // requires a rating plus at least one of text or author.
        const score = (hasRating ? 3 : 0) + (hasText ? 2 : 0) + (hasAuthor ? 1 : 0);
        if (score < 4) continue;

        // Escape each class so unusual characters cannot yield an invalid selector.
        const selector = 'div' + Array.from(div.classList, c => '.' + CSS.escape(c)).join('');
        if (bySelector.has(selector)) continue;  // count each selector only once

        bySelector.set(selector, {
            selector: selector,
            count: pane.querySelectorAll(selector).length,
            score: score,
            text_length: textLength,
            sample: div.textContent.substring(0, 100)
        });
    }

    // Sort by count (we want selectors that match many reviews)
    const top = Array.from(bySelector.values())
        .sort((a, b) => b.count - a.count)
        .slice(0, 10);

    return {top_selectors: top};
"""

# In-page script: arguments[0] is the selector to sample.
# Returns a list with one summary object per sampled element (at most three).
TEST_SELECTOR_JS = r"""
    const elements = document.querySelectorAll(arguments[0]);
    const reviews = [];

    for (let i = 0; i < Math.min(3, elements.length); i++) {
        const elem = elements[i];
        reviews.push({
            has_author: !!elem.querySelector('button, img'),
            has_rating: !!elem.querySelector('[aria-label*="star" i]'),
            has_date: /\d+\s*(day|week|month|year|ago)/i.test(elem.textContent),
            text_length: elem.textContent.length,
            text_sample: elem.textContent.substring(0, 150)
        });
    }

    return reviews;
"""

driver = Driver(uc=True, headless=False)

try:
    driver.get(url)
    time.sleep(5)

    # GDPR: best effort, the consent form is not always shown.
    try:
        for btn in driver.find_elements('css selector', 'form button'):
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break
    except Exception as exc:
        print(f"GDPR consent step skipped: {exc}")

    # Click reviews tab
    time.sleep(2)
    tabs = driver.find_elements('css selector', 'button[role="tab"]')
    for tab in tabs:
        tab_text = (tab.text or '').lower()
        tab_label = (tab.get_attribute('aria-label') or '').lower()
        if 'review' in tab_text or 'review' in tab_label:
            # JS click avoids failures when the tab is covered by an overlay.
            driver.execute_script("arguments[0].click();", tab)
            time.sleep(5)
            break
    else:
        print(f"WARNING: no reviews tab found among {len(tabs)} tab button(s)")

    # Scroll to load reviews
    try:
        pane = driver.find_element('css selector', REVIEWS_PANE_SELECTOR)
        for _ in range(10):
            driver.execute_script("arguments[0].scrollBy(0, 400);", pane)
            time.sleep(0.3)
    except Exception as exc:
        print(f"Could not scroll reviews pane: {exc}")

    print("\n" + "=" * 80)
    print("BRUTE FORCE SELECTOR SEARCH")
    print("=" * 80)

    # Get ALL unique class combinations from divs inside the reviews pane
    candidates = driver.execute_script(FIND_CANDIDATES_JS, REVIEWS_PANE_SELECTOR)

    if not candidates:
        print("ERROR: selector search returned no result")
    elif 'error' in candidates:
        print(f"ERROR: {candidates['error']}")
    else:
        top_selectors = candidates['top_selectors']

        print("\nTop 10 candidate selectors (sorted by count):\n")
        for i, candidate in enumerate(top_selectors, 1):
            print(f"{i}. {candidate['selector']}")
            print(f"   Count: {candidate['count']} | Score: {candidate['score']} | Text length: {candidate['text_length']}")
            print(f"   Sample: {candidate['sample'][:80]}...")
            print()

        # Test the top selector
        if top_selectors:
            top_selector = top_selectors[0]['selector']
            print(f"\n{'=' * 80}")
            print(f"TESTING TOP SELECTOR: {top_selector}")
            print(f"{'=' * 80}")

            # The selector is passed as an argument, not interpolated into the
            # script text, so quotes or backslashes in it cannot break the JS.
            test_result = driver.execute_script(TEST_SELECTOR_JS, top_selector)

            print(f"\nFirst 3 elements using {top_selector}:")
            for i, rev in enumerate(test_result or [], 1):
                print(f"\n  Element {i}:")
                for key, value in rev.items():
                    print(f"    {key}: {value}")

    print(f"\n{'=' * 80}")
    print("Browser staying open for 60 seconds...")
    print(f"{'=' * 80}")
    time.sleep(60)

finally:
    driver.quit()
|
||||
Reference in New Issue
Block a user