#!/usr/bin/env python3
"""
Debug script to extract review count from search results BEFORE auto-navigation.

Opens a Google Maps search URL in a headless undetected-Chrome session,
dismisses the consent page if one is shown, and then runs a JavaScript
snippet in the page that tries to read the business name, rating and
review count from the search-results view. The extracted data and the
snippet's ``[EXTRACTION]`` console messages are printed, and the script
finally waits a few seconds and reports whether the URL is still a
``/search/`` URL.
"""
import time

from seleniumbase import Driver
from selenium.webdriver.common.by import By

# Search URL under test (English UI forced via hl=en).
SEARCH_URL = "https://www.google.com/maps/search/?api=1&query=soho+vilna+club&hl=en"

# Pauses, in seconds, between the individual steps.
PAGE_LOAD_WAIT = 2
CONSENT_WAIT = 2
PRE_EXTRACTION_WAIT = 1.5
AUTO_NAVIGATION_WAIT = 5

# Horizontal rule used to frame the report sections.
SEPARATOR = "=" * 80

# JavaScript executed in the page. It returns an object with the keys
# businessName, rating, reviewCount, searchResults and allTextWithNumbers.
# Backslashes are doubled because this is a regular (non-raw) Python string.
EXTRACTION_SCRIPT = """
    const info = {
        businessName: null,
        rating: null,
        reviewCount: null,
        searchResults: [],
        allTextWithNumbers: []
    };

    console.log('[EXTRACTION] Starting search results extraction...');

    // Get business name from first result card
    const nameSelectors = [
        'div[role="article"] h3',
        'div[role="article"] div.fontHeadlineSmall',
        'div[aria-label*="Results"] h3',
        'a[href*="/place/"] h3',
        'div.Nv2PK h3'  // Google Maps class for business name in search results
    ];

    for (const selector of nameSelectors) {
        const elem = document.querySelector(selector);
        if (elem && elem.textContent) {
            info.businessName = elem.textContent.trim();
            console.log(`[EXTRACTION] Found name via ${selector}: ${info.businessName}`);
            break;
        }
    }

    // Get rating from first result
    const ratingElem = document.querySelector('div[role="article"] [role="img"][aria-label*="star"], a[href*="/place/"] [role="img"][aria-label*="star"]');
    if (ratingElem) {
        const ariaLabel = ratingElem.getAttribute('aria-label');
        const match = ariaLabel.match(/([0-9.]+)/);
        if (match) {
            info.rating = parseFloat(match[1]);
            console.log(`[EXTRACTION] Found rating: ${info.rating}`);
        }
    }

    // CRITICAL: Extract review count from search results sidebar
    // Look for patterns like "152 reviews", "247 reviews", etc.
    const numberPattern = /(\\d[\\d,\\.]*)\\s*(?:review|reseña|avis)/i;

    // Strategy 1: Check first result card/article
    const resultCards = document.querySelectorAll('div[role="article"], a[href*="/place/"], div.Nv2PK');
    console.log(`[EXTRACTION] Found ${resultCards.length} result cards`);

    for (let card of resultCards) {
        const text = card.textContent || '';
        console.log(`[EXTRACTION] Card text (first 200 chars): ${text.substring(0, 200)}`);

        const match = text.match(numberPattern);
        if (match) {
            const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
            if (num > 0 && num < 1000000) {
                info.reviewCount = num;
                console.log(`[EXTRACTION] ✓ Found review count in card: ${num}`);
                break;
            }
        }

        // Only check first card
        break;
    }

    // Strategy 2: Check all elements in left sidebar/panel
    if (!info.reviewCount) {
        console.log('[EXTRACTION] Strategy 2: Checking all sidebar elements...');

        const leftPanel = document.querySelector('div[role="main"]') ||
                          document.querySelector('[aria-label*="Results"]') ||
                          document.body;

        const allElements = leftPanel.querySelectorAll('span, div, a, button');
        console.log(`[EXTRACTION] Checking ${allElements.length} elements in sidebar...`);

        for (let elem of allElements) {
            const text = elem.textContent || '';

            // Skip very long text blocks (likely not the review count)
            if (text.length > 0 && text.length < 150) {
                const match = text.match(numberPattern);
                if (match) {
                    const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
                    if (num > 0 && num < 1000000) {
                        info.allTextWithNumbers.push({
                            tag: elem.tagName,
                            text: text,
                            number: num
                        });

                        if (!info.reviewCount) {
                            info.reviewCount = num;
                            console.log(`[EXTRACTION] ✓ Found via sidebar scan: ${num} from "${text}"`);
                        }
                    }
                }
            }
        }
    }

    console.log(`[EXTRACTION] Final result: ${info.reviewCount} reviews`);

    return info;
"""


def _accept_gdpr_consent(driver):
    """Click the first 'accept all' form button if the consent page is showing."""
    if 'consent.google.com' not in driver.current_url:
        return

    print("Handling GDPR...")
    for btn in driver.find_elements(By.CSS_SELECTOR, 'form button'):
        if 'accept all' in (btn.text or '').lower():
            btn.click()
            # Give the post-consent redirect time to complete.
            time.sleep(CONSENT_WAIT)
            break


def _print_results(result):
    """Print the extracted fields and up to ten matching sidebar elements."""
    print(SEPARATOR)
    print("EXTRACTION RESULTS (from search results page):")
    print(SEPARATOR)
    print(f"Business Name: {result['businessName']}")
    print(f"Rating: {result['rating']}")
    print(f"Review Count: {result['reviewCount']}")

    if result['allTextWithNumbers']:
        print(f"\n{SEPARATOR}")
        print("ALL ELEMENTS WITH REVIEW NUMBERS (first 10):")
        print(SEPARATOR)
        for i, item in enumerate(result['allTextWithNumbers'][:10], 1):
            print(f"\n{i}. <{item['tag']}> Number: {item['number']}")
            print(f" Text: {item['text'][:100]}")


def _print_console_logs(driver):
    """Print the browser console entries emitted by the extraction snippet."""
    console_logs = driver.get_log('browser')
    print(f"\n{SEPARATOR}")
    print("BROWSER CONSOLE LOGS:")
    print(SEPARATOR)
    for log in console_logs:
        if '[EXTRACTION]' in log['message']:
            print(log['message'])


def main():
    """Run the search-results extraction experiment and print a report."""
    driver = Driver(uc=True, headless=True)
    # try/finally guarantees the browser is shut down even when a step
    # raises; otherwise a failed run would leave a headless Chrome behind.
    try:
        print(f"Navigating to: {SEARCH_URL}")
        driver.get(SEARCH_URL)
        time.sleep(PAGE_LOAD_WAIT)

        # Handle GDPR
        _accept_gdpr_consent(driver)

        # SHORT WAIT - extract quickly before auto-navigation!
        time.sleep(PRE_EXTRACTION_WAIT)

        print(f"Current URL (should still be /search/): {driver.current_url}")
        is_search = '/search/' in driver.current_url
        print(f"Still on search results: {is_search}\n")

        # FAST extraction from search results sidebar
        result = driver.execute_script(EXTRACTION_SCRIPT)
        _print_results(result)

        # Check browser console
        _print_console_logs(driver)

        # Wait a bit longer to see if Google auto-navigates
        print(f"\n{SEPARATOR}")
        print("Waiting 5 more seconds to see if Google auto-navigates...")
        print(SEPARATOR)
        time.sleep(AUTO_NAVIGATION_WAIT)
        print(f"URL after waiting: {driver.current_url}")
        print(f"Still on search results: {'/search/' in driver.current_url}")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()