#!/usr/bin/env python3
"""
Debug script to inspect the actual HTML structure on Google Maps search results.

This will help us identify where the review count is located in the DOM.

Run it directly (``python <this file>``). It opens a headless Chrome session,
loads a fixed Google Maps search, dumps the visible page text, lists elements
whose text mentions the expected review count, and finally runs the review
extraction JavaScript with verbose debug output.
"""

import time

# Search that is inspected, and the review count we expect to see for it.
SEARCH_URL = "https://www.google.com/maps/search/?api=1&query=instinto+las+palmas&hl=en"
EXPECTED_REVIEW_COUNT = "152"

# Lower-cased labels of the "accept" button on the consent interstitial.
CONSENT_ACCEPT_LABELS = ("accept all", "aceptar todo")

BANNER = "=" * 80

# In-page script: list elements whose rendered text contains the needle passed
# as arguments[0] or looks like "<number> review...".
FIND_ELEMENTS_SCRIPT = r"""
const needle = arguments[0];
const results = [];

for (const elem of document.querySelectorAll('*')) {
    // innerText includes text rendered by descendants, so ancestors of a
    // match are reported as well; the length cap keeps big containers out.
    const ownText = elem.innerText || '';

    if (ownText && ownText.length < 200 &&
        (ownText.includes(needle) || /\d+\s*review/i.test(ownText))) {
        results.push({
            tag: elem.tagName,
            // className / href are SVGAnimatedString objects on SVG elements,
            // so read plain strings instead.
            class: elem.getAttribute('class'),
            id: elem.id,
            text: ownText.substring(0, 100),
            href: typeof elem.href === 'string' ? elem.href : elem.getAttribute('href'),
            role: elem.getAttribute('role'),
            ariaLabel: elem.getAttribute('aria-label')
        });
    }
}

return results.slice(0, 50);  // First 50 matches
"""

# In-page script: the extraction logic under investigation, instrumented with
# debug_info. Its logic is intentionally left as-is so the output reflects what
# the real extraction would find.
EXTRACT_SCRIPT = r"""
const info = {
    name: null,
    address: null,
    rating: null,
    total_reviews: null,
    debug_info: []
};

// Extract business name
const nameSelectors = [
    'h1.DUwDvf',
    '[role="main"] h1',
    'h1.fontHeadlineLarge'
];
for (const selector of nameSelectors) {
    const elem = document.querySelector(selector);
    if (elem && elem.textContent) {
        info.name = elem.textContent.trim();
        info.debug_info.push(`Found name via: ${selector}`);
        break;
    }
}

// Extract rating
const ratingElem = document.querySelector('[role="img"][aria-label*="star"]');
if (ratingElem) {
    const ariaLabel = ratingElem.getAttribute('aria-label');
    const match = ariaLabel.match(/([0-9.]+)/);
    if (match) {
        info.rating = parseFloat(match[1]);
        info.debug_info.push(`Found rating: ${info.rating} from aria-label: ${ariaLabel}`);
    }
}

// Extract total review count
const numberPattern = /(\d[\d,\.]*)\s*(?:review|reseña|avis)/i;

// Check search panel selectors
const searchPanelSelectors = [
    'a[href*="reviews"]',
    'button[jsaction*="reviews"]',
    'div[role="link"]',
];

for (const selector of searchPanelSelectors) {
    const elements = document.querySelectorAll(selector);
    info.debug_info.push(`Checking ${selector}: found ${elements.length} elements`);

    for (let elem of elements) {
        const text = elem.textContent || '';
        if (text.length < 200) {
            info.debug_info.push(` - text: "${text.substring(0, 100)}"`);
        }
        const match = text.match(numberPattern);
        if (match) {
            const num = parseInt(match[1].replace(/[,\.\s]/g, ''));
            if (num > 0 && num < 1000000) {
                info.total_reviews = num;
                info.debug_info.push(` ✓ FOUND via ${selector}: ${num}`);
                break;
            }
        }
    }
    if (info.total_reviews) break;
}

// If not found, try all spans/divs
if (!info.total_reviews) {
    const allElements = document.querySelectorAll('span, div, a');
    info.debug_info.push(`Checking all spans/divs/links: ${allElements.length} elements`);

    let checked = 0;
    for (let elem of allElements) {
        const text = elem.textContent || '';
        if (text.length < 100) {
            const match = text.match(numberPattern);
            if (match) {
                checked++;
                if (checked <= 10) {  // Log first 10 matches
                    info.debug_info.push(` - potential match: "${text.substring(0, 80)}"`);
                }
                const num = parseInt(match[1].replace(/[,\.\s]/g, ''));
                if (num > 0 && num < 1000000) {
                    info.total_reviews = num;
                    info.debug_info.push(` ✓ FOUND via all elements: ${num} from "${text.substring(0, 80)}"`);
                    break;
                }
            }
        }
    }
}

return info;
"""


def _as_text(value) -> str:
    """Coerce a value returned from the page into a plain string.

    Strings pass through. A dict with a string ``baseVal`` (the shape an
    ``SVGAnimatedString`` may take once serialised) yields that ``baseVal``.
    Anything else, including ``None``, becomes ``""``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        base = value.get("baseVal")
        return base if isinstance(base, str) else ""
    return ""


def find_accept_button(buttons):
    """Return the first button whose text contains an accept label, else None.

    Args:
        buttons: Iterable of objects exposing a ``text`` attribute (which may
            be ``None``), e.g. Selenium web elements.
    """
    for button in buttons:
        label = (button.text or "").lower()
        if any(phrase in label for phrase in CONSENT_ACCEPT_LABELS):
            return button
    return None


def format_element_match(index: int, elem: dict) -> list:
    """Build the printable lines describing one element found on the page.

    Args:
        index: 1-based position of the element in the result list.
        elem: Mapping with the keys produced by ``FIND_ELEMENTS_SCRIPT``
            (``tag``, ``class``, ``id``, ``text``, ``href``, ``role``,
            ``ariaLabel``). Missing or non-string values are tolerated.

    Returns:
        The lines to print, in order: header, then role / aria-label / href
        when present, then the text.
    """
    css_class = _as_text(elem.get("class"))[:50]
    tag = elem.get("tag")
    elem_id = elem.get("id", "")
    lines = [f"\n{index}. <{tag}> class='{css_class}' id='{elem_id}'"]

    role = elem.get("role")
    if role:
        lines.append(f" role: {role}")

    aria_label = _as_text(elem.get("ariaLabel"))
    if aria_label:
        lines.append(f" aria-label: {aria_label[:100]}")

    href = _as_text(elem.get("href"))
    if href:
        lines.append(f" href: {href[:100]}")

    lines.append(f" text: {elem.get('text', '')}")
    return lines


def _print_section(title: str) -> None:
    """Print a banner-framed section title."""
    print("\n" + BANNER)
    print(title)
    print(BANNER)


def _handle_consent(driver) -> None:
    """Dismiss the consent interstitial if the browser was redirected to it.

    Best-effort: any failure is reported and the script carries on, since the
    rest of the output is still useful for debugging.
    """
    if "consent.google.com" not in driver.current_url:
        return

    # Function-scope import keeps the pure helpers importable without Selenium.
    from selenium.webdriver.common.by import By

    print("Handling GDPR consent...")
    try:
        form_btns = driver.find_elements(By.CSS_SELECTOR, "form button")
        accept = find_accept_button(form_btns)
        if accept is not None:
            print(f"Clicking: {accept.text}")
            accept.click()
            time.sleep(3)
        elif len(form_btns) >= 2:
            # No recognised label: the second form button is used as fallback.
            print("Using fallback - clicking second button")
            form_btns[1].click()
            time.sleep(3)
    except Exception as e:  # deliberate best-effort, see docstring
        print(f"GDPR handling error: {e}")


def main() -> None:
    """Run the full inspection against ``SEARCH_URL`` and print the findings.

    The browser is always closed, even when a step raises; the exception is
    then propagated so the failure stays visible.
    """
    # Function-scope import keeps the pure helpers importable on machines
    # without SeleniumBase / Chrome installed.
    from seleniumbase import Driver

    # Initialize driver
    print("Starting Chrome...")
    driver = Driver(
        uc=True,
        headless=True,
        page_load_strategy="normal",
    )

    try:
        print(f"\nNavigating to: {SEARCH_URL}")
        driver.get(SEARCH_URL)
        time.sleep(3)

        _handle_consent(driver)

        # Wait for page to load
        print("\nWaiting for page to fully load...")
        time.sleep(5)

        print(f"\nCurrent URL: {driver.current_url}")

        # Get all text content on the page (guard against a null result).
        all_text = driver.execute_script("return document.body.innerText;") or ""
        _print_section("ALL TEXT ON PAGE (first 3000 chars):")
        print(all_text[:3000])

        # Search for elements containing the expected count or "review"
        _print_section(
            f"SEARCHING FOR ELEMENTS CONTAINING '{EXPECTED_REVIEW_COUNT}' OR 'review':"
        )
        matches = driver.execute_script(FIND_ELEMENTS_SCRIPT, EXPECTED_REVIEW_COUNT) or []
        for position, elem in enumerate(matches, 1):
            for line in format_element_match(position, elem):
                print(line)

        # Also check what the extraction script would find
        _print_section("RUNNING ACTUAL EXTRACTION SCRIPT:")
        result = driver.execute_script(EXTRACT_SCRIPT) or {}

        print("\nExtracted Info:")
        print(f" Name: {result.get('name')}")
        print(f" Rating: {result.get('rating')}")
        print(f" Total Reviews: {result.get('total_reviews')}")
        print("\nDebug Info:")
        for debug_line in result.get("debug_info", []):
            print(f" {debug_line}")

        _print_section("Done! Closing browser.")
    finally:
        # Always release the Chrome process, including on failure.
        driver.quit()


if __name__ == "__main__":
    main()