whyrating-engine-legacy/debug_search_results.py

#!/usr/bin/env python3
"""
Debug script that extracts the business name, rating, and review count from
the Google Maps search-results page BEFORE Google auto-navigates away from it.
"""
import time
from seleniumbase import Driver
from selenium.webdriver.common.by import By

# JavaScript executed inside the page to scrape the first search result.
#
# It returns an object with these keys:
#   businessName        - text of the first element matching one of the name selectors
#   rating              - float parsed from the first "star" aria-label found
#   reviewCount         - integer parsed from text such as "152 reviews"
#   searchResults       - initialised to [] and never populated by this script
#   allTextWithNumbers  - every sidebar element whose text matched the review
#                         pattern; only filled when Strategy 1 found nothing,
#                         because Strategy 2 is skipped otherwise
#
# Progress is written to the browser console with an "[EXTRACTION]" prefix so
# the Python side can filter those lines out of the browser log afterwards.
#
# NOTE: this is a normal (non-raw) Python string, so every "\\" below reaches
# the browser as a single backslash (e.g. "\\d" becomes the regex class \d).
EXTRACTION_SCRIPT = """
    const info = {
        businessName: null,
        rating: null,
        reviewCount: null,
        searchResults: [],
        allTextWithNumbers: []
    };

    console.log('[EXTRACTION] Starting search results extraction...');

    // Get business name from first result card
    const nameSelectors = [
        'div[role="article"] h3',
        'div[role="article"] div.fontHeadlineSmall',
        'div[aria-label*="Results"] h3',
        'a[href*="/place/"] h3',
        'div.Nv2PK h3'  // Google Maps class for business name in search results
    ];

    for (const selector of nameSelectors) {
        const elem = document.querySelector(selector);
        if (elem && elem.textContent) {
            info.businessName = elem.textContent.trim();
            console.log(`[EXTRACTION] Found name via ${selector}: ${info.businessName}`);
            break;
        }
    }

    // Get rating from first result
    const ratingElem = document.querySelector('div[role="article"] [role="img"][aria-label*="star"], a[href*="/place/"] [role="img"][aria-label*="star"]');
    if (ratingElem) {
        const ariaLabel = ratingElem.getAttribute('aria-label');
        const match = ariaLabel.match(/([0-9.]+)/);
        if (match) {
            info.rating = parseFloat(match[1]);
            console.log(`[EXTRACTION] Found rating: ${info.rating}`);
        }
    }

    // CRITICAL: Extract review count from search results sidebar
    // Look for patterns like "152 reviews", "247 reviews", etc.
    const numberPattern = /(\\d[\\d,\\.]*)\\s*(?:review|reseña|avis)/i;

    // Strategy 1: Check first result card/article
    const resultCards = document.querySelectorAll('div[role="article"], a[href*="/place/"], div.Nv2PK');
    console.log(`[EXTRACTION] Found ${resultCards.length} result cards`);

    for (let card of resultCards) {
        const text = card.textContent || '';
        console.log(`[EXTRACTION] Card text (first 200 chars): ${text.substring(0, 200)}`);

        const match = text.match(numberPattern);
        if (match) {
            const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
            if (num > 0 && num < 1000000) {
                info.reviewCount = num;
                console.log(`[EXTRACTION] ✓ Found review count in card: ${num}`);
                break;
            }
        }

        // Only check first card
        break;
    }

    // Strategy 2: Check all elements in left sidebar/panel
    if (!info.reviewCount) {
        console.log('[EXTRACTION] Strategy 2: Checking all sidebar elements...');

        const leftPanel = document.querySelector('div[role="main"]') || document.querySelector('[aria-label*="Results"]') || document.body;
        const allElements = leftPanel.querySelectorAll('span, div, a, button');

        console.log(`[EXTRACTION] Checking ${allElements.length} elements in sidebar...`);

        for (let elem of allElements) {
            const text = elem.textContent || '';

            // Skip very long text blocks (likely not the review count)
            if (text.length > 0 && text.length < 150) {
                const match = text.match(numberPattern);
                if (match) {
                    const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
                    if (num > 0 && num < 1000000) {
                        info.allTextWithNumbers.push({
                            tag: elem.tagName,
                            text: text,
                            number: num
                        });

                        if (!info.reviewCount) {
                            info.reviewCount = num;
                            console.log(`[EXTRACTION] ✓ Found via sidebar scan: ${num} from "${text}"`);
                        }
                    }
                }
            }
        }
    }

    console.log(`[EXTRACTION] Final result: ${info.reviewCount} reviews`);
    return info;
"""

driver = Driver(uc=True, headless=True)

# Everything that touches the browser runs inside try/finally so the headless
# browser is always shut down, even when a step below raises (navigation
# failure, stale consent button, JS error, unavailable browser log, ...).
# Without this, a failed run would leave the browser process behind.
try:
    url = "https://www.google.com/maps/search/?api=1&query=soho+vilna+club&hl=en"
    print(f"Navigating to: {url}")
    driver.get(url)
    time.sleep(2)

    # Handle GDPR: if we were redirected to the consent page, click the first
    # form button whose visible text contains "accept all" (case-insensitive).
    if 'consent.google.com' in driver.current_url:
        print("Handling GDPR...")
        form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
        for btn in form_btns:
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                # Give the redirect back from the consent page time to happen.
                time.sleep(2)
                break

    # SHORT WAIT - extract quickly before auto-navigation!
    time.sleep(1.5)

    print(f"Current URL (should still be /search/): {driver.current_url}")
    is_search = '/search/' in driver.current_url
    print(f"Still on search results: {is_search}\n")

    # FAST extraction from search results sidebar
    result = driver.execute_script(EXTRACTION_SCRIPT)

    print("="*80)
    print("EXTRACTION RESULTS (from search results page):")
    print("="*80)
    print(f"Business Name: {result['businessName']}")
    print(f"Rating: {result['rating']}")
    print(f"Review Count: {result['reviewCount']}")

    # Only present when the sidebar scan (Strategy 2 in the JS) ran and matched.
    if result['allTextWithNumbers']:
        print(f"\n{'='*80}")
        print("ALL ELEMENTS WITH REVIEW NUMBERS (first 10):")
        print("="*80)
        for i, item in enumerate(result['allTextWithNumbers'][:10], 1):
            print(f"\n{i}. <{item['tag']}> Number: {item['number']}")
            print(f"   Text: {item['text'][:100]}")

    # Check browser console: echo only the lines our injected script logged.
    # NOTE(review): whether console.log (INFO-level) entries show up here
    # depends on the browser's logging preferences - confirm for this driver.
    console_logs = driver.get_log('browser')
    print(f"\n{'='*80}")
    print("BROWSER CONSOLE LOGS:")
    print("="*80)
    for log in console_logs:
        if '[EXTRACTION]' in log['message']:
            print(log['message'])

    # Wait a bit longer to see if Google auto-navigates
    print(f"\n{'='*80}")
    print("Waiting 5 more seconds to see if Google auto-navigates...")
    print("="*80)
    time.sleep(5)

    print(f"URL after waiting: {driver.current_url}")
    print(f"Still on search results: {'/search/' in driver.current_url}")
finally:
    # Always release the browser, whether the run succeeded or failed.
    driver.quit()