# whyrating-engine-legacy/debug_wait_for_results.py

#!/usr/bin/env python3
"""
Debug script - wait for search results to load before extracting.
"""
import time

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from seleniumbase import Driver

# Script flow:
#   1. Launch a headless undetected-Chrome driver and open a Google Maps search.
#   2. If redirected to the consent page, click the "accept all" form button.
#   3. Wait (up to 10 s) for the first result card or place link to appear.
#   4. Run EXTRACT_SCRIPT in the page to pull name / rating / review count
#      out of the first result card, together with debug breadcrumbs.
#   5. Print the results, save a screenshot, and always quit the browser.

# CSS selector shared by the Python-side wait; the same selector is used
# inside EXTRACT_SCRIPT to pick the first result card.
RESULT_SELECTOR = 'div[role="article"], a[href*="/place/"]'

# Where the screenshot of the search results page is written.
SCREENSHOT_PATH = '/tmp/search_results.png'

# JavaScript executed in the page. It returns an object with the keys
# businessName, rating, reviewCount (each null when not found) and debug
# (a list of human-readable strings describing what was found).
# NOTE: this is a normal (non-raw) Python string, so "\\d" reaches the
# browser as "\d".
EXTRACT_SCRIPT = """
    const info = {
        businessName: null,
        rating: null,
        reviewCount: null,
        debug: []
    };

    // Find first result card
    const resultCard = document.querySelector('div[role="article"], a[href*="/place/"]');
    if (!resultCard) {
        info.debug.push('No result card found');
        return info;
    }

    info.debug.push('Found result card');

    // Get full text of card
    const cardText = resultCard.textContent || '';
    info.debug.push(`Card text length: ${cardText.length}`);
    info.debug.push(`Card text (first 300 chars): ${cardText.substring(0, 300)}`);

    // Extract business name (usually first h3 or div with specific class)
    const nameElem = resultCard.querySelector('h3, div.fontHeadlineSmall, div[class*="fontHeadline"]');
    if (nameElem) {
        info.businessName = nameElem.textContent.trim();
        info.debug.push(`Found name: ${info.businessName}`);
    }

    // Extract rating from the aria-label of the star image
    const ratingElem = resultCard.querySelector('[role="img"][aria-label*="star"]');
    if (ratingElem) {
        const ariaLabel = ratingElem.getAttribute('aria-label');
        const match = ariaLabel.match(/([0-9.]+)/);
        if (match) {
            info.rating = parseFloat(match[1]);
            info.debug.push(`Found rating: ${info.rating}`);
        }
    }

    // Extract review count - look for "N reviews" pattern
    const numberPattern = /(\\d[\\d,\\.]*)\\s*(?:review|reseña|avis)/i;
    const match = cardText.match(numberPattern);

    if (match) {
        // Strip thousands separators before parsing; explicit radix 10.
        const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''), 10);
        if (num > 0 && num < 1000000) {
            info.reviewCount = num;
            info.debug.push(`✓ Found review count: ${num}`);
        }
    } else {
        info.debug.push('No review count pattern found in card text');

        // Try checking individual child elements
        const allChildren = resultCard.querySelectorAll('*');
        info.debug.push(`Card has ${allChildren.length} child elements`);

        for (let child of allChildren) {
            const childText = child.textContent || '';
            if (childText.length < 100 && /review/i.test(childText)) {
                info.debug.push(`Element with "review": ${childText}`);

                const match = childText.match(numberPattern);
                if (match) {
                    const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''), 10);
                    // Keep only the first valid count found among the children.
                    if (num > 0 && num < 1000000 && !info.reviewCount) {
                        info.reviewCount = num;
                        info.debug.push(`✓ Found via child element: ${num}`);
                    }
                }
            }
        }
    }

    return info;
"""

driver = Driver(uc=True, headless=True)

# Everything after the browser launch runs under try/finally so the headless
# browser process is shut down even when a step below raises.
try:
    url = "https://www.google.com/maps/search/?api=1&query=soho+vilna+club&hl=en"
    print(f"Navigating to: {url}")
    driver.get(url)
    time.sleep(2)

    # Handle GDPR: when the current URL is on consent.google.com, click the
    # first form button whose text contains "accept all".
    if 'consent.google.com' in driver.current_url:
        print("Handling GDPR...")
        form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
        for btn in form_btns:
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(2)
                break

    print(f"Current URL: {driver.current_url}")
    print("Waiting for search results to load...\n")

    # Wait for search results to appear (but don't wait so long that Google
    # auto-navigates). A timeout is reported and the script carries on so the
    # extraction step can still print its debug output; any other WebDriver
    # error propagates instead of being mislabelled as a timeout.
    try:
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, RESULT_SELECTOR)))
    except TimeoutException as e:
        print(f"✗ Timeout waiting for results: {e}")
    else:
        print("✓ Search results loaded!")

    # Give it just a tiny bit more time for content to render
    time.sleep(0.5)

    print(f"Current URL: {driver.current_url}")
    print(f"Still on search results: {'/search/' in driver.current_url}\n")

    # Extract
    result = driver.execute_script(EXTRACT_SCRIPT)

    print("="*80)
    print("EXTRACTION RESULTS:")
    print("="*80)
    print(f"Business Name: {result['businessName']}")
    print(f"Rating: {result['rating']}")
    print(f"Review Count: {result['reviewCount']}\n")

    print("="*80)
    print("DEBUG INFO:")
    print("="*80)
    for debug_line in result['debug']:
        print(f"  {debug_line}")

    # Take a screenshot of the search results
    driver.save_screenshot(SCREENSHOT_PATH)
    print(f"\n✓ Screenshot saved to: {SCREENSHOT_PATH}")
finally:
    driver.quit()