whyrating-engine-legacy/debug_detail_page.py

#!/usr/bin/env python3
"""
Debug script - check detail page after auto-navigation for review count.
"""
import re
import time

from selenium.webdriver.common.by import By
from seleniumbase import Driver

# Google Maps search URL for the business under investigation.
SEARCH_URL = "https://www.google.com/maps/search/?api=1&query=soho+vilna+club&hl=en"
SEPARATOR = "=" * 80

# Fixed waits, in seconds. This is a debug script, so plain sleeps are used
# instead of explicit wait conditions.
PAGE_LOAD_WAIT = 2
CONSENT_WAIT = 2
AUTO_NAVIGATION_WAIT = 6

# A number (digits with optional "," / "." separators) directly followed by
# the word for "review" in English, Spanish or French.
REVIEW_COUNT_RE = re.compile(r'(\d[\d,\.]*)\s*(?:review|reseña|avis)', re.IGNORECASE)

# Executed in the page: walks every element and reports those whose short
# text (< 200 chars) contains a review count, plus the first count seen.
FIND_REVIEW_ELEMENTS_JS = r"""
    const info = {
        foundIn: [],
        reviewCount: null
    };

    const numberPattern = /(\d[\d,\.]*)\s*(?:review|reseña|avis)/i;

    for (const elem of document.querySelectorAll('*')) {
        // Try the raw text first, then the rendered text.
        const candidates = [elem.textContent || '', elem.innerText || ''];

        for (const txt of candidates) {
            if (!txt || txt.length >= 200) {
                continue;
            }
            const match = txt.match(numberPattern);
            if (!match) {
                continue;
            }
            const num = parseInt(match[1].replace(/[,\.\s]/g, ''), 10);
            if (!(num > 0 && num < 1000000)) {
                continue;
            }

            info.foundIn.push({
                tag: elem.tagName,
                // getAttribute always yields a string or null; className is
                // an SVGAnimatedString object on SVG elements.
                class: elem.getAttribute('class'),
                id: elem.id,
                role: elem.getAttribute('role'),
                ariaLabel: elem.getAttribute('aria-label'),
                text: txt.substring(0, 100),
                number: num
            });

            if (!info.reviewCount) {
                info.reviewCount = num;
            }
            // Report each element once, even if both texts match.
            break;
        }
    }

    return info;
"""


def find_review_counts(text):
    """Return ``(raw, digits)`` pairs for every review count found in *text*.

    ``raw`` is the number exactly as it appears before the word "review"
    (or "reseña" / "avis"); ``digits`` is the same string with ``,`` and
    ``.`` removed, i.e. both are treated as thousands separators.
    """
    return [
        (raw, raw.replace(',', '').replace('.', ''))
        for raw in REVIEW_COUNT_RE.findall(text)
    ]


def short_numeric_lines(text, max_len=100):
    """Return stripped, non-empty lines of *text* shorter than *max_len* that contain a digit."""
    stripped_lines = (line.strip() for line in text.split('\n'))
    return [
        line
        for line in stripped_lines
        if line and len(line) < max_len and re.search(r'\d', line)
    ]


def _print_banner(title, leading_blank=True):
    """Print *title* framed by separator rules, optionally preceded by a blank line."""
    print(f"\n{SEPARATOR}" if leading_blank else SEPARATOR)
    print(title)
    print(SEPARATOR)


def _accept_consent_if_shown(driver):
    """Click "Accept all" when Google redirected to its consent page."""
    if 'consent.google.com' not in driver.current_url:
        return
    print("Handling GDPR...")
    for btn in driver.find_elements(By.CSS_SELECTOR, 'form button'):
        if 'accept all' in (btn.text or '').lower():
            btn.click()
            time.sleep(CONSENT_WAIT)
            break


def _report_text_matches(all_text):
    """Print review counts and short numeric lines found in the page text."""
    _print_banner("SEARCHING FOR REVIEW NUMBERS IN PAGE TEXT:", leading_blank=False)

    counts = find_review_counts(all_text)
    if counts:
        print(f"✓ Found {len(counts)} potential review count(s) in text:")
        for i, (raw, digits) in enumerate(counts, 1):
            print(f"  {i}. {raw} ({digits})")
    else:
        print("✗ No review count found in page text")

    _print_banner("PAGE TEXT ANALYSIS:")
    print("Lines containing numbers (first 30):")
    for i, line in enumerate(short_numeric_lines(all_text)[:30], 1):
        print(f"  {i}. {line}")


def _report_element_matches(result):
    """Print the outcome of the in-page JavaScript element scan."""
    _print_banner("JAVASCRIPT EXTRACTION:")
    print(f"Review Count Found: {result['reviewCount']}\n")

    if not result['foundIn']:
        print("No elements with review numbers found")
        return

    print("Elements containing review numbers (first 15):")
    for i, elem in enumerate(result['foundIn'][:15], 1):
        print(f"\n{i}. <{elem['tag']}> Number: {elem['number']}")
        if elem['class']:
            print(f"   class: {elem['class'][:60]}")
        if elem['role']:
            print(f"   role: {elem['role']}")
        if elem['ariaLabel']:
            print(f"   aria-label: {elem['ariaLabel'][:80]}")
        print(f"   text: {elem['text']}")


def main():
    """Open the Maps search, wait for the detail page, and dump review-count clues."""
    driver = Driver(uc=True, headless=True)
    try:
        print(f"Navigating to: {SEARCH_URL}")
        driver.get(SEARCH_URL)
        time.sleep(PAGE_LOAD_WAIT)

        _accept_consent_if_shown(driver)

        # Wait for auto-navigation to complete
        print("Waiting for Google Maps to auto-navigate to business detail page...")
        time.sleep(AUTO_NAVIGATION_WAIT)

        print(f"Final URL: {driver.current_url}")
        print(f"On detail page: {'/place/' in driver.current_url}\n")

        # Dump ALL text on the page; fall back to "" if the page has no body text.
        all_text = driver.execute_script("return document.body.innerText;") or ""
        _report_text_matches(all_text)

        # Now use JavaScript to find the exact element(s).
        _report_element_matches(driver.execute_script(FIND_REVIEW_ELEMENTS_JS))
    finally:
        # Always shut the browser down, even if a step above raised.
        driver.quit()


if __name__ == "__main__":
    main()