# whyrating-engine-legacy/debug_tabs.py

#!/usr/bin/env python3
"""
Debug script to find review count on business detail page tabs.
"""
import time
from seleniumbase import Driver
from selenium.webdriver.common.by import By

# JavaScript executed in the page. It returns an object with:
#   tabs        - index, text and aria-label of every `button[role="tab"]`
#   reviewCount - first number (0 < n < 1,000,000) found on a tab, either in
#                 parentheses or directly before "review"/"reseña"/"avis"
#   allText     - unique short (< 150 chars) text snippets mentioning "review"
# NOTE: this is a regular (non-raw) Python string, so the doubled backslashes
# below reach the browser as single backslashes inside the JS regex literals.
EXTRACT_TABS_JS = """
    const info = {
        tabs: [],
        reviewCount: null,
        allText: []
    };

    // Get all tabs
    const tabs = document.querySelectorAll('button[role="tab"]');
    tabs.forEach((tab, i) => {
        info.tabs.push({
            index: i,
            text: tab.textContent || '',
            ariaLabel: tab.getAttribute('aria-label') || ''
        });
    });

    // Look for review count patterns
    const reviewPattern = /\\((\\d[\\d,\\.]*)\\)/;
    const numberPattern = /(\\d[\\d,\\.]*)\\s*(?:review|reseña|avis)/i;

    for (let tab of tabs) {
        const text = tab.textContent || '';
        const ariaLabel = tab.getAttribute('aria-label') || '';

        let match = text.match(reviewPattern);
        if (!match) match = text.match(numberPattern);
        if (!match) match = ariaLabel.match(reviewPattern);
        if (!match) match = ariaLabel.match(numberPattern);

        if (match) {
            const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
            if (num > 0 && num < 1000000) {
                info.reviewCount = num;
                break;
            }
        }
    }

    // Also check all elements with "review" in text
    const allElements = document.querySelectorAll('*');
    for (let elem of allElements) {
        const text = (elem.textContent || '').trim();
        if (text.length > 0 && text.length < 150 && /review/i.test(text)) {
            if (!info.allText.includes(text)) {
                info.allText.push(text);
            }
        }
    }

    return info;
"""

# Created outside the try block so that `finally` never references an
# undefined name if the browser itself fails to start.
driver = Driver(uc=True, headless=True)

try:
    url = "https://www.google.com/maps/search/?api=1&query=instinto+las+palmas&hl=en"
    print(f"Navigating to: {url}")
    driver.get(url)
    time.sleep(3)

    # Handle GDPR: if we were redirected to the consent page, click the first
    # form button whose label contains "accept all" (case-insensitive).
    if 'consent.google.com' in driver.current_url:
        form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
        for btn in form_btns:
            if 'accept all' in (btn.text or '').lower():
                btn.click()
                time.sleep(3)
                break

    # Fixed wait to give the page time to render before inspecting it.
    time.sleep(5)
    print(f"Current URL: {driver.current_url}\n")

    # Extract tabs and review count
    result = driver.execute_script(EXTRACT_TABS_JS)

    print("="*80)
    print("TABS FOUND:")
    print("="*80)
    for tab in result['tabs']:
        print(f"\nTab {tab['index']}:")
        print(f"  Text: {tab['text']}")
        print(f"  Aria-label: {tab['ariaLabel']}")

    print(f"\n{'='*80}")
    print(f"REVIEW COUNT EXTRACTED: {result['reviewCount']}")
    print(f"{'='*80}\n")

    print("="*80)
    print("ALL TEXT CONTAINING 'review' (first 20):")
    print("="*80)
    for i, text in enumerate(result['allText'][:20], 1):
        print(f"{i}. {text}")
finally:
    # Always shut the browser down, even when navigation or script execution
    # raises; otherwise the headless browser process is left running.
    driver.quit()