From e3136281b847b676859f5518206601159c88b987 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?=
 <35082514+alezmad@users.noreply.github.com>
Date: Fri, 23 Jan 2026 17:59:09 +0000
Subject: [PATCH] Remove fast_scraper.py - consolidated into scraper_clean

All functionality now in scraper_clean.py:
- fast_scrape_reviews (main scraper)
- get_business_card_info (validation)

Update health_checks.py to import from scraper_clean.

Remove 1,935 lines of duplicate/obsolete code.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modules/fast_scraper.py  | 1935 --------------------------------------
 modules/health_checks.py |    2 +-
 2 files changed, 1 insertion(+), 1936 deletions(-)
 delete mode 100644 modules/fast_scraper.py

diff --git a/modules/fast_scraper.py b/modules/fast_scraper.py
deleted file mode 100644
index cb84533..0000000
--- a/modules/fast_scraper.py
+++ /dev/null
@@ -1,1935 +0,0 @@
-#!/usr/bin/env python3
-"""
-Fast DOM-only scraper module for API integration.
-Based on start_dom_only_fast.py - achieves ~18.9s for all reviews.
-
-This module provides a reusable function for the API server.
-"""
-import logging
-import time
-from typing import List, Dict, Any, Optional
-from seleniumbase import Driver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.common.exceptions import TimeoutException
-
-log = logging.getLogger(__name__)
-
-
-def check_no_reviews_early(driver) -> tuple[bool, str]:
-    """
-    Early detection for 'no reviews available' scenarios.
-    Returns (has_no_reviews, reason) tuple.
-
-    Uses structural patterns instead of fragile CSS classes for robustness.
-    """
-    try:
-        # Check for common "no reviews" messages in multiple languages
-        no_review_patterns = [
-            'no reviews yet',
-            'be the first to review',
-            "there aren't any reviews",
-            'no hay reseñas',
-            'sin reseñas',
-            "pas encore d'avis",
-            'noch keine bewertungen',
-            'nessuna recensione',
-            'まだレビューがありません',
-            'sem avaliações',
-            'belum ada ulasan'
-        ]
-
-        # Get page text
-        page_text = driver.execute_script("return document.body.innerText.toLowerCase();")
-
-        # Check for "no reviews" messages
-        for pattern in no_review_patterns:
-            if pattern in page_text:
-                return True, f"Found 'no reviews' message: '{pattern}'"
-
-        # Check if review count is explicitly 0
-        # IMPORTANT: Be very specific to avoid false positives!
-        review_count_check = driver.execute_script("""
-            // Only check for EXACT "0 reviews" patterns, not loose matches
-            const patterns = [
-                /^0\\s+reviews?$/im,           // Exactly "0 reviews" on its own line
-                /\\(0\\s+reviews?\\)/i,         // "(0 reviews)"
-                /\\b0\\s+reviews?\\b/i           // "0 reviews" as a complete phrase
-            ];
-
-            const text = document.body.innerText;
-
-            // Split into lines and check each line independently to avoid false positives
-            const lines = text.split('\\n');
-            for (let line of lines) {
-                const trimmed = line.trim();
-                for (let pattern of patterns) {
-                    if (pattern.test(trimmed)) {
-                        // Double-check: line should be short (not a review text itself)
-                        if (trimmed.length < 50) {
-                            return 'Found explicit "0 reviews" text: ' + trimmed;
-                        }
-                    }
-                }
-            }
-
-            return null;
-        """)
-
-        if review_count_check:
-            return True, review_count_check
-
-        # Check if reviews tab is disabled or not clickable
-        reviews_disabled = driver.execute_script("""
-            const tabs = document.querySelectorAll('button[role="tab"]');
-            for (let tab of tabs) {
-                const text = (tab.textContent || '').toLowerCase();
-                const aria = (tab.getAttribute('aria-label') || '').toLowerCase();
-
-                if (text.includes('review') || aria.includes('review')) {
-                    if (tab.disabled || tab.getAttribute('aria-disabled') === 'true') {
-                        return 'Reviews tab is disabled';
-                    }
-                }
-            }
-            return null;
-        """)
-
-        if reviews_disabled:
-            return True, reviews_disabled
-
-        return False, ""
-
-    except Exception as e:
-        log.warning(f"Error in early no-reviews detection: {e}")
-        return False, ""
-
-
-def extract_total_review_count(driver) -> Optional[int]:
-    """
-    Extract the total number of reviews from the Google Maps page.
-    Looks for text patterns like "500 reviews" in various elements.
-    Works on both search results pages and business detail pages.
-
-    Returns:
-        Total review count or None if not found
-    """
-    extract_script = """
-    // Optimized review count extraction - removed verbose logging for speed
-    let total = null;
-
-    const parenthesesPattern = /\\((\\d[\\d,\\.\\s]*)\\)/;
-    const numberPattern = /(\\d[\\d,\\.\\s]*)\\s*(?:review|reseña|avis|recensione|Bewertung|レビュー)/i;
-
-    // PRIORITY 1: Search results page
-    const searchResultsSelectors = [
-        'a[href*="reviews"]',
-        '[role="article"] span',
-        '[role="article"] a',
-        'div.fontBodyMedium',
-        'span.UY7F9',
-    ];
-
-    for (const selector of searchResultsSelectors) {
-        const elements = document.querySelectorAll(selector);
-        for (let i = 0; i < Math.min(elements.length, 20); i++) {
-            const elem = elements[i];
-            const text = elem.textContent || '';
-            const href = elem.getAttribute('href') || '';
-
-            let match = text.match(numberPattern);
-            if (match) {
-                const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-                if (num > 0 && num < 1000000) {
-                    total = num;
-                    break;
-                }
-            }
-
-            if (href.includes('reviews')) {
-                match = text.match(/(\\d[\\d,\\.\\s]*)/);
-                if (match) {
-                    const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-                    if (num > 0 && num < 1000000) {
-                        total = num;
-                        break;
-                    }
-                }
-            }
-        }
-        if (total) break;
-    }
-
-    // PRIORITY 2: Tab buttons (business detail page)
-    if (!total) {
-        const buttons = document.querySelectorAll('button[role="tab"]');
-        for (let i = 0; i < buttons.length; i++) {
-            const text = buttons[i].textContent || '';
-            let match = text.match(parenthesesPattern);
-            if (match) {
-                const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-                total = num;
-                break;
-            }
-            match = text.match(numberPattern);
-            if (match) {
-                const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-                total = num;
-                break;
-            }
-        }
-    }
-
-    // PRIORITY 3: Aria-labels
-    if (!total) {
-        const elements = document.querySelectorAll('[aria-label]');
-        for (let elem of elements) {
-            const ariaLabel = elem.getAttribute('aria-label') || '';
-            let match = ariaLabel.match(parenthesesPattern);
-            if (match) {
-                const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-                total = num;
-                break;
-            }
-            match = ariaLabel.match(numberPattern);
-            if (match) {
-                const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-                total = num;
-                break;
-            }
-        }
-    }
-
-    // PRIORITY 4: Fallback - entire page text
-    if (!total) {
-        const match = document.body.innerText.match(parenthesesPattern);
-        if (match) {
-            const num = parseInt(match[1].replace(/[,\\.\\s]/g, ''));
-            if (num > 0 && num < 1000000) {
-                total = num;
-            }
-        }
-    }
-
-    return total;
-    """
-
-    try:
-        total = driver.execute_script(extract_script)
-
-        # Get debug info from JavaScript
-        debug_script = """
-        const info = {
-            search_results_count: document.querySelectorAll('[role="article"]').length,
-            links_with_reviews: document.querySelectorAll('a[href*="reviews"]').length,
-            page_url: window.location.href,
-            page_title: document.title,
-            sample_texts: []
-        };
-
-        // Get sample text from links that might contain reviews
-        const reviewLinks = document.querySelectorAll('a[href*="reviews"]');
-        for (let i = 0; i < Math.min(5, reviewLinks.length); i++) {
-            info.sample_texts.push(reviewLinks[i].textContent.substring(0, 100));
-        }
-
-        // Also check for text containing "review" keyword
-        const allText = document.body.innerText.substring(0, 2000);
-        const reviewMatches = allText.match(/\\d+[\\s,\\.]*(?:review|reseña|avis)/gi);
-        if (reviewMatches) {
-            info.review_patterns_found = reviewMatches.slice(0, 5);
-        }
-
-        return info;
-        """
-        debug_info = driver.execute_script(debug_script)
-        log.info(f"Page debug: URL={debug_info.get('page_url')}")
-        log.info(f"Page debug: Found {debug_info.get('search_results_count')} search result articles")
-        log.info(f"Page debug: Found {debug_info.get('links_with_reviews')} links containing 'reviews'")
-        if debug_info.get('review_patterns_found'):
-            log.info(f"Page debug: Review patterns in text: {debug_info.get('review_patterns_found')}")
-        if debug_info.get('sample_texts'):
-            log.info(f"Page debug: Sample link texts: {debug_info.get('sample_texts')}")
-
-        if total and total > 0:
-            log.info(f"Extracted total review count: {total}")
-            return total
-        else:
-            log.warning(f"Could not extract total review count from page. Debug: {debug_info}")
-            return None
-    except Exception as e:
-        log.error(f"Error extracting total review count: {e}")
-        return None
-
-
-def extract_all_reviews_js(driver) -> List[Dict[str, Any]]:
-    """Extract ALL reviews using JavaScript - single fast operation."""
-
-    extract_script = """
-    const reviews = [];
-
-    // ROBUST SELECTOR STRATEGY: Try known selectors first, then fall back to structural matching
-    let elements = null;
-
-    // STRATEGY 1: Try known CSS selectors (fast path)
-    const knownSelectors = [
-        'div.jftiEf.fontBodyMedium',
-        'div.jftiEf',
-        'div[data-review-id]',
-        'div[jsaction*="review"]'
-    ];
-
-    for (let selector of knownSelectors) {
-        const found = document.querySelectorAll(selector);
-        if (found.length > 0) {
-            elements = found;
-            console.log('Found', found.length, 'reviews using known selector:', selector);
-            break;
-        }
-    }
-
-    // STRATEGY 2: Structural matching for unknown page layouts
-    // IMPORTANT: Search only within the reviews pane, not the entire page!
-    if (!elements || elements.length === 0) {
-        console.log('Known selectors failed, trying structural matching...');
-
-        // Find the reviews pane first
-        const pane = document.querySelector('div.m6QErb.WNBkOb.XiKgde') ||
-                     document.querySelector('div.m6QErb') ||
-                     document.querySelector('div[role="main"]');
-
-        if (!pane) {
-            console.warn('No reviews pane found');
-            return [];
-        }
-
-        // Find all divs that LOOK like reviews (have review structure) WITHIN the pane
-        const allDivs = pane.querySelectorAll('div');
-        const reviewElements = [];
-
-        for (let div of allDivs) {
-            // Skip if too small
-            if (div.children.length < 2) continue;
-
-            // Check for review indicators
-            const hasAuthor = div.querySelector('[aria-label*="photo" i], img[src*="photo"], img[src*="avatar"]');
-            const hasRating = div.querySelector('[aria-label*="star" i], [aria-label*="rating" i], span[role="img"]');
-            const hasText = Array.from(div.querySelectorAll('span')).some(s => s.textContent.length > 20);
-            const hasDate = div.textContent.match(/\\d+\\s*(day|week|month|year|día|semana|mes|año|dienų|savaitės)/i);
-
-            // Must have at least author, rating, and text to be a review
-            const indicators = [hasAuthor, hasRating, hasText, hasDate].filter(Boolean).length;
-            if (indicators >= 3) {
-                reviewElements.push(div);
-            }
-        }
-
-        if (reviewElements.length > 0) {
-            elements = reviewElements;
-            console.log('Found', reviewElements.length, 'reviews using structural matching');
-        }
-    }
-
-    // STRATEGY 3: Try role="article" as last resort (within pane)
-    if (!elements || elements.length === 0) {
-        const pane = document.querySelector('div.m6QErb.WNBkOb.XiKgde') ||
-                     document.querySelector('div.m6QErb') ||
-                     document.querySelector('div[role="main"]');
-
-        if (pane) {
-            const articles = pane.querySelectorAll('[role="article"]');
-            const validArticles = [];
-
-            for (let article of articles) {
-                const hasRating = article.querySelector('[aria-label*="star" i]');
-                const hasText = article.textContent.length > 30;
-                if (hasRating && hasText) {
-                    validArticles.push(article);
-                }
-            }
-
-            if (validArticles.length > 0) {
-                elements = validArticles;
-                console.log('Found', validArticles.length, 'reviews using role=article');
-            }
-        }
-    }
-
-    if (!elements || elements.length === 0) {
-        console.warn('No review elements found with any strategy');
-        return [];
-    }
-
-    for (let i = 0; i < elements.length; i++) {
-        const elem = elements[i];
-        const review = {};
-
-        try {
-            // Author
-            const authorElem = elem.querySelector('div.d4r55');
-            review.author = authorElem ? authorElem.textContent.trim() : null;
-
-            // Rating
-            const ratingElem = elem.querySelector('span.kvMYJc');
-            if (ratingElem) {
-                const ariaLabel = ratingElem.getAttribute('aria-label');
-                if (ariaLabel) {
-                    const match = ariaLabel.match(/\\d+/);
-                    review.rating = match ? parseFloat(match[0]) : null;
-                }
-            }
-
-            // Text
-            const textElem = elem.querySelector('span.wiI7pd');
-            review.text = textElem ? textElem.textContent.trim() : null;
-
-            // Date
-            const dateElem = elem.querySelector('span.rsqaWe');
-            review.date_text = dateElem ? dateElem.textContent.trim() : null;
-
-            // DEEP DIVE: Find where Google stores the actual timestamp
-            review.timestamp = null;
-            review.debug_date_info = {};
-
-            if (dateElem) {
-                // 1. Check all attributes on date element
-                const allAttrs = {};
-                for (let attr of dateElem.attributes) {
-                    allAttrs[attr.name] = attr.value;
-                }
-                review.debug_date_info.date_elem_attrs = allAttrs;
-
-                // 2. Check parent elements for data
-                let parent = dateElem.parentElement;
-                let parentLevel = 0;
-                while (parent && parentLevel < 3) {
-                    const parentAttrs = {};
-                    for (let attr of parent.attributes) {
-                        if (attr.name.includes('data') || attr.name.includes('time') || attr.name.includes('date')) {
-                            parentAttrs[attr.name] = attr.value;
-                        }
-                    }
-                    if (Object.keys(parentAttrs).length > 0) {
-                        review.debug_date_info[`parent_${parentLevel}_attrs`] = parentAttrs;
-                    }
-                    parent = parent.parentElement;
-                    parentLevel++;
-                }
-
-                // 3. Check the entire review container for hidden data
-                const reviewContainer = elem;
-                const containerAttrs = {};
-                for (let attr of reviewContainer.attributes) {
-                    containerAttrs[attr.name] = attr.value;
-                }
-                review.debug_date_info.container_attrs = containerAttrs;
-
-                // 4. Look for script tags or JSON data near the date
-                const nearbyScripts = elem.querySelectorAll('script');
-                if (nearbyScripts.length > 0) {
-                    review.debug_date_info.has_nearby_scripts = nearbyScripts.length;
-                }
-
-                // 5. Check for any element with 'time' in class or data
-                const timeElements = elem.querySelectorAll('[class*="time"], [data-timestamp], [datetime]');
-                if (timeElements.length > 0) {
-                    const timeData = [];
-                    timeElements.forEach(el => {
-                        timeData.push({
-                            tag: el.tagName,
-                            classes: el.className,
-                            datetime: el.getAttribute('datetime'),
-                            timestamp: el.getAttribute('data-timestamp'),
-                            text: el.textContent.substring(0, 50)
-                        });
-                    });
-                    review.debug_date_info.time_elements = timeData;
-                }
-            }
-
-            // Avatar
-            const avatarElem = elem.querySelector('img.NBa7we');
-            review.avatar_url = avatarElem ? avatarElem.src : null;
-
-            // Profile URL
-            const profileElem = elem.querySelector('button.WEBjve');
-            review.profile_url = profileElem ? profileElem.getAttribute('data-review-id') : null;
-
-            if (review.author && review.date_text) {
-                reviews.push(review);
-            }
-        } catch (e) {
-            // Skip this review
-        }
-    }
-
-    return reviews;
-    """
-
-    # ADDITIONAL: Check for Google's internal state/data objects
-    check_state_script = """
-    // Look for Google Maps' internal data stores
-    const debugInfo = {
-        global_keys: [],
-        app_data: null,
-        window_data: null
-    };
-
-    // Check window object for Google Maps data
-    for (let key in window) {
-        if (key.includes('google') || key.includes('maps') || key.includes('APP') || key.includes('_')) {
-            debugInfo.global_keys.push(key);
-        }
-    }
-
-    // Check for common React/Angular state keys
-    const stateKeys = ['__INITIAL_STATE__', '__NEXT_DATA__', '__APP_STATE__', 'APP_INITIALIZATION_STATE'];
-    for (let key of stateKeys) {
-        if (window[key]) {
-            debugInfo.app_data = key;
-        }
-    }
-
-    // Check for embedded JSON in script tags
-    const scriptTags = document.querySelectorAll('script[type="application/json"], script[type="application/ld+json"]');
-    debugInfo.json_scripts_count = scriptTags.length;
-    if (scriptTags.length > 0) {
-        debugInfo.json_scripts_sample = Array.from(scriptTags).slice(0, 2).map(s => s.textContent.substring(0, 200));
-    }
-
-    return debugInfo;
-    """
-
-    try:
-        reviews_data = driver.execute_script(extract_script)
-        state_debug = driver.execute_script(check_state_script)
-
-        # Log the global state debug info
-        log.info(f"Google Maps state debug: {state_debug}")
-
-        # Add review IDs
-        reviews = []
-        for i, review_data in enumerate(reviews_data):
-            review_id = f"review_{hash(review_data['author'] + review_data['date_text'])}"
-            review_data['review_id'] = review_id
-
-            # Add global state debug to first review only
-            if i == 0:
-                review_data['_google_state_debug'] = state_debug
-
-            reviews.append(review_data)
-
-        return reviews
-
-    except Exception as e:
-        log.error(f"Error in JavaScript extraction: {e}")
-        return []
-
-
-def fast_scrape_reviews(url: str, headless: bool = False, max_scrolls: int = 999999, progress_callback=None, driver=None, return_driver: bool = False) -> Dict[str, Any]:
-    """
-    Ultra-fast DOM-only scraping with JavaScript extraction.
-
-    Args:
-        url: Google Maps URL to scrape
-        headless: Run Chrome in headless mode (default: True)
-        max_scrolls: Maximum scrolls safety limit (default: 999999 - effectively unlimited)
-                    The scraper stops automatically via idle detection when no new reviews load.
-        progress_callback: Optional callback function(current_count, total_count) for progress updates
-        driver: Existing driver instance to reuse (from worker pool)
-        return_driver: If True, don't close driver and return it in result
-
-    Returns:
-        Dictionary with:
-            - reviews: List of review dictionaries
-            - count: Total number of reviews scraped
-            - total_reviews: Total reviews available (from page counter)
-            - time: Time taken in seconds
-            - success: True if successful, False otherwise
-            - error: Error message if failed
-            - driver: Driver instance (if return_driver=True)
-    """
-    start_time = time.time()
-
-    log.info(f"Starting fast scrape for URL: {url[:80]}...")
-
-    # Force English locale for consistent date parsing
-    # English gives cleaner date formats: "3 months ago" vs "Hace 3 meses"
-    # Store original URL in case we need to retry without locale override
-    original_url = url
-    locale_override_applied = False
-
-    if 'hl=' in url:
-        # Replace existing locale
-        url = url.replace('hl=es', 'hl=en').replace('hl=pt', 'hl=en').replace('hl=fr', 'hl=en')
-        locale_override_applied = True
-    else:
-        # Add English locale parameter
-        separator = '&' if '?' in url else '?'
-        url = f"{url}{separator}hl=en"
-        locale_override_applied = True
-
-    log.info(f"Using English locale (hl=en) for consistent date parsing")
-
-    # Track if driver was provided or created
-    driver_provided = driver is not None
-    should_close_driver = not return_driver and not driver_provided
-
-    # Initialize driver with custom user agent to avoid headless detection
-    # Even with headless=False + Xvfb, Chromium still reports as HeadlessChrome
-    if not driver:
-        driver = Driver(
-            uc=True,
-            headless=headless,
-            page_load_strategy="normal",
-            agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
-        )
-
-    try:
-        # Navigate
-        driver.get(url)
-        time.sleep(1.5)
-
-        # Handle GDPR consent page (CRITICAL FIX for headless mode!)
-        if 'consent.google.com' in driver.current_url:
-            try:
-                # Find all form buttons and click "Accept all" / "Aceptar todo"
-                form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
-                for btn in form_btns:
-                    btn_text = (btn.text or '').lower()
-                    if 'aceptar todo' in btn_text or 'accept all' in btn_text:
-                        log.info(f"Clicking GDPR consent: {btn.text}")
-                        btn.click()
-                        time.sleep(2)
-                        break
-                else:
-                    # Fallback: click second button (usually "Accept all")
-                    if len(form_btns) >= 2:
-                        log.info("Using fallback: clicking second form button")
-                        form_btns[1].click()
-                        time.sleep(2)
-            except Exception as e:
-                log.warning(f"GDPR consent handling failed: {e}")
-
-        # Dismiss cookie banner on Maps page
-        try:
-            cookie_btns = driver.find_elements(By.CSS_SELECTOR,
-                'button[aria-label*="Accept" i],button[aria-label*="Aceptar" i]')
-            if cookie_btns:
-                cookie_btns[0].click()
-                time.sleep(0.3)
-        except:
-            pass
-
-        # Click reviews tab with retry logic (important for containers)
-        review_keywords = ['reviews', 'review', 'reseñas', 'reseña']
-        reviews_tab_clicked = False
-
-        # Try multiple times to find and click reviews tab
-        for attempt in range(3):
-            if reviews_tab_clicked:
-                break
-
-            time.sleep(0.5)  # Wait between attempts
-
-            for selector in ['button[role="tab"]', '.LRkQ2', 'button']:
-                try:
-                    tabs = driver.find_elements(By.CSS_SELECTOR, selector)
-                    for tab in tabs:
-                        text = (tab.text or '').lower()
-                        aria = (tab.get_attribute('aria-label') or '').lower()
-
-                        if any(kw in text or kw in aria for kw in review_keywords):
-                            log.info(f"Clicking reviews tab: {tab.text or aria[:30]}")
-                            driver.execute_script("arguments[0].click();", tab)
-                            time.sleep(1.5)  # Wait for tab to load
-                            reviews_tab_clicked = True
-                            break
-
-                    if reviews_tab_clicked:
-                        break
-                except Exception as e:
-                    log.debug(f"Tab search attempt {attempt+1} with {selector}: {e}")
-                    continue
-
-        if not reviews_tab_clicked:
-            log.warning("Could not find reviews tab with hl=en locale")
-
-            # FALLBACK: If locale override was applied and tab not found,
-            # retry without locale override (fixes regional pages where hl=en breaks tabs)
-            if locale_override_applied:
-                log.info("Retrying without locale override to find reviews tab...")
-
-                # Reload page with original URL (no hl=en)
-                driver.get(original_url)
-                time.sleep(1.5)
-
-                # Handle GDPR again if needed
-                if 'consent.google.com' in driver.current_url:
-                    try:
-                        form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
-                        for btn in form_btns:
-                            btn_text = (btn.text or '').lower()
-                            if any(keyword in btn_text for keyword in ['accept', 'aceptar', 'priim', 'принять', 'accepter']):
-                                log.info(f"Clicking GDPR consent: {btn.text}")
-                                btn.click()
-                                time.sleep(2)
-                                break
-                        else:
-                            if len(form_btns) >= 2:
-                                log.info("Using fallback: clicking second form button")
-                                form_btns[1].click()
-                                time.sleep(2)
-                    except Exception as e:
-                        log.warning(f"GDPR consent handling failed: {e}")
-
-                # Dismiss cookie banner
-                try:
-                    cookie_btns = driver.find_elements(By.CSS_SELECTOR,
-                        'button[aria-label*="Accept" i],button[aria-label*="Aceptar" i]')
-                    if cookie_btns:
-                        cookie_btns[0].click()
-                        time.sleep(0.3)
-                except:
-                    pass
-
-                # Try to find reviews tab with multilingual keywords
-                multilingual_keywords = [
-                    'review', 'reviews',           # English
-                    'reseña', 'reseñas',           # Spanish
-                    'avis',                        # French
-                    'bewertung', 'bewertungen',    # German
-                    'recensione', 'recensioni',    # Italian
-                    'レビュー',                      # Japanese
-                    'avaliação', 'avaliações',     # Portuguese
-                    'отзыв', 'отзывы',             # Russian
-                    'atsiliepimai', 'atsiliepi',   # Lithuanian
-                    'ulasan',                      # Indonesian
-                    '리뷰'                          # Korean
-                ]
-
-                for attempt in range(3):
-                    if reviews_tab_clicked:
-                        break
-
-                    time.sleep(0.5)
-
-                    for selector in ['button[role="tab"]', '.LRkQ2', 'button']:
-                        try:
-                            tabs = driver.find_elements(By.CSS_SELECTOR, selector)
-                            for tab in tabs:
-                                text = (tab.text or '').lower()
-                                aria = (tab.get_attribute('aria-label') or '').lower()
-
-                                if any(kw in text or kw in aria for kw in multilingual_keywords):
-                                    log.info(f"Clicking reviews tab (native locale): {tab.text or aria[:30]}")
-                                    driver.execute_script("arguments[0].click();", tab)
-                                    time.sleep(1.5)
-                                    reviews_tab_clicked = True
-                                    break
-
-                            if reviews_tab_clicked:
-                                break
-                        except Exception as e:
-                            log.debug(f"Native locale tab search attempt {attempt+1} with {selector}: {e}")
-                            continue
-
-                if not reviews_tab_clicked:
-                    log.warning("Could not find reviews tab even without locale override")
-
-        # Wait for reviews section to load
-        time.sleep(2)
-
-        # EARLY DETECTION: Check if there are no reviews before attempting to scrape
-        no_reviews, reason = check_no_reviews_early(driver)
-        if no_reviews:
-            log.info(f"Early detection: No reviews available. Reason: {reason}")
-            return {
-                "reviews": [],
-                "count": 0,
-                "total_reviews": 0,
-                "time": time.time() - start_time,
-                "success": True,
-                "message": f"No reviews available: {reason}"
-            }
-
-        # Extract total review count from the page
-        total_reviews = extract_total_review_count(driver)
-
-        # Double-check: If extracted count is 0, return early
-        if total_reviews == 0:
-            log.info("Total review count is 0, skipping scraping")
-            return {
-                "reviews": [],
-                "count": 0,
-                "total_reviews": 0,
-                "time": time.time() - start_time,
-                "success": True,
-                "message": "Business has 0 reviews"
-            }
-
-        # Report initial progress with total count
-        if progress_callback and total_reviews:
-            try:
-                progress_callback(0, total_reviews)
-            except Exception as e:
-                log.warning(f"Progress callback failed: {e}")
-
-        # Find scrollable pane - try multiple selectors (container-friendly)
-        pane = None
-        pane_selectors = [
-            'div[role="main"] div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde',
-            'div.m6QErb.WNBkOb.XiKgde',
-            'div.m6QErb',  # Fallback to more general selector
-            'div[role="main"]',
-        ]
-
-        wait = WebDriverWait(driver, 5)
-        for selector in pane_selectors:
-            try:
-                pane = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
-                log.info(f"Found pane with selector: {selector}")
-                break
-            except TimeoutException:
-                continue
-
-        if not pane:
-            error_msg = "Could not find scrollable pane after trying all selectors"
-            log.error(error_msg)
-            return {
-                "reviews": [],
-                "count": 0,
-                "total_reviews": total_reviews,
-                "time": time.time() - start_time,
-                "success": False,
-                "error": error_msg
-            }
-
-        # Wait longer for initial reviews to load (containers can be slower)
-        time.sleep(2)
-
-        # Setup scroll
-        driver.execute_script("window.scrollablePane = arguments[0];", pane)
-        scroll_script = "window.scrollablePane.scrollBy(0, window.scrollablePane.scrollHeight);"
-
-        # Trigger initial scroll and verify reviews are loading
-        driver.execute_script(scroll_script)
-        time.sleep(0.8)
-
-        # Also scroll the main window (helps in some cases, especially containers)
-        driver.execute_script("window.scrollBy(0, 500);")
-        time.sleep(0.5)
-
-        # JavaScript function to count reviews using ROBUST structural patterns
-        # Instead of relying on CSS classes, we look for containers with review-like structure
-        count_reviews_script = """
-        // STRATEGY 1: Try known selectors first (fast path)
-        const knownSelectors = [
-            'div.jftiEf.fontBodyMedium',
-            'div.jftiEf',
-            'div[data-review-id]',
-            'div[jsaction*="review"]'
-        ];
-
-        for (let selector of knownSelectors) {
-            const found = document.querySelectorAll(selector);
-            if (found.length > 0) {
-                return found.length;
-            }
-        }
-
-        // STRATEGY 2: Structural pattern matching (robust, class-agnostic)
-        // Find containers that LOOK like reviews (have author + rating + text structure)
-        // IMPORTANT: Search only within the reviews pane, not the entire page!
-        const findReviewsByStructure = () => {
-            // Find the reviews pane first
-            const pane = document.querySelector('div.m6QErb.WNBkOb.XiKgde') ||
-                         document.querySelector('div.m6QErb') ||
-                         document.querySelector('div[role="main"]');
-
-            if (!pane) return 0;
-
-            // Search only within the pane
-            const allDivs = pane.querySelectorAll('div');
-            let reviewCount = 0;
-
-            for (let div of allDivs) {
-                // Skip if too small (reviews have substantial content)
-                if (div.children.length < 2) continue;
-
-                // Look for review indicators:
-                // - Has an author name (usually in a span/div with small text)
-                // - Has a rating (span with aria-label containing "star" or "rating")
-                // - Has review text (span/div with longer text content)
-
-                const hasAuthor = div.querySelector('[aria-label*="photo" i], img[src*="photo"], img[src*="avatar"]');
-                const hasRating = div.querySelector('[aria-label*="star" i], [aria-label*="rating" i], span[role="img"]');
-                const hasText = Array.from(div.querySelectorAll('span')).some(s => s.textContent.length > 20);
-                const hasDate = div.textContent.match(/\\d+\\s*(day|week|month|year|día|semana|mes|año|jour|mois|année|dienų|savaitės)/i);
-
-                // If it has at least 3 of these indicators, it's likely a review
-                const indicators = [hasAuthor, hasRating, hasText, hasDate].filter(Boolean).length;
-                if (indicators >= 3) {
-                    reviewCount++;
-                }
-            }
-
-            return reviewCount > 0 ? reviewCount : 0;
-        };
-
-        // STRATEGY 3: Look for role="article" with review-like content (within pane)
-        const pane3 = document.querySelector('div.m6QErb.WNBkOb.XiKgde') ||
-                      document.querySelector('div.m6QErb') ||
-                      document.querySelector('div[role="main"]');
-        if (pane3) {
-            const articles = pane3.querySelectorAll('[role="article"]');
-            if (articles.length > 0) {
-                let validArticles = 0;
-                for (let article of articles) {
-                    // Check if article looks like a review (has rating + text)
-                    const hasRating = article.querySelector('[aria-label*="star" i]');
-                    const hasText = article.textContent.length > 30;
-                    if (hasRating && hasText) {
-                        validArticles++;
-                    }
-                }
-                if (validArticles > 0) return validArticles;
-            }
-        }
-
-        // Try structural matching as last resort
-        const structuralCount = findReviewsByStructure();
-        return structuralCount;
-        """
-
-        # Check if reviews are actually loading
-        initial_count = driver.execute_script(count_reviews_script)
-
-        if initial_count < 5:
-            # Reviews not loaded yet, wait more and try alternative scrolling
-            log.info(f"Waiting for reviews to load (found {initial_count})...")
-
-            # Try clicking on the pane to focus it
-            try:
-                driver.execute_script("arguments[0].click();", pane)
-                time.sleep(0.5)
-            except:
-                pass
-
-            # Scroll both pane and window
-            driver.execute_script(scroll_script)
-            driver.execute_script("window.scrollBy(0, 500);")
-            time.sleep(1.5)
-
-            initial_count = driver.execute_script(count_reviews_script)
-
-            log.info(f"After extra waiting: {initial_count} reviews")
-
-        log.info(f"Scrolling to load all reviews (starting with {initial_count})...")
-
-        # Fast scrolling to load all DOM elements
-        last_count = 0
-        idle_count = 0
-
-        for i in range(max_scrolls):
-            # Scroll to load more
-            prev_count = driver.execute_script(count_reviews_script)
-            driver.execute_script(scroll_script)
-
-            # SMART WAIT: Wait until new reviews actually load
-            max_wait = 1.2
-            wait_step = 0.05
-            waited = 0
-
-            while waited < max_wait:
-                time.sleep(wait_step)
-                waited += wait_step
-
-                current_count = driver.execute_script(count_reviews_script)
-
-                # If reviews loaded, continue immediately!
-                if current_count > prev_count:
-                    idle_count = 0  # Reset idle counter
-                    break
-
-                # Give Google Maps more time to lazy-load (0.6s instead of 0.3s)
-                # Only exit early if we're confident nothing is loading
-                if waited >= 0.6 and current_count == prev_count:
-                    break
-
-            # Track consecutive idle scrolls
-            if current_count == prev_count:
-                idle_count += 1
-                # Be VERY patient: wait for 12 consecutive idle scrolls to ensure we get ALL reviews
-                # (each with up to 1.2s wait = ~14.4s total idle time before giving up)
-                # This ensures Google Maps has plenty of time to lazy-load all content
-                if idle_count >= 12:
-                    log.info(f"Reached end at {current_count} reviews (12 consecutive idle scrolls)")
-                    # Double-check we got all reviews if we know the total
-                    if total_reviews and current_count < total_reviews:
-                        log.warning(f"Only got {current_count}/{total_reviews} reviews ({current_count/total_reviews*100:.1f}%). Some may be hidden or loading slowly.")
-                    break
-
-            # Progress logging and callback every 5 scrolls
-            if (i + 1) % 5 == 0:
-                log.info(f"{current_count} review elements loaded...")
-                if progress_callback and total_reviews:
-                    try:
-                        progress_callback(current_count, total_reviews)
-                    except Exception as e:
-                        log.warning(f"Progress callback failed: {e}")
-
-            # Aggressive memory management every 20 scrolls
-            if (i + 1) % 20 == 0:
-                try:
-                    # Clear console logs to prevent buildup
-                    driver.execute_script("console.clear();")
-
-                    # Force garbage collection in browser
-                    driver.execute_script("""
-                        if (window.gc) { window.gc(); }
-                        // Remove image srcs to free memory (images reload on demand)
-                        document.querySelectorAll('img').forEach(img => {
-                            if (img.complete && !img.classList.contains('needed')) {
-                                img.removeAttribute('src');
-                            }
-                        });
-                    """)
-
-                    # Brief pause to let Chrome breathe
-                    time.sleep(0.1)
-                except Exception:
-                    pass  # Ignore if fails
-
-            last_count = current_count
-
-        # Shorter final scroll
-        for _ in range(2):
-            driver.execute_script(scroll_script)
-            time.sleep(0.3)
-
-        scroll_time = time.time() - start_time
-        log.info(f"Scrolling complete in {scroll_time:.2f}s")
-
-        # Update progress: scrolling done, starting extraction
-        if progress_callback and total_reviews:
-            try:
-                progress_callback(current_count, total_reviews)
-            except Exception as e:
-                log.warning(f"Progress callback failed: {e}")
-
-        # Extract ALL reviews using JavaScript (fast!)
-        log.info("Extracting reviews with JavaScript...")
-        extract_start = time.time()
-
-        all_reviews = extract_all_reviews_js(driver)
-
-        extract_time = time.time() - extract_start
-        log.info(f"Extraction complete in {extract_time:.2f}s")
-
-        # Final progress update with actual extracted count
-        if progress_callback and total_reviews:
-            try:
-                progress_callback(len(all_reviews), total_reviews)
-            except Exception as e:
-                log.warning(f"Progress callback failed: {e}")
-
-        elapsed = time.time() - start_time
-
-        log.info(f"Fast scrape completed: {len(all_reviews)} reviews in {elapsed:.2f}s")
-
-        result = {
-            "reviews": all_reviews,
-            "count": len(all_reviews),
-            "total_reviews": total_reviews,
-            "time": elapsed,
-            "scroll_time": scroll_time,
-            "extract_time": extract_time,
-            "success": True,
-            "error": None
-        }
-
-        if return_driver:
-            result["driver"] = driver
-
-        return result
-
-    except Exception as e:
-        elapsed = time.time() - start_time
-        error_msg = f"Fast scrape failed: {str(e)}"
-        log.error(error_msg)
-
-        # Check if this is a tab crash - try to extract what we have
-        partial_reviews = []
-        is_tab_crash = "tab crashed" in str(e).lower() or "session deleted" in str(e).lower()
-
-        if is_tab_crash and driver:
-            log.warning("Detected tab crash - attempting to extract partial reviews from DOM before crash...")
-            try:
-                # Try to extract reviews that were loaded before crash
-                partial_reviews = extract_all_reviews_js(driver)
-                log.info(f"Recovered {len(partial_reviews)} reviews from crashed session")
-            except Exception as recovery_error:
-                log.error(f"Could not recover reviews: {recovery_error}")
-
-        # Return partial results if we got any
-        if partial_reviews:
-            result = {
-                "reviews": partial_reviews,
-                "count": len(partial_reviews),
-                "total_reviews": None,
-                "time": elapsed,
-                "success": False,  # Mark as failed but with partial data
-                "error": f"{error_msg} (recovered {len(partial_reviews)} reviews)",
-                "partial": True
-            }
-        else:
-            result = {
-                "reviews": [],
-                "count": 0,
-                "total_reviews": None,
-                "time": elapsed,
-                "success": False,
-                "error": error_msg
-            }
-
-        if return_driver:
-            result["driver"] = driver
-
-        return result
-
-    finally:
-        if should_close_driver and driver:
-            try:
-                driver.quit()
-            except:
-                pass
-
-
-def get_business_card_info(url: str, headless: bool = True, driver=None, return_driver: bool = False) -> Dict[str, Any]:
-    """
-    Extract business card information from Google Maps.
-    Uses the same reliable navigation logic as the main scraper.
-
-    Returns business card with:
-    - name
-    - address
-    - rating (float)
-    - total_reviews (int)
-    - success/error
-    """
-    import time as timing_module
-    start_time = timing_module.time()
-    log.info(f"[PROFILE] Getting business card info for: {url}")
-
-    driver_provided = driver is not None
-    should_close_driver = not return_driver and not driver_provided
-
-    try:
-        # Initialize driver if not provided
-        t0 = timing_module.time()
-        if not driver:
-            driver = Driver(
-                uc=True,
-                headless=headless,
-                page_load_strategy="normal",
-                agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
-            )
-            log.info(f"[PROFILE] Driver initialization: {timing_module.time() - t0:.2f}s")
-        else:
-            log.info(f"[PROFILE] Using pooled driver (0.00s)")
-
-        # Force English locale AND US region for consistent parsing/results
-        # This helps avoid geolocation-based variations in Google Maps results
-        if 'hl=' in url:
-            url = url.replace('hl=es', 'hl=en').replace('hl=pt', 'hl=en').replace('hl=fr', 'hl=en')
-        else:
-            separator = '&' if '?' in url else '?'
-            url = f"{url}{separator}hl=en"
-
-        # Add US region parameter if not present
-        if 'gl=' not in url:
-            url = f"{url}&gl=us"
-
-        # Set Chrome geolocation to US (Boston, MA) using CDP
-        # This ensures Google Maps shows US results regardless of server location
-        try:
-            driver.execute_cdp_cmd('Emulation.setGeolocationOverride', {
-                'latitude': 42.3601,
-                'longitude': -71.0589,
-                'accuracy': 100
-            })
-            log.info("Set geolocation to US (Boston, MA)")
-        except Exception as e:
-            log.warning(f"Could not set geolocation: {e}")
-
-        log.info(f"Loading Google Maps page...")
-        t0 = timing_module.time()
-        driver.get(url)
-        log.info(f"[PROFILE] Page load (driver.get): {timing_module.time() - t0:.2f}s")
-
-        t0 = timing_module.time()
-        time.sleep(0.5)  # Initial wait - reduced from 2s
-        log.info(f"[PROFILE] Initial sleep: {timing_module.time() - t0:.2f}s")
-
-        # Handle GDPR consent page
-        t0 = timing_module.time()
-        if 'consent.google.com' in driver.current_url:
-            log.info("Detected GDPR consent page, accepting...")
-            try:
-                form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
-                for btn in form_btns:
-                    btn_text = (btn.text or '').lower()
-                    if 'aceptar todo' in btn_text or 'accept all' in btn_text or 'reject all' in btn_text:
-                        log.info(f"Clicking GDPR consent: {btn.text}")
-                        btn.click()
-                        time.sleep(1)
-                        break
-                else:
-                    if len(form_btns) >= 2:
-                        log.info("Using fallback: clicking second form button")
-                        form_btns[1].click()
-                        time.sleep(1)
-            except Exception as e:
-                log.warning(f"GDPR consent handling failed: {e}")
-
-            # After GDPR consent, reload the original URL to ensure proper page state
-            log.info(f"Reloading original URL after GDPR consent...")
-            driver.get(url)
-            time.sleep(1)
-            log.info(f"[PROFILE] GDPR consent handling: {timing_module.time() - t0:.2f}s")
-        else:
-            log.info(f"[PROFILE] No GDPR consent page (0.00s)")
-
-        # Dismiss cookie banner
-        try:
-            cookie_btns = driver.find_elements(By.CSS_SELECTOR,
-                'button[aria-label*="Accept" i],button[aria-label*="Aceptar" i]')
-            if cookie_btns:
-                log.info("Dismissing cookie banner...")
-                cookie_btns[0].click()
-                time.sleep(0.3)  # Reduced from 0.5s
-        except:
-            pass
-        log.info(f"[PROFILE] Cookie banner dismissal: {timing_module.time() - t0:.2f}s")
-
-        # Wait for page to load - use smart waits
-        t0 = timing_module.time()
-        try:
-            log.info("Waiting for Google Maps content to load...")
-            wait = WebDriverWait(driver, 10)
-            # Wait for basic page structure (h1 or heading)
-            wait.until(
-                lambda d: d.find_elements(By.CSS_SELECTOR, 'h1, [role="heading"]')
-            )
-            log.info("Basic page structure loaded")
-
-            # Wait for page to settle - search URLs redirect to place URLs
-            # which triggers additional content loading
-            time.sleep(2)
-
-            # Wait specifically for review count element (aria-label ending with "reviews")
-            # This is the most reliable indicator that the business detail is loaded
-            try:
-                WebDriverWait(driver, 5).until(
-                    lambda d: d.execute_script("""
-                        var elems = document.querySelectorAll('[aria-label]');
-                        for (var i = 0; i < elems.length; i++) {
-                            var label = elems[i].getAttribute('aria-label') || '';
-                            if (/^[0-9]+ reviews?$/.test(label)) return true;
-                        }
-                        return false;
-                    """)
-                )
-                log.info("Review count element loaded")
-            except:
-                # Fallback: Try clicking Reviews tab or rating stars to expose the review count
-                log.info("Review count wait timeout, trying to click Reviews/rating...")
-                try:
-                    # Try 1: Click Reviews tab (if exists)
-                    clicked = driver.execute_script("""
-                        var tabs = document.querySelectorAll('[role="tab"]');
-                        for (var i = 0; i < tabs.length; i++) {
-                            var txt = (tabs[i].textContent || '').toLowerCase();
-                            if (txt.includes('review')) {
-                                tabs[i].click();
-                                return 'tab';
-                            }
-                        }
-                        // Try 2: Click the rating stars element (often links to reviews)
-                        var stars = document.querySelector('[role="img"][aria-label*="star"]');
-                        if (stars) {
-                            var parent = stars.parentElement;
-                            if (parent && parent.tagName.toLowerCase() === 'button') {
-                                parent.click();
-                                return 'stars_button';
-                            }
-                            stars.click();
-                            return 'stars';
-                        }
-                        // Try 3: Click "Write a review" or any review-related button
-                        var btns = document.querySelectorAll('button[aria-label*="review" i]');
-                        for (var b = 0; b < btns.length; b++) {
-                            var label = btns[b].getAttribute('aria-label') || '';
-                            if (!/write/i.test(label) && /review/i.test(label)) {
-                                btns[b].click();
-                                return 'review_btn: ' + label;
-                            }
-                        }
-                        return 'none';
-                    """)
-                    log.info(f"Clicked: {clicked}")
-                    time.sleep(2)  # Wait for reviews panel to load
-                except Exception as e:
-                    log.warning(f"Click attempt failed: {e}")
-
-        except Exception as e:
-            log.warning(f"Timeout waiting for Maps content: {e}")
-            time.sleep(2)  # Fallback wait
-        log.info(f"[PROFILE] Smart wait for content: {timing_module.time() - t0:.2f}s")
-        log.info(f"DEBUG: Current URL: {driver.current_url[:100]}...")
-        log.info(f"DEBUG: Page title: {driver.title}")
-
-        # Extract business card information using JavaScript
-        t0 = timing_module.time()
-        extract_script = """
-        const info = {
-            name: null,
-            address: null,
-            rating: null,
-            total_reviews: null
-        };
-
-        // ============ ROBUST EXTRACTION (no class names, aria/data attributes preferred) ============
-
-        // Helper: Parse review count from text, handling multiple formats
-        function parseReviewCount(text) {
-            if (!text) return null;
-
-            // Pattern 1: Exact "N reviews" format (aria-labels, clean text)
-            // Matches: "27 reviews", "1,234 reviews", "27 reseñas", "27 avis"
-            var match = text.match(/^([0-9][0-9,.]*)[ ]*(?:reviews?|reseñas?|avis|bewertungen?|recensioni?)$/i);
-            if (match) {
-                return parseInt(match[1].replace(/[,. ]/g, ''));
-            }
-
-            // Pattern 2: "(N)" format often used in tabs like "Reviews (27)"
-            match = text.match(/[(]([0-9][0-9,.]*)[)]$/);
-            if (match) {
-                return parseInt(match[1].replace(/[,. ]/g, ''));
-            }
-
-            // Pattern 3: "N reviews" anywhere in short text (< 30 chars to avoid false positives)
-            if (text.length < 30) {
-                match = text.match(/([0-9][0-9,]*)[ ]+(?:reviews?|reseñas?|avis)/i);
-                if (match) {
-                    return parseInt(match[1].replace(/[,. ]/g, ''));
-                }
-            }
-
-            return null;
-        }
-
-        // ============ EXTRACT BUSINESS NAME ============
-        // Priority: h1 (semantic), then role="heading"
-        const h1 = document.querySelector('h1');
-        if (h1 && h1.textContent) {
-            info.name = h1.textContent.trim();
-        }
-        if (!info.name) {
-            const heading = document.querySelector('[role="heading"][aria-level="1"]');
-            if (heading && heading.textContent) {
-                info.name = heading.textContent.trim();
-            }
-        }
-
-        // ============ EXTRACT ADDRESS ============
-        // Priority: data-item-id (semantic), then aria-label containing "address"
-        const addressElem = document.querySelector('[data-item-id*="address"]');
-        if (addressElem && addressElem.textContent) {
-            info.address = addressElem.textContent.trim();
-        }
-        if (!info.address) {
-            const ariaAddress = document.querySelector('[aria-label*="ddress"]');
-            if (ariaAddress && ariaAddress.textContent) {
-                info.address = ariaAddress.textContent.trim();
-            }
-        }
-
-        // ============ EXTRACT RATING ============
-        // Priority: aria-label containing "star" on role="img" elements
-        info._debug_rating_context = [];
-        const ratingElems = document.querySelectorAll('[role="img"][aria-label*="star"]');
-        for (let elem of ratingElems) {
-            const ariaLabel = elem.getAttribute('aria-label') || '';
-            // Match "4.9 stars" or "4,9 stars" (European format)
-            const match = ariaLabel.match(/([0-9][.,]?[0-9]?)\\s*star/i);
-            if (match) {
-                info.rating = parseFloat(match[1].replace(',', '.'));
-                // DEBUG: Capture parent/sibling context to find review count
-                var parent = elem.parentElement;
-                if (parent) {
-                    info._debug_rating_context.push('PARENT: ' + (parent.textContent || '').trim().substring(0, 100));
-                    var grandparent = parent.parentElement;
-                    if (grandparent) {
-                        info._debug_rating_context.push('GRANDPARENT: ' + (grandparent.textContent || '').trim().substring(0, 100));
-                        // Check all children of grandparent for review count
-                        var gpChildren = grandparent.querySelectorAll('*');
-                        for (var c = 0; c < Math.min(gpChildren.length, 30); c++) {
-                            var childText = (gpChildren[c].textContent || '').trim();
-                            if (childText.length > 0 && childText.length < 20 && /[0-9]/.test(childText)) {
-                                info._debug_rating_context.push('GP_CHILD: ' + childText);
-                            }
-                        }
-                        // Also check great-grandparent
-                        var ggp = grandparent.parentElement;
-                        if (ggp) {
-                            info._debug_rating_context.push('GREAT_GP: ' + (ggp.textContent || '').trim().substring(0, 150));
-                        }
-                    }
-                    // Check siblings
-                    var nextSib = parent.nextElementSibling;
-                    if (nextSib) {
-                        info._debug_rating_context.push('NEXT_SIB: ' + (nextSib.textContent || '').trim().substring(0, 100));
-                    }
-                }
-                break;
-            }
-        }
-
-        // ============ EXTRACT TOTAL REVIEWS (ROBUST, ARIA-FIRST) ============
-
-        // PRIORITY 1: aria-label with exact "N reviews" format (most reliable)
-        // Google Maps uses aria-label="27 reviews" for accessibility
-        info._debug_aria = [];
-        info._debug_all_numeric = [];
-        if (!info.total_reviews) {
-            var ariaElems = document.querySelectorAll('[aria-label]');
-            for (var i = 0; i < ariaElems.length; i++) {
-                var ariaLabel = ariaElems[i].getAttribute('aria-label') || '';
-                // Collect all labels containing "review"
-                if (ariaLabel.toLowerCase().indexOf('review') >= 0) {
-                    info._debug_aria.push(ariaLabel);
-                }
-                // Collect all labels starting with a digit
-                if (/^[0-9]/.test(ariaLabel)) {
-                    info._debug_all_numeric.push(ariaLabel);
-                }
-                var count = parseReviewCount(ariaLabel);
-                if (count && count > 0 && count < 100000) {
-                    info.total_reviews = count;
-                    info._debug_matched = ariaLabel;
-                    break;
-                }
-            }
-        }
-
-        // DEBUG: Find all text with parenthetical numbers like "(27)"
-        info._debug_parens = [];
-        info._debug_short_text = [];  // All short text with numbers
-        var allSpans = document.querySelectorAll('span, div, a, button');
-        for (var j = 0; j < Math.min(allSpans.length, 500); j++) {
-            var spanText = allSpans[j].textContent || '';
-            // Capture parenthetical numbers
-            if (spanText.length < 20 && /[(][0-9]+[)]/.test(spanText)) {
-                info._debug_parens.push(spanText.trim());
-            }
-            // Capture ALL short text containing numbers (for debugging)
-            if (spanText.length > 0 && spanText.length < 30 && /[0-9]+/.test(spanText)) {
-                var cleaned = spanText.trim().replace(/\\s+/g, ' ');
-                if (cleaned && info._debug_short_text.indexOf(cleaned) < 0) {
-                    info._debug_short_text.push(cleaned);
-                }
-            }
-        }
-
-        // PRIORITY 2.5: Look for text containing numbers near "review" word anywhere on page
-        // This catches formats like "27 reviews", "reviews: 27", etc. that aren't in aria-labels
-        if (!info.total_reviews) {
-            var allElems = document.querySelectorAll('*');
-            for (var k = 0; k < Math.min(allElems.length, 1000); k++) {
-                var elem = allElems[k];
-                // Skip if has children (we want leaf nodes only)
-                if (elem.children.length > 0) continue;
-                var txt = (elem.textContent || '').trim();
-                // Look for short text with both numbers and "review" word
-                if (txt.length >= 3 && txt.length < 30 && /review/i.test(txt)) {
-                    var match = txt.match(/([0-9][0-9,]*)/);
-                    if (match) {
-                        var count = parseInt(match[1].replace(/,/g, ''));
-                        if (count > 0 && count < 100000) {
-                            info.total_reviews = count;
-                            info._debug_matched = 'LEAF: ' + txt;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        // DEBUG: Collect all tab names
-        info._debug_tabs = [];
-        const tabs = document.querySelectorAll('[role="tab"]');
-        for (let t = 0; t < tabs.length; t++) {
-            info._debug_tabs.push((tabs[t].textContent || '').trim().substring(0, 30));
-        }
-
-        // DEBUG: Collect all buttons with text (might contain review count)
-        info._debug_buttons = [];
-        const buttons = document.querySelectorAll('button');
-        for (let b = 0; b < Math.min(buttons.length, 20); b++) {
-            var btnText = (buttons[b].textContent || '').trim();
-            if (btnText && btnText.length < 40) {
-                info._debug_buttons.push(btnText.substring(0, 40));
-            }
-        }
-
-        // PRIORITY 2: Tabs with role="tab" (Reviews tab often shows count)
-        if (!info.total_reviews) {
-            for (let tab of tabs) {
-                const text = (tab.textContent || '').trim();
-                // Look for "Reviews" tab with count
-                if (text.toLowerCase().includes('review')) {
-                    const count = parseReviewCount(text);
-                    if (count && count > 0) {
-                        info.total_reviews = count;
-                        info._debug_matched = 'TAB: ' + text;
-                        break;
-                    }
-                }
-            }
-        }
-
-        // PRIORITY 2.3: Reviews panel header (after clicking Reviews tab)
-        // Google Maps shows "27 reviews" as heading text in the reviews panel
-        if (!info.total_reviews) {
-            // Look for headings containing review count
-            var headings = document.querySelectorAll('h1, h2, [role="heading"]');
-            for (var h = 0; h < headings.length; h++) {
-                var hText = (headings[h].textContent || '').trim();
-                if (/review/i.test(hText)) {
-                    var match = hText.match(/([0-9][0-9,]*)/);
-                    if (match) {
-                        var count = parseInt(match[1].replace(/,/g, ''));
-                        if (count > 0 && count < 100000) {
-                            info.total_reviews = count;
-                            info._debug_matched = 'HEADING: ' + hText;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        // PRIORITY 2.4: Look for sort button area which often has total count
-        // The sort dropdown area displays "Sort: Newest" and total reviews
-        if (!info.total_reviews) {
-            var sortBtns = document.querySelectorAll('button[data-value="sort"], [aria-label*="Sort"]');
-            for (var s = 0; s < sortBtns.length; s++) {
-                var parent = sortBtns[s].parentElement;
-                if (parent) {
-                    var pText = (parent.textContent || '').trim();
-                    if (/review/i.test(pText)) {
-                        var match = pText.match(/([0-9][0-9,]*)\\s*review/i);
-                        if (match) {
-                            var count = parseInt(match[1].replace(/,/g, ''));
-                            if (count > 0 && count < 100000) {
-                                info.total_reviews = count;
-                                info._debug_matched = 'SORT_AREA: ' + pText.substring(0, 50);
-                                break;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        // PRIORITY 3: Elements with semantic review-related attributes
-        if (!info.total_reviews) {
-            const reviewLinks = document.querySelectorAll('a[href*="review"], button[aria-label*="review" i]');
-            for (let elem of reviewLinks) {
-                const text = (elem.textContent || '').trim();
-                const count = parseReviewCount(text);
-                if (count && count > 0) {
-                    info.total_reviews = count;
-                    break;
-                }
-            }
-        }
-
-        // PRIORITY 4: Look for standalone review count text near rating
-        // Find elements that contain ONLY "N reviews" pattern (not concatenated with rating)
-        if (!info.total_reviews) {
-            const allElements = document.querySelectorAll('span, a');
-            for (let elem of allElements) {
-                // Get direct text content only (not nested children)
-                const text = (elem.textContent || '').trim();
-                // Skip if too long (likely contains other content)
-                if (text.length > 50) continue;
-                // Skip if it looks like rating+reviews concatenated (e.g., "4.927 reviews")
-                if (/^[0-9]\\.[0-9]+[0-9]/.test(text)) continue;
-
-                const count = parseReviewCount(text);
-                if (count && count > 0 && count < 100000) {
-                    info.total_reviews = count;
-                    break;
-                }
-            }
-        }
-
-        // PRIORITY 5: Parse from visible page text using regex on short text blocks
-        if (!info.total_reviews) {
-            const walker = document.createTreeWalker(
-                document.body,
-                NodeFilter.SHOW_TEXT,
-                null,
-                false
-            );
-            while (walker.nextNode()) {
-                const text = walker.currentNode.textContent.trim();
-                if (text.length >= 5 && text.length <= 30) {
-                    // Match "27 reviews" but not "4.927 reviews"
-                    const match = text.match(/(?:^|[^0-9.,])([0-9,]+)\\s+(?:reviews?|reseñas?)/i);
-                    if (match) {
-                        const count = parseInt(match[1].replace(/[,]/g, ''));
-                        if (count > 0 && count < 100000) {
-                            info.total_reviews = count;
-                            info._debug_matched = 'WALKER: ' + text;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        // PRIORITY 6: Extract from embedded JSON in page source (Google embeds data in scripts)
-        if (!info.total_reviews) {
-            var scripts = document.querySelectorAll('script');
-            for (var sc = 0; sc < scripts.length; sc++) {
-                var scriptText = scripts[sc].textContent || '';
-                // Look for patterns like "user_reviews":{"count":27} or reviews_count":27
-                var jsonMatch = scriptText.match(/"(?:user_reviews|reviews?)(?:_count)?"\s*[:\{]\s*"?(\d+)"?/i);
-                if (jsonMatch) {
-                    var count = parseInt(jsonMatch[1]);
-                    if (count > 0 && count < 100000) {
-                        info.total_reviews = count;
-                        info._debug_matched = 'JSON_SCRIPT';
-                        break;
-                    }
-                }
-                // Also look for review count in Google's data format like [\"27 reviews\"]
-                if (!info.total_reviews) {
-                    var dataMatch = scriptText.match(/"(\d+)\s+reviews?"/i);
-                    if (dataMatch) {
-                        var count = parseInt(dataMatch[1]);
-                        if (count > 0 && count < 100000) {
-                            info.total_reviews = count;
-                            info._debug_matched = 'JSON_DATA: ' + dataMatch[0];
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        return info;
-        """
-
-        business_info = driver.execute_script(extract_script)
-        log.info(f"[PROFILE] Business card extraction: {timing_module.time() - t0:.2f}s")
-
-        total_time = timing_module.time() - start_time
-        log.info(f"[PROFILE] *** TOTAL GET_BUSINESS_CARD TIME: {total_time:.2f}s ***")
-        log.info(f"Business card extracted: name={business_info.get('name')}, "
-                f"rating={business_info.get('rating')}, reviews={business_info.get('total_reviews')}")
-        # Debug: log what aria-labels were found
-        if business_info.get('_debug_aria'):
-            log.info(f"DEBUG: Found {len(business_info.get('_debug_aria'))} aria-labels with 'review': {business_info.get('_debug_aria')[:5]}")
-        if business_info.get('_debug_matched'):
-            log.info(f"DEBUG: Matched aria-label: {business_info.get('_debug_matched')}")
-        # Also log all numeric aria-labels (potential review counts)
-        if business_info.get('_debug_all_numeric'):
-            log.info(f"DEBUG: Numeric aria-labels: {business_info.get('_debug_all_numeric')[:10]}")
-        # Log any text with parenthetical numbers like "(27)"
-        if business_info.get('_debug_parens'):
-            log.info(f"DEBUG: Parenthetical text: {business_info.get('_debug_parens')[:5]}")
-        # Log all short text containing numbers (for debugging review count detection)
-        if business_info.get('_debug_short_text'):
-            log.info(f"DEBUG: Short text with numbers: {business_info.get('_debug_short_text')[:15]}")
-        # Log the context around the rating element
-        if business_info.get('_debug_rating_context'):
-            for ctx in business_info.get('_debug_rating_context', []):
-                log.info(f"DEBUG: Rating context: {ctx}")
-        # Log what tabs exist on the page
-        if business_info.get('_debug_tabs'):
-            log.info(f"DEBUG: Page tabs: {business_info.get('_debug_tabs')}")
-        else:
-            log.info(f"DEBUG: No tabs found on page")
-        # Log buttons (might contain review count)
-        if business_info.get('_debug_buttons'):
-            log.info(f"DEBUG: Buttons: {business_info.get('_debug_buttons')[:10]}")
-
-        result = {
-            "name": business_info.get('name'),
-            "address": business_info.get('address'),
-            "rating": business_info.get('rating'),
-            "total_reviews": business_info.get('total_reviews') or 0,
-            "has_reviews": (business_info.get('total_reviews') or 0) > 0,
-            "success": True,
-            "error": None
-        }
-
-        if return_driver:
-            result["driver"] = driver
-        return result
-
-    except Exception as e:
-        total_time = timing_module.time() - start_time
-        error_msg = f"Failed to get business card info: {str(e)}"
-        log.error(error_msg)
-        log.info(f"[PROFILE] *** TOTAL GET_BUSINESS_CARD TIME (FAILED): {total_time:.2f}s ***")
-        result = {
-            "name": None,
-            "address": None,
-            "rating": None,
-            "total_reviews": 0,
-            "has_reviews": False,
-            "success": False,
-            "error": error_msg
-        }
-        if return_driver:
-            result["driver"] = driver
-        return result
-
-    finally:
-        if should_close_driver and driver:
-            try:
-                driver.quit()
-            except:
-                pass
-
-
-def check_reviews_available(url: str, headless: bool = True, driver=None, return_driver: bool = False) -> Dict[str, Any]:
-    """
-    Lightweight check to see if a business has reviews available.
-
-    This function just loads the page and checks for review count without
-    doing the full scraping. Used to enable/disable scrape button in UI.
-
-    Args:
-        url: Google Maps URL to check
-        headless: Run in headless mode (default True)
-        driver: Existing driver instance to reuse (from worker pool)
-        return_driver: If True, don't close driver and return it in result
-
-    Returns:
-        Dict containing:
-            - has_reviews: bool - whether reviews exist
-            - review_count: int - number of reviews (0 if none)
-            - business_name: str - name of business (if found)
-            - success: bool - whether check succeeded
-            - error: str - error message (if failed)
-            - driver: Driver instance (if return_driver=True)
-    """
-    import time as timing_module
-    start_time = timing_module.time()
-    log.info(f"[PROFILE] Starting validation for: {url}")
-
-    driver_provided = driver is not None
-    should_close_driver = not return_driver and not driver_provided
-
-    try:
-        # Initialize driver if not provided
-        t0 = timing_module.time()
-        if not driver:
-            driver = Driver(uc=True, headless=headless)
-            driver.maximize_window()
-            log.info(f"[PROFILE] Driver initialization: {timing_module.time() - t0:.2f}s")
-        else:
-            log.info(f"[PROFILE] Using pooled driver (0.00s)")
-
-        # Navigate to the URL
-        t0 = timing_module.time()
-        log.info(f"Loading Google Maps page...")
-        driver.get(url)
-        log.info(f"[PROFILE] Page load (driver.get): {timing_module.time() - t0:.2f}s")
-
-        t0 = timing_module.time()
-        time.sleep(0.5)  # Initial wait - reduced from 2s
-        log.info(f"[PROFILE] Initial sleep: {timing_module.time() - t0:.2f}s")
-
-        # Handle GDPR consent page (CRITICAL for validation to work!)
-        t0 = timing_module.time()
-        if 'consent.google.com' in driver.current_url:
-            log.info("Detected GDPR consent page, accepting...")
-            try:
-                # Find all form buttons and click "Accept all" / "Aceptar todo"
-                form_btns = driver.find_elements(By.CSS_SELECTOR, 'form button')
-                for btn in form_btns:
-                    btn_text = (btn.text or '').lower()
-                    if 'aceptar todo' in btn_text or 'accept all' in btn_text:
-                        log.info(f"Clicking GDPR consent: {btn.text}")
-                        btn.click()
-                        time.sleep(1)  # Reduced from 2s
-                        break
-                else:
-                    # Fallback: click second button (usually "Accept all")
-                    if len(form_btns) >= 2:
-                        log.info("Using fallback: clicking second form button")
-                        form_btns[1].click()
-                        time.sleep(1)  # Reduced from 2s
-            except Exception as e:
-                log.warning(f"GDPR consent handling failed: {e}")
-            log.info(f"[PROFILE] GDPR consent handling: {timing_module.time() - t0:.2f}s")
-        else:
-            log.info(f"[PROFILE] No GDPR consent page (0.00s)")
-
-        # Dismiss cookie banner on Maps page
-        t0 = timing_module.time()
-        try:
-            cookie_btns = driver.find_elements(By.CSS_SELECTOR,
-                'button[aria-label*="Accept" i],button[aria-label*="Aceptar" i]')
-            if cookie_btns:
-                log.info("Dismissing cookie banner...")
-                cookie_btns[0].click()
-                time.sleep(0.3)  # Reduced from 0.5s
-        except:
-            pass
-        log.info(f"[PROFILE] Cookie banner dismissal: {timing_module.time() - t0:.2f}s")
-
-        # Wait for page to fully load after consent - use smart waits
-        t0 = timing_module.time()
-        try:
-            # Wait for either business card OR search results to appear
-            log.info("Waiting for Google Maps content to load...")
-            wait = WebDriverWait(driver, 10)
-            wait.until(
-                lambda d: d.find_elements(By.CSS_SELECTOR, 'h1.DUwDvf, h1, [role="article"], [data-review-id]')
-            )
-            log.info("Google Maps content loaded successfully")
-        except Exception as e:
-            log.warning(f"Timeout waiting for Maps content: {e}")
-            time.sleep(0.5)  # Minimal fallback wait
-        log.info(f"[PROFILE] Smart wait for content: {timing_module.time() - t0:.2f}s")
-
-        # Try to extract business name
-        t0 = timing_module.time()
-        business_name = None
-        try:
-            business_name_script = """
-            // Try to find business name from various locations
-            let name = null;
-
-            // Method 1: Look for business name in the main panel (most reliable)
-            // This is where the actual business info appears
-            const businessPanelSelectors = [
-                'h1.DUwDvf',  // Main business name heading
-                '[role="main"] h1',  // H1 in main content
-                'h1.fontHeadlineLarge',  // Large headline font
-                'button[jsaction*="pane.header.rating"] h1',  // Near rating button
-            ];
-
-            for (const selector of businessPanelSelectors) {
-                const element = document.querySelector(selector);
-                if (element && element.textContent) {
-                    const text = element.textContent.trim();
-                    // Filter out Google's placeholder/suggestion text
-                    if (text &&
-                        !text.toLowerCase().includes('antes de ir') &&
-                        !text.toLowerCase().includes('before going') &&
-                        !text.toLowerCase().includes('google maps') &&
-                        text.length < 100) {  // Business names shouldn't be super long
-                        name = text;
-                        break;
-                    }
-                }
-            }
-
-            // Method 2: h1 tag (fallback)
-            if (!name) {
-                const h1 = document.querySelector('h1');
-                if (h1 && h1.textContent) {
-                    const text = h1.textContent.trim();
-                    if (!text.toLowerCase().includes('antes de ir') &&
-                        !text.toLowerCase().includes('before going')) {
-                        name = text;
-                    }
-                }
-            }
-
-            // Method 3: Title attribute (last resort)
-            if (!name) {
-                const title = document.title;
-                if (title && !title.includes('Google Maps')) {
-                    name = title.split('-')[0].trim();
-                }
-            }
-
-            return name;
-            """
-            business_name = driver.execute_script(business_name_script)
-            if business_name:
-                log.info(f"Found business name: {business_name}")
-        except Exception as e:
-            log.debug(f"Could not extract business name: {e}")
-        log.info(f"[PROFILE] Business name extraction: {timing_module.time() - t0:.2f}s")
-
-        # Extract total review count
-        t0 = timing_module.time()
-        review_count = extract_total_review_count(driver)
-        log.info(f"[PROFILE] Review count extraction: {timing_module.time() - t0:.2f}s")
-
-        if review_count is None:
-            log.warning("Could not determine review count")
-            total_time = timing_module.time() - start_time
-            log.info(f"[PROFILE] *** TOTAL VALIDATION TIME: {total_time:.2f}s ***")
-            result = {
-                "has_reviews": False,
-                "review_count": 0,
-                "business_name": business_name,
-                "success": True,
-                "error": "Could not find review count on page"
-            }
-            if return_driver:
-                result["driver"] = driver
-            return result
-
-        log.info(f"Found {review_count} reviews available")
-
-        total_time = timing_module.time() - start_time
-        log.info(f"[PROFILE] *** TOTAL VALIDATION TIME: {total_time:.2f}s ***")
-
-        result = {
-            "has_reviews": review_count > 0,
-            "review_count": review_count,
-            "business_name": business_name,
-            "success": True,
-            "error": None
-        }
-        if return_driver:
-            result["driver"] = driver
-        return result
-
-    except Exception as e:
-        total_time = timing_module.time() - start_time
-        error_msg = f"Failed to check reviews: {str(e)}"
-        log.error(error_msg)
-        log.info(f"[PROFILE] *** TOTAL VALIDATION TIME (FAILED): {total_time:.2f}s ***")
-        result = {
-            "has_reviews": False,
-            "review_count": 0,
-            "business_name": None,
-            "success": False,
-            "error": error_msg
-        }
-        if return_driver:
-            result["driver"] = driver
-        return result
-
-    finally:
-        if should_close_driver and driver:
-            try:
-                driver.quit()
-            except:
-                pass
diff --git a/modules/health_checks.py b/modules/health_checks.py
index a2d4db6..2210559 100644
--- a/modules/health_checks.py
+++ b/modules/health_checks.py
@@ -90,7 +90,7 @@ class CanaryMonitor:
         - Scrape time is reasonable
         - Data structure is valid
         """
-        from modules.fast_scraper import fast_scrape_reviews
+        from modules.scraper_clean import fast_scrape_reviews
 
         log.info(f"Running canary scrape test on {self.test_url[:60]}...")
         self.last_run = datetime.now()