#!/usr/bin/env python3
"""
Test metadata extraction: category, review topics, about info.
Uses robust selectors (aria-labels, roles, jsaction) to avoid breakage.
"""
import time
import json
from seleniumbase import Driver
from selenium.webdriver.common.by import By

# Expected values for validation.
# validate_results() checks an extraction result against this table:
#   - "name" and "category" must be equal to the extracted values;
#   - at least 3 of "review_topics" must appear among the extracted topic
#     names (compared lower-cased);
#   - at least 3 of "about_sections" must appear among the extracted About
#     section titles (compared as-is, i.e. case-sensitive).
EXPECTED = {
    "name": "R. Fleitas Peluqueros",
    "category": "Barber shop",
    "review_topics": ["hair salon", "cutting", "price", "siblings", "beard"],
    "about_sections": ["Accessibility", "Amenities", "Planning", "Payments", "Children"]
}

def _accept_consent_if_shown(driver, url: str, timeout: float = 5.0) -> None:
    """Accept Google's consent interstitial, if it appears, then reload *url*.

    Polls ``driver.current_url`` every 10 ms for up to *timeout* seconds and
    returns as soon as either a consent page has been handled or the browser
    is on a Maps URL. Handling is best-effort: any failure while looking for
    or clicking the button is reported and extraction continues.
    """
    accept_words = ("accept", "aceptar", "alle akzeptieren")
    # monotonic() so a wall-clock adjustment cannot stretch or cut the timeout.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        current = driver.current_url

        if "consent.google" in current:
            print("   🍪 Consent page detected, clicking accept...")
            try:
                for btn in driver.find_elements(By.CSS_SELECTOR, "button"):
                    # Read the label BEFORE clicking: the click may navigate
                    # away, after which touching the element can raise a
                    # stale-element error and would skip the reload below.
                    label = btn.text
                    lowered = label.lower()
                    if any(word in lowered for word in accept_words):
                        btn.click()
                        print(f"   ✅ Clicked: '{label}', reloading...")
                        driver.get(url)
                        break
            except Exception as exc:
                # Deliberately non-fatal, but never silent (and no longer
                # catching KeyboardInterrupt/SystemExit like a bare except).
                print(f"   ⚠️ Consent handling failed: {exc}")
            return

        if "maps/place" in current or ("maps" in current and "consent" not in current):
            return

        time.sleep(0.01)  # 10ms polling


def extract_metadata(driver, url: str) -> dict:
    """Extract business metadata from a Google Maps page.

    Loads *url* (appending ``hl=en&gl=us`` when no ``hl=`` parameter is
    present, because the selectors and regexes below match English labels),
    dismisses the consent page if shown, then scrapes three tabs in order:
    Overview, Reviews and About. Progress is printed as it goes.

    Args:
        driver: A Selenium-compatible driver exposing ``get``,
            ``current_url``, ``find_elements`` and ``execute_script``.
        url: The Google Maps URL to open.

    Returns:
        A dict with the keys:
            ``name``, ``category``, ``rating``, ``total_reviews``: values
                read from the Overview tab, or ``None`` when not found;
            ``review_topics``: list of ``{"topic": str, "count": int}``
                (empty when the Reviews tab could not be opened);
            ``about``: dict mapping a section title to a list of
                ``{"feature": str, "available": bool}`` (empty when the
                About tab could not be opened).

    Errors raised by the driver while loading the page or running the
    scripts are not caught here and propagate to the caller.
    """

    # Force English
    if 'hl=' not in url:
        url = f"{url}{'&' if '?' in url else '?'}hl=en&gl=us"

    print(f"   Loading URL: {url[:70]}...")
    driver.get(url)

    # Handle consent popup - poll with 10ms sleep (same as production scraper)
    _accept_consent_if_shown(driver, url)

    # Wait for page to stabilize
    time.sleep(1)

    result = {
        "name": None,
        "category": None,
        "rating": None,
        "total_reviews": None,
        "review_topics": [],
        "about": {}
    }

    # ========== OVERVIEW TAB (default) ==========
    print("\n📍 Extracting from OVERVIEW tab...")

    overview_data = driver.execute_script("""
        var data = {name: null, category: null, rating: null, total_reviews: null};

        // Business name - h1 is stable
        var h1 = document.querySelector('h1');
        if (h1) data.name = h1.textContent.trim();

        // Category - use jsaction attribute (more stable than class)
        var catBtn = document.querySelector('button[jsaction*="category"]');
        if (catBtn) data.category = catBtn.textContent.trim();

        // Fallback: look for button after rating that's not a link
        if (!data.category) {
            var buttons = document.querySelectorAll('button');
            for (var btn of buttons) {
                var text = btn.textContent.trim();
                // Categories are short words, no numbers, not navigation
                if (text && text.length < 50 && !text.match(/^[0-9]/) &&
                    !text.match(/review|star|direction|save|share|photo/i)) {
                    // Check if it's near the rating area
                    var parent = btn.closest('.LBgpqf, .skqShb, .fontBodyMedium');
                    if (parent) {
                        data.category = text;
                        break;
                    }
                }
            }
        }

        // Rating and reviews from aria-labels (stable)
        var spans = document.querySelectorAll('span[role="img"]');
        for (var span of spans) {
            var label = span.getAttribute('aria-label') || '';

            // Rating: "4.8 stars"
            var rMatch = label.match(/^([\\d,.]+)\\s*star/i);
            if (rMatch && !data.rating) {
                data.rating = parseFloat(rMatch[1].replace(',', '.'));
            }

            // Reviews: "79 reviews"
            var revMatch = label.match(/^([\\d,]+)\\s*review/i);
            if (revMatch && !data.total_reviews) {
                data.total_reviews = parseInt(revMatch[1].replace(/,/g, ''));
            }
        }

        return data;
    """)

    # Guard against a script that returned nothing so the defaults survive.
    result.update(overview_data or {})
    print(f"   Name: {result['name']}")
    print(f"   Category: {result['category']}")
    print(f"   Rating: {result['rating']}")
    print(f"   Reviews: {result['total_reviews']}")

    # ========== REVIEWS TAB ==========
    print("\n📝 Clicking REVIEWS tab...")

    # Click reviews tab using aria-label or role (robust)
    clicked = driver.execute_script("""
        // Try multiple selectors for reviews tab
        var selectors = [
            'button[aria-label*="Review"]',
            'button[data-tab-index="1"]',
            'div[role="tablist"] button:nth-child(2)',
            'button[jsaction*="review"]'
        ];

        for (var sel of selectors) {
            var btn = document.querySelector(sel);
            if (btn && btn.textContent.toLowerCase().includes('review')) {
                btn.click();
                return true;
            }
        }

        // Fallback: find by text content
        var buttons = document.querySelectorAll('button');
        for (var btn of buttons) {
            if (btn.textContent.trim().toLowerCase() === 'reviews') {
                btn.click();
                return true;
            }
        }
        return false;
    """)

    if clicked:
        time.sleep(1.5)  # Wait for tab to load

        # Extract review topics from radiogroup (very stable selector)
        topics = driver.execute_script("""
            var topics = [];

            // Primary: use role="radiogroup" with aria-label="Refine reviews"
            var container = document.querySelector('div[role="radiogroup"][aria-label*="Refine"], div[role="radiogroup"][aria-label*="refine"]');

            if (!container) {
                // Fallback: any radiogroup in the reviews area
                container = document.querySelector('div[role="radiogroup"]');
            }

            if (container) {
                var buttons = container.querySelectorAll('button[role="radio"]');
                for (var btn of buttons) {
                    var label = btn.getAttribute('aria-label') || '';
                    // Parse "hair salon, mentioned in 4 reviews" or just get the topic name
                    var match = label.match(/^([^,]+),\\s*mentioned in (\\d+)/i);
                    if (match) {
                        topics.push({
                            topic: match[1].trim(),
                            count: parseInt(match[2])
                        });
                    } else if (label && !label.toLowerCase().includes('all review')) {
                        // Might be in different format
                        var countSpan = btn.querySelector('.bC3Nkc, .fontBodySmall');
                        var nameSpan = btn.querySelector('.uEubGf, span:first-child');
                        if (nameSpan) {
                            var name = nameSpan.textContent.trim();
                            var count = countSpan ? parseInt(countSpan.textContent) : 0;
                            if (name && name.toLowerCase() !== 'all') {
                                topics.push({topic: name, count: count});
                            }
                        }
                    }
                }
            }

            return topics;
        """) or []  # normalise a missing return value to "no topics"

        result['review_topics'] = topics
        print(f"   Found {len(topics)} review topics:")
        for t in topics:
            print(f"      - {t['topic']}: {t['count']} mentions")
    else:
        print("   ⚠️ Could not click Reviews tab")

    # ========== ABOUT TAB ==========
    print("\n📋 Clicking ABOUT tab...")

    clicked = driver.execute_script("""
        // Try multiple selectors for about tab
        var selectors = [
            'button[aria-label*="About"]',
            'button[data-tab-index="2"]',
            'div[role="tablist"] button:nth-child(3)',
            'button[jsaction*="about"]'
        ];

        for (var sel of selectors) {
            var btn = document.querySelector(sel);
            if (btn && btn.textContent.toLowerCase().includes('about')) {
                btn.click();
                return true;
            }
        }

        // Fallback: find by text content
        var buttons = document.querySelectorAll('button');
        for (var btn of buttons) {
            if (btn.textContent.trim().toLowerCase() === 'about') {
                btn.click();
                return true;
            }
        }
        return false;
    """)

    if clicked:
        time.sleep(1.5)  # Wait for tab to load

        # Extract about sections using aria-label and role (stable)
        about = driver.execute_script("""
            var about = {};

            // Find the about region by aria-label or role
            var container = document.querySelector('div[role="region"][aria-label*="About"]');

            if (!container) {
                // Fallback: look for the scrollable area with sections
                container = document.querySelector('.m6QErb[aria-label*="About"]');
            }

            if (!container) {
                // Last resort: find sections by h2 headers
                container = document;
            }

            // Find all section headers (h2 elements)
            var sections = container.querySelectorAll('h2');

            for (var h2 of sections) {
                var sectionName = h2.textContent.trim();
                var items = [];

                // Find the ul list following this h2
                var parent = h2.closest('.iP2t7d, div');
                if (parent) {
                    var listItems = parent.querySelectorAll('li span[aria-label]');
                    for (var li of listItems) {
                        var label = li.getAttribute('aria-label');
                        if (label) {
                            // Parse "Has toilet" or "No wheelchair-accessible car park"
                            var hasFeature = !label.toLowerCase().startsWith('no ');
                            var featureName = label.replace(/^(Has |No )/i, '');
                            items.push({
                                feature: featureName,
                                available: hasFeature
                            });
                        }
                    }
                }

                if (sectionName && items.length > 0) {
                    about[sectionName] = items;
                }
            }

            return about;
        """) or {}  # normalise a missing return value to "no sections"

        result['about'] = about
        print(f"   Found {len(about)} about sections:")
        for section, items in about.items():
            print(f"      {section}:")
            for item in items:
                status = "✓" if item['available'] else "✗"
                print(f"         {status} {item['feature']}")
    else:
        print("   ⚠️ Could not click About tab")

    return result


def validate_results(result: dict) -> bool:
    """Check an extraction result against EXPECTED and print a report.

    Four checks run in order, each printing a ✅ or ❌ line:
      1. ``result['name']`` equals the expected name;
      2. ``result['category']`` equals the expected category;
      3. at least three expected review topics occur among the extracted
         topic names (both sides lower-cased);
      4. at least three expected About section titles occur among the keys
         of ``result['about']`` (compared as-is).

    Returns True only when every check succeeded.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("🔍 VALIDATION:")
    print(banner)

    failed_checks = 0

    # Fields that must match the expectation exactly.
    for key, label in (("name", "Name"), ("category", "Category")):
        actual = result[key]
        wanted = EXPECTED[key]
        if actual == wanted:
            print(f"   ✅ {label}: {actual}")
        else:
            print(f"   ❌ {label}: got '{actual}', expected '{wanted}'")
            failed_checks += 1

    # Review topics: count how many expected names were extracted.
    got_topics = [entry['topic'].lower() for entry in result.get('review_topics', [])]
    want_topics = [name.lower() for name in EXPECTED['review_topics']]
    topic_hits = sum(1 for name in want_topics if name in got_topics)

    if topic_hits < 3:  # fewer than the 3 required matches
        print(f"   ❌ Review topics: only {topic_hits}/{len(want_topics)} matched")
        print(f"      Expected: {want_topics}")
        print(f"      Got: {got_topics}")
        failed_checks += 1
    else:
        print(f"   ✅ Review topics: {topic_hits}/{len(want_topics)} matched")

    # About sections: count how many expected titles were extracted.
    got_sections = [*result.get('about', {}).keys()]
    want_sections = EXPECTED['about_sections']
    section_hits = sum(1 for title in want_sections if title in got_sections)

    if section_hits < 3:
        print(f"   ❌ About sections: only {section_hits}/{len(want_sections)} matched")
        print(f"      Expected: {want_sections}")
        print(f"      Got: {got_sections}")
        failed_checks += 1
    else:
        print(f"   ✅ About sections: {section_hits}/{len(want_sections)} matched")

    return failed_checks == 0


def main():
    """Run the metadata-extraction test against a live Google Maps page.

    Opens a visible (non-headless) browser, extracts metadata for the
    hard-coded business search URL, prints the full result as JSON, validates
    it and prints a pass/fail banner. The browser is left open for a short
    while afterwards so the page can be inspected, and is always closed on
    exit. Any error during the run is printed with a traceback rather than
    propagated.
    """
    url = "https://www.google.com/maps/search/?api=1&query=R.+Fleitas+Peluqueros+Gran+Canaria"

    print("🚀 Starting metadata extraction test...")
    print(f"   URL: {url[:60]}...")

    driver = Driver(uc=True, headless=False)

    try:
        # Set geolocation (best-effort: the test still runs without it, but a
        # failure is reported instead of being silently swallowed).
        try:
            driver.execute_cdp_cmd('Emulation.setGeolocationOverride', {
                'latitude': 42.3601, 'longitude': -71.0589, 'accuracy': 100
            })
        except Exception as exc:
            print(f"   ⚠️ Could not set geolocation override: {exc}")

        result = extract_metadata(driver, url)

        print("\n" + "="*60)
        print("📊 FULL RESULT:")
        print("="*60)
        print(json.dumps(result, indent=2, ensure_ascii=False))

        passed = validate_results(result)

        print("\n" + "="*60)
        if passed:
            print("🎉 ALL VALIDATIONS PASSED!")
        else:
            print("⚠️ SOME VALIDATIONS FAILED")
        print("="*60)

        # Keep the window up long enough for a manual look at the page.
        print("\n👀 Browser stays open for 15 seconds...")
        time.sleep(15)

    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        # Pause so the failing page state can be inspected before quitting.
        time.sleep(10)
    finally:
        driver.quit()
        print("🔒 Browser closed")


# Run the test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()