whyrating-engine-legacy/dump_api_responses.py

#!/usr/bin/env python3
"""
Dump raw API responses for analysis.
This will help us understand Google's exact response format.
"""
import json
import logging
from pathlib import Path
from seleniumbase import SB
from modules.api_interceptor import GoogleMapsAPIInterceptor

# Configure the root logger so log records are printed as "[LEVEL] message".
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

# Google Maps place page that the browser opens as the capture target.
url = "https://www.google.com/maps/place/Soho+Club/data=!4m7!3m6!1s0x46dd947294b213bf:0x864c7a232527adb4!8m2!3d54.67869!4d25.2667181!16s%2Fg%2F1thhj5ml!19sChIJvxOylHKU3UYRtK0nJSN6TIY?authuser=0&hl=es&rclk=1"

# Directory that receives the dump files (up to three per captured response).
output_dir = Path("api_response_samples")
output_dir.mkdir(exist_ok=True)

print("[INFO] Starting browser...")
with SB(uc=True, headless=False) as sb:
    print("[INFO] Navigating to Google Maps...")
    sb.open(url)
    sb.sleep(3)

    # Inject the interceptor before the reviews tab is clicked, so it is in
    # place by the time the click and the scrolling below happen.
    print("[INFO] Injecting API interceptor...")
    interceptor = GoogleMapsAPIInterceptor(sb.driver)
    interceptor.inject_response_interceptor()
    sb.sleep(2)

    # Click reviews tab (best effort: the script continues if it fails).
    print("[INFO] Looking for reviews tab...")
    try:
        sb.click('.LRkQ2', timeout=5)
        print("[INFO] ✓ Clicked reviews tab")
    except Exception:
        # `Exception` rather than a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still abort the run.
        print("[WARN] Could not click reviews tab, trying to continue...")

    sb.sleep(5)

    # Scroll multiple times to trigger API calls
    print("[INFO] Scrolling to trigger API calls...")
    for scroll_idx in range(10):
        sb.execute_script("window.scrollBy(0, 800)")
        sb.sleep(1.5)

        # Report progress every third scroll.
        # NOTE(review): this assumes get_intercepted_responses() does not
        # drain the interceptor's buffer; if it does, responses seen here
        # would be missing from the final collection below -- confirm.
        if (scroll_idx + 1) % 3 == 0:
            responses = interceptor.get_intercepted_responses()
            if responses:
                print(f"[INFO] Captured {len(responses)} responses so far...")

    # Final collection
    print("\n[INFO] Collecting all captured responses...")
    all_responses = interceptor.get_intercepted_responses()

    if not all_responses:
        print("[ERROR] No responses captured!")
        # Raise SystemExit directly: the `exit()` builtin is injected by the
        # `site` module and is not guaranteed to exist in every environment.
        raise SystemExit(1)

    print(f"[SUCCESS] Captured {len(all_responses)} API responses!\n")

    # Dump each response to up to three files: full record, raw body, and
    # (when the body parses) pretty-printed JSON.
    for idx, resp in enumerate(all_responses):
        # `or` also covers keys that are present but hold None, which the
        # `.get(key, default)` form would pass through unchanged.
        url_str = resp.get('url') or 'unknown'
        body = resp.get('body') or ''
        # Report the UTF-8 encoded size so the "bytes" label is accurate
        # for non-ASCII bodies (len(body) would count characters).
        size = len(body.encode('utf-8'))

        # Save full response
        full_file = output_dir / f"response_{idx:02d}_full.json"
        with open(full_file, 'w', encoding='utf-8') as f:
            json.dump(resp, f, indent=2, ensure_ascii=False)

        # Save just body for easier viewing
        body_file = output_dir / f"response_{idx:02d}_body.txt"
        with open(body_file, 'w', encoding='utf-8') as f:
            f.write(body)

        # Drop the leading ")]}'" marker, when present, before JSON parsing.
        if body.startswith(")]}'"):
            clean_body = body[4:].strip()
        else:
            clean_body = body

        # Try to parse as JSON. Only the parse itself is guarded, so a
        # failure while writing the parsed file is not misreported as
        # "[Not JSON]".
        json_file = output_dir / f"response_{idx:02d}_parsed.json"
        try:
            parsed = json.loads(clean_body)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a subclass of ValueError.
            is_json = False
        else:
            is_json = True
            with open(json_file, 'w', encoding='utf-8') as f:
                json.dump(parsed, f, indent=2, ensure_ascii=False)

        if is_json:
            print(f"  [{idx}] ✓ {url_str[:60]}... ({size:,} bytes)")
            print(f"      Full:   {full_file}")
            print(f"      Body:   {body_file}")
            print(f"      Parsed: {json_file}")
        else:
            print(f"  [{idx}] ✓ {url_str[:60]}... ({size:,} bytes) [Not JSON]")
            print(f"      Full: {full_file}")
            print(f"      Body: {body_file}")
        print()

    print(f"\n[SUCCESS] Dumped {len(all_responses)} responses to: {output_dir}/")
    print("\nNext steps:")
    print("  1. Open response_00_parsed.json to study the structure")
    print("  2. Look for arrays containing review data")
    print("  3. Identify patterns for: review ID, author, rating, text, date")
    print("  4. Update the parser patterns in modules/api_interceptor.py")

print("\n[DONE]")