# whyrating-engine-legacy/test_fast_api.py

#!/usr/bin/env python3
"""
Test script for the Fast API server.
Demonstrates how to use the updated API with the fast scraper (18.9s).
"""
import requests
import time
import json

# Root address of the API server under test. Every endpoint this script calls
# (/, /scrape, /jobs/{job_id}, /jobs/{job_id}/reviews, /stats) is appended to it.
BASE_URL = "http://localhost:8000"

# Job statuses that end the polling loop.
_TERMINAL_STATUSES = ('completed', 'failed', 'cancelled')


def _fetch_job(job_id, timeout):
    """Fetch the job record for *job_id* and return it as a dict.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        RuntimeError: the payload is not a JSON object carrying a 'status' key.
    """
    response = requests.get(f"{BASE_URL}/jobs/{job_id}", timeout=timeout)
    response.raise_for_status()
    job = response.json()
    if not isinstance(job, dict) or 'status' not in job:
        raise RuntimeError(f"Unexpected job response for {job_id}: {job!r}")
    return job


def _load_target_url(path='config.yaml'):
    """Read the 'url' entry from the YAML config file at *path*.

    Raises:
        ValueError: the file is empty or does not define a non-empty 'url'.
    """
    # Imported lazily so the health check can run even when PyYAML is missing.
    import yaml

    with open(path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document.
        config = yaml.safe_load(f) or {}

    url = config.get('url')
    if not url:
        raise ValueError(f"{path} does not define a 'url' to scrape")
    return url


def test_api(*, request_timeout=30.0, poll_interval=2.0, max_wait=600.0):
    """Walk through the API end to end: health, scrape, poll, reviews, stats.

    The scrape target is read from the 'url' key of ``config.yaml`` in the
    current working directory. On success the retrieved reviews are written
    to ``api_reviews_<first 8 chars of job id>.json``.

    Args:
        request_timeout: Seconds to wait on each individual HTTP request
            before giving up. Without it a stalled server blocks forever.
        poll_interval: Seconds to sleep between job status polls.
        max_wait: Upper bound, in seconds, on the total time spent polling
            for the job to reach a terminal status.

    Raises:
        ValueError: ``config.yaml`` has no usable 'url'.
        TimeoutError: the job did not finish within *max_wait* seconds.
        RuntimeError: a job response had no 'status' field.
        requests.RequestException: on connection errors, timeouts, or
            (for the job and stats endpoints) non-2xx responses.
    """

    print("=" * 60)
    print("Testing Fast Google Reviews Scraper API")
    print("=" * 60)
    print()

    # 1. Health check
    print("1. Health Check")
    response = requests.get(f"{BASE_URL}/", timeout=request_timeout)
    print(f"   Status: {response.status_code}")
    print(f"   Response: {response.json()}")
    print()

    # 2. Start a scraping job
    print("2. Starting Scraping Job")

    scrape_request = {
        "url": _load_target_url(),
        "headless": True  # Run in headless mode
    }

    # NOTE(review): assumes /scrape returns the job id promptly rather than
    # blocking until the scrape is done -- confirm against the server.
    response = requests.post(
        f"{BASE_URL}/scrape", json=scrape_request, timeout=request_timeout
    )
    print(f"   Status: {response.status_code}")
    result = response.json()
    print(f"   Response: {result}")
    print()

    job_id = result.get('job_id')
    if not job_id:
        print("❌ Failed to start job!")
        return

    print(f"   Job ID: {job_id}")
    print()

    # 3. Poll job status
    print("3. Polling Job Status")
    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    start_time = time.monotonic()

    while True:
        job = _fetch_job(job_id, request_timeout)

        status = job['status']
        # 'progress' may be present but null; treat that like "no progress yet".
        progress = job.get('progress') or {}

        elapsed = time.monotonic() - start_time
        print(f"   [{elapsed:.1f}s] Status: {status} - {progress.get('message', '')}")

        if status in _TERMINAL_STATUSES:
            break

        # Bound the loop so a job stuck in a non-terminal state cannot hang us.
        if elapsed >= max_wait:
            raise TimeoutError(
                f"Job {job_id} still '{status}' after {elapsed:.0f}s "
                f"(limit {max_wait:.0f}s)"
            )

        time.sleep(poll_interval)

    print()

    # 4. Get final job details
    print("4. Final Job Details")
    job = _fetch_job(job_id, request_timeout)

    print(f"   Status: {job['status']}")
    print(f"   Reviews Count: {job.get('reviews_count', 0)}")
    # `or 0` covers a null scrape_time, which would otherwise break ':.1f'.
    print(f"   Scrape Time: {job.get('scrape_time') or 0:.1f}s")

    if job.get('error_message'):
        print(f"   Error: {job['error_message']}")

    progress = job.get('progress') or {}
    if progress.get('scroll_time') is not None:
        print(f"   Scroll Time: {progress['scroll_time']:.1f}s")
    if progress.get('extract_time') is not None:
        print(f"   Extract Time: {progress['extract_time']:.2f}s")

    print()

    # 5. Get reviews data
    if job['status'] == 'completed':
        print("5. Retrieving Reviews Data")
        response = requests.get(
            f"{BASE_URL}/jobs/{job_id}/reviews", timeout=request_timeout
        )

        if response.status_code == 200:
            reviews_data = response.json()
            reviews = reviews_data['reviews']
            count = reviews_data['count']

            print(f"   Total Reviews: {count}")
            print()

            # Show first 3 reviews
            print("   Sample Reviews:")
            for i, review in enumerate(reviews[:3], 1):
                print(f"   {i}. {review.get('author', 'Unknown')} - {review.get('rating', 0)}★")
                text = review.get('text', '')
                if text:
                    preview = text[:60] + "..." if len(text) > 60 else text
                    print(f"      \"{preview}\"")
            print()

            # Save to file (str() so a non-string job id cannot break slicing)
            output_file = f"api_reviews_{str(job_id)[:8]}.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(reviews, f, indent=2, ensure_ascii=False)
            print(f"   💾 Saved all reviews to: {output_file}")

        else:
            print(f"   ❌ Failed to get reviews: {response.status_code}")
            # Error bodies are not guaranteed to be JSON; fall back to raw text.
            try:
                print(f"   {response.json()}")
            except ValueError:
                print(f"   {response.text}")

    print()

    # 6. Get statistics
    print("6. Job Statistics")
    response = requests.get(f"{BASE_URL}/stats", timeout=request_timeout)
    response.raise_for_status()
    stats = response.json()

    print(f"   Total Jobs: {stats['total_jobs']}")
    print(f"   Running Jobs: {stats['running_jobs']}/{stats['max_concurrent_jobs']}")
    print(f"   By Status: {stats['by_status']}")
    print()

    print("=" * 60)
    print("✅ API Test Complete!")
    print("=" * 60)


if __name__ == "__main__":
    # Script entry point: run the walkthrough and translate the known failure
    # modes into short console messages.
    try:
        test_api()
    except KeyboardInterrupt:
        # Ctrl+C while the walkthrough is running.
        print("\n\nTest interrupted by user")
    except requests.exceptions.ConnectionError:
        # The server is not reachable: tell the user how to start it.
        for message in (
            "❌ Error: Could not connect to API server!",
            "",
            "Please start the API server first:",
            "  python api_server.py",
            "",
        ):
            print(message)
    except Exception as exc:
        # Anything else: show the message followed by the full traceback.
        print(f"\n❌ Error: {exc}")
        from traceback import print_exc
        print_exc()