#!/usr/bin/env python3
"""
Test script for the Fast API server.

Demonstrates how to use the updated API with the fast scraper (18.9s):
health check, starting a scrape job, polling it until it reaches a terminal
status, fetching the reviews, and reading the server statistics.
"""

import json
import time
import traceback

import requests
import yaml

# API base URL
BASE_URL = "http://localhost:8000"

# Seconds to wait for any single HTTP request before giving up. Without a
# timeout, `requests` blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Seconds between two job-status polls.
POLL_INTERVAL = 2

# Upper bound (seconds) on the whole polling phase, so a job that never
# reaches a terminal status cannot keep this script running forever.
POLL_TIMEOUT = 600

# Job statuses after which polling stops.
TERMINAL_STATUSES = ('completed', 'failed', 'cancelled')


def test_api():
    """Test the Fast API endpoints.

    Runs the following steps against the server at ``BASE_URL`` and prints
    the outcome of each one:

    1. ``GET /`` health check.
    2. ``POST /scrape`` with the ``url`` read from ``config.yaml``.
    3. ``GET /jobs/{job_id}`` polled every ``POLL_INTERVAL`` seconds until
       the status is one of ``TERMINAL_STATUSES`` (or ``POLL_TIMEOUT``
       elapses).
    4. ``GET /jobs/{job_id}`` once more for the final job details.
    5. ``GET /jobs/{job_id}/reviews`` (only for a completed job); the reviews
       are written to ``api_reviews_<first 8 chars of job id>.json``.
    6. ``GET /stats``.

    Returns:
        None. The function returns early (after printing a message) when the
        config has no URL, the job cannot be started, the job endpoint
        answers without a ``status`` field, or polling times out.

    Raises:
        requests.exceptions.RequestException: on connection failures or
            request timeouts.
        OSError: if ``config.yaml`` cannot be read or the output file cannot
            be written.
    """
    print("=" * 60)
    print("Testing Fast Google Reviews Scraper API")
    print("=" * 60)
    print()

    # 1. Health check
    print("1. Health Check")
    response = requests.get(f"{BASE_URL}/", timeout=REQUEST_TIMEOUT)
    print(f" Status: {response.status_code}")
    print(f" Response: {response.json()}")
    print()

    # 2. Start a scraping job
    print("2. Starting Scraping Job")

    # Read URL from config. safe_load returns None for an empty document,
    # hence the `or {}` fallback.
    with open('config.yaml', 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f) or {}
    url = config.get('url')
    if not url:
        print("❌ No 'url' found in config.yaml!")
        return

    scrape_request = {
        "url": url,
        "headless": True,  # Run in headless mode
    }

    response = requests.post(
        f"{BASE_URL}/scrape",
        json=scrape_request,
        timeout=REQUEST_TIMEOUT,
    )
    print(f" Status: {response.status_code}")
    result = response.json()
    print(f" Response: {result}")
    print()

    job_id = result.get('job_id')
    if not job_id:
        print("❌ Failed to start job!")
        return

    print(f" Job ID: {job_id}")
    print()

    job_url = f"{BASE_URL}/jobs/{job_id}"

    # 3. Poll job status
    print("3. Polling Job Status")
    # monotonic() is immune to wall-clock adjustments, unlike time.time().
    start_time = time.monotonic()

    while True:
        response = requests.get(job_url, timeout=REQUEST_TIMEOUT)
        job = response.json()
        elapsed = time.monotonic() - start_time

        status = job.get('status')
        if status is None:
            # Error payloads (e.g. job not found) carry no 'status' field.
            print(f" ❌ Unexpected job response ({response.status_code}): {job}")
            return

        # 'progress' may be present but null, so `.get(key, {})` is not enough.
        progress = job.get('progress') or {}
        print(f" [{elapsed:.1f}s] Status: {status} - {progress.get('message', '')}")

        if status in TERMINAL_STATUSES:
            break

        if elapsed >= POLL_TIMEOUT:
            print(f" ❌ Job did not finish within {POLL_TIMEOUT}s, giving up")
            return

        time.sleep(POLL_INTERVAL)  # Poll every 2 seconds

    print()

    # 4. Get final job details
    print("4. Final Job Details")
    response = requests.get(job_url, timeout=REQUEST_TIMEOUT)
    job = response.json()
    final_status = job.get('status')

    print(f" Status: {final_status}")
    # `or 0` also covers fields that are present but null (e.g. failed jobs),
    # which would otherwise break the float format spec.
    print(f" Reviews Count: {job.get('reviews_count') or 0}")
    print(f" Scrape Time: {job.get('scrape_time') or 0:.1f}s")

    if job.get('error_message'):
        print(f" Error: {job['error_message']}")

    if job.get('progress'):
        progress = job['progress']
        if 'scroll_time' in progress:
            print(f" Scroll Time: {progress['scroll_time']:.1f}s")
        if 'extract_time' in progress:
            print(f" Extract Time: {progress['extract_time']:.2f}s")
    print()

    # 5. Get reviews data
    if final_status == 'completed':
        print("5. Retrieving Reviews Data")
        response = requests.get(f"{job_url}/reviews", timeout=REQUEST_TIMEOUT)

        if response.status_code == 200:
            reviews_data = response.json()
            reviews = reviews_data['reviews']
            count = reviews_data['count']

            print(f" Total Reviews: {count}")
            print()

            # Show first 3 reviews
            print(" Sample Reviews:")
            for i, review in enumerate(reviews[:3], 1):
                print(f" {i}. {review.get('author', 'Unknown')} - {review.get('rating', 0)}★")
                text = review.get('text', '')
                if text:
                    preview = (text[:60] + "...") if len(text) > 60 else text
                    print(f" \"{preview}\"")
            print()

            # Save to file
            output_file = f"api_reviews_{job_id[:8]}.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(reviews, f, indent=2, ensure_ascii=False)
            print(f" 💾 Saved all reviews to: {output_file}")
        else:
            print(f" ❌ Failed to get reviews: {response.status_code}")
            print(f" {response.json()}")
        print()

    # 6. Get statistics
    print("6. Job Statistics")
    response = requests.get(f"{BASE_URL}/stats", timeout=REQUEST_TIMEOUT)
    stats = response.json()
    print(f" Total Jobs: {stats['total_jobs']}")
    print(f" Running Jobs: {stats['running_jobs']}/{stats['max_concurrent_jobs']}")
    print(f" By Status: {stats['by_status']}")
    print()

    print("=" * 60)
    print("✅ API Test Complete!")
    print("=" * 60)


if __name__ == "__main__":
    try:
        test_api()
    except requests.exceptions.ConnectionError:
        print("❌ Error: Could not connect to API server!")
        print()
        print("Please start the API server first:")
        print(" python api_server.py")
        print()
    except requests.exceptions.Timeout:
        # Raised when a request exceeds REQUEST_TIMEOUT.
        print(f"❌ Error: API request timed out after {REQUEST_TIMEOUT}s!")
    except KeyboardInterrupt:
        print("\n\nTest interrupted by user")
    except Exception as e:
        # Top-level boundary of a script: report and show the traceback.
        print(f"\n❌ Error: {e}")
        traceback.print_exc()