diff --git a/api_server_production.py b/api_server_production.py index d276dea..50aa6e2 100644 --- a/api_server_production.py +++ b/api_server_production.py @@ -28,7 +28,23 @@ from fastapi.responses import JSONResponse, StreamingResponse from core.database import DatabaseManager, JobStatus from services.webhook_service import WebhookDispatcher, WebhookManager from utils.health_checks import HealthCheckSystem -from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews, LogCapture, get_business_card_info # Clean scraper +from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews as fast_scrape_v1_0_0, LogCapture, get_business_card_info +from scrapers.google_reviews.v1_1_0 import fast_scrape_reviews as fast_scrape_v1_1_0 + +# Scraper version registry - maps version strings to scraper functions +SCRAPER_VERSIONS = { + "1.0.0": fast_scrape_v1_0_0, + "v1.0.0": fast_scrape_v1_0_0, + "1.1.0": fast_scrape_v1_1_0, + "v1.1.0": fast_scrape_v1_1_0, +} +DEFAULT_SCRAPER_VERSION = "1.1.0" # Latest version as default + +def get_scraper_for_version(version: str = None): + """Get the appropriate scraper function for a version string.""" + if version and version in SCRAPER_VERSIONS: + return SCRAPER_VERSIONS[version], version + return SCRAPER_VERSIONS[DEFAULT_SCRAPER_VERSION], DEFAULT_SCRAPER_VERSION from utils.crash_analyzer import analyze_crash, summarize_crash_patterns, apply_auto_fix from utils.logger import StructuredLogger, LogEntry from workers.chrome_pool import ( @@ -44,6 +60,7 @@ from api.routes import ( batches_router, set_batches_db, dashboard_router, set_dashboard_db, admin_router, set_admin_db, + pipelines_router, set_pipelines_db, ) # Configure logging @@ -92,6 +109,7 @@ async def lifespan(app: FastAPI): set_batches_db(db) set_dashboard_db(db) set_admin_db(db) + set_pipelines_db(db.pool) # Pipeline router uses raw asyncpg pool # Initialize health check system with canary monitoring # DISABLED: Canary tests consume Google Maps requests and trigger rate limiting @@ -153,6 +171,7 @@ app.add_middleware( app.include_router(batches_router) app.include_router(dashboard_router) app.include_router(admin_router) +app.include_router(pipelines_router) # ==================== Request/Response Models ==================== @@ -1122,7 +1141,7 @@ async def check_reviews(request: ScrapeRequest): log.info(f"Creating Chrome with default settings") result = await asyncio.to_thread( - fast_scrape_reviews, + fast_scrape_v1_0_0, # Use v1.0.0 for validation (fastest) url=url, headless=False, # Use Xvfb display validation_only=True, # Return early after getting total_reviews @@ -1704,10 +1723,15 @@ async def run_scraping_job(job_id: UUID): # Schedule the coroutine on the event loop asyncio.run_coroutine_threadsafe(save(), loop) + # Get scraper version from metadata (frontend can specify version) + requested_version = metadata.get('scraper_version') if metadata else None + scraper_func, actual_version = get_scraper_for_version(requested_version) + log.info(f"Using scraper version {actual_version} for job {job_id}") + # Run scraping with progress callback and shared log capture # headless=False because Docker uses Xvfb virtual display result = await asyncio.to_thread( - fast_scrape_reviews, + scraper_func, url=url, headless=False, progress_callback=progress_callback,