feat: Add scraper version routing with v1.1.0 as default

- Import both v1.0.0 and v1.1.0 scraper versions - Add SCRAPER_VERSIONS registry mapping version strings to functions - Add get_scraper_for_version() to route based on job metadata - Default to v1.1.0 (multi-sort) for new jobs - Frontend can select specific version via scraper_version parameter - Validation endpoint continues using v1.0.0 for speed Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 19:04:06 +00:00
parent 7771c734c6
commit d64f06ba9e
1 changed files with 27 additions and 3 deletions
--- a/api_server_production.py
+++ b/api_server_production.py
@@ -28,7 +28,23 @@ from fastapi.responses import JSONResponse, StreamingResponse
 from core.database import DatabaseManager, JobStatus
 from services.webhook_service import WebhookDispatcher, WebhookManager
 from utils.health_checks import HealthCheckSystem
-from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews, LogCapture, get_business_card_info  # Clean scraper
+from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews as fast_scrape_v1_0_0, LogCapture, get_business_card_info
 from scrapers.google_reviews.v1_1_0 import fast_scrape_reviews as fast_scrape_v1_1_0
 # Scraper version registry - maps version strings to scraper functions
 SCRAPER_VERSIONS = {
    "1.0.0": fast_scrape_v1_0_0,
    "v1.0.0": fast_scrape_v1_0_0,
    "1.1.0": fast_scrape_v1_1_0,
    "v1.1.0": fast_scrape_v1_1_0,
 }
 DEFAULT_SCRAPER_VERSION = "1.1.0"  # Latest version as default
 def get_scraper_for_version(version: str = None):
    """Get the appropriate scraper function for a version string."""
    if version and version in SCRAPER_VERSIONS:
        return SCRAPER_VERSIONS[version], version
    return SCRAPER_VERSIONS[DEFAULT_SCRAPER_VERSION], DEFAULT_SCRAPER_VERSION
 from utils.crash_analyzer import analyze_crash, summarize_crash_patterns, apply_auto_fix
 from utils.logger import StructuredLogger, LogEntry
 from workers.chrome_pool import (
@@ -44,6 +60,7 @@ from api.routes import (
    batches_router, set_batches_db,
    dashboard_router, set_dashboard_db,
    admin_router, set_admin_db,
    pipelines_router, set_pipelines_db,
 )
 # Configure logging
@@ -92,6 +109,7 @@ async def lifespan(app: FastAPI):
    set_batches_db(db)
    set_dashboard_db(db)
    set_admin_db(db)
    set_pipelines_db(db.pool)  # Pipeline router uses raw asyncpg pool
    # Initialize health check system with canary monitoring
    # DISABLED: Canary tests consume Google Maps requests and trigger rate limiting
@@ -153,6 +171,7 @@ app.add_middleware(
 app.include_router(batches_router)
 app.include_router(dashboard_router)
 app.include_router(admin_router)
 app.include_router(pipelines_router)
 # ==================== Request/Response Models ====================
@@ -1122,7 +1141,7 @@ async def check_reviews(request: ScrapeRequest):
            log.info(f"Creating Chrome with default settings")
        result = await asyncio.to_thread(
-            fast_scrape_reviews,
+            fast_scrape_v1_0_0,  # Use v1.0.0 for validation (fastest)
            url=url,
            headless=False,  # Use Xvfb display
            validation_only=True,  # Return early after getting total_reviews
@@ -1704,10 +1723,15 @@ async def run_scraping_job(job_id: UUID):
                # Schedule the coroutine on the event loop
                asyncio.run_coroutine_threadsafe(save(), loop)
            # Get scraper version from metadata (frontend can specify version)
            requested_version = metadata.get('scraper_version') if metadata else None
            scraper_func, actual_version = get_scraper_for_version(requested_version)
            log.info(f"Using scraper version {actual_version} for job {job_id}")
            # Run scraping with progress callback and shared log capture
            # headless=False because Docker uses Xvfb virtual display
            result = await asyncio.to_thread(
-                fast_scrape_reviews,
+                scraper_func,
                url=url,
                headless=False,
                progress_callback=progress_callback,