From d64f06ba9eab18aa5d3c7a618c15ae71086c93c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Sat, 24 Jan 2026 19:04:06 +0000 Subject: [PATCH] feat: Add scraper version routing with v1.1.0 as default - Import both v1.0.0 and v1.1.0 scraper versions - Add SCRAPER_VERSIONS registry mapping version strings to functions - Add get_scraper_for_version() to route based on job metadata - Default to v1.1.0 (multi-sort) for new jobs - Frontend can select specific version via scraper_version parameter - Validation endpoint continues using v1.0.0 for speed Co-Authored-By: Claude Opus 4.5 --- api_server_production.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/api_server_production.py b/api_server_production.py index d276dea..50aa6e2 100644 --- a/api_server_production.py +++ b/api_server_production.py @@ -28,7 +28,23 @@ from fastapi.responses import JSONResponse, StreamingResponse from core.database import DatabaseManager, JobStatus from services.webhook_service import WebhookDispatcher, WebhookManager from utils.health_checks import HealthCheckSystem -from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews, LogCapture, get_business_card_info # Clean scraper +from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews as fast_scrape_v1_0_0, LogCapture, get_business_card_info +from scrapers.google_reviews.v1_1_0 import fast_scrape_reviews as fast_scrape_v1_1_0 + +# Scraper version registry - maps version strings to scraper functions +SCRAPER_VERSIONS = { + "1.0.0": fast_scrape_v1_0_0, + "v1.0.0": fast_scrape_v1_0_0, + "1.1.0": fast_scrape_v1_1_0, + "v1.1.0": fast_scrape_v1_1_0, +} +DEFAULT_SCRAPER_VERSION = "1.1.0" # Latest version as default + +def get_scraper_for_version(version: str = None): + """Get the appropriate scraper function for a version string.""" + if version and version in SCRAPER_VERSIONS: + return SCRAPER_VERSIONS[version], version + return SCRAPER_VERSIONS[DEFAULT_SCRAPER_VERSION], DEFAULT_SCRAPER_VERSION from utils.crash_analyzer import analyze_crash, summarize_crash_patterns, apply_auto_fix from utils.logger import StructuredLogger, LogEntry from workers.chrome_pool import ( @@ -44,6 +60,7 @@ from api.routes import ( batches_router, set_batches_db, dashboard_router, set_dashboard_db, admin_router, set_admin_db, + pipelines_router, set_pipelines_db, ) # Configure logging @@ -92,6 +109,7 @@ async def lifespan(app: FastAPI): set_batches_db(db) set_dashboard_db(db) set_admin_db(db) + set_pipelines_db(db.pool) # Pipeline router uses raw asyncpg pool # Initialize health check system with canary monitoring # DISABLED: Canary tests consume Google Maps requests and trigger rate limiting @@ -153,6 +171,7 @@ app.add_middleware( app.include_router(batches_router) app.include_router(dashboard_router) app.include_router(admin_router) +app.include_router(pipelines_router) # ==================== Request/Response Models ==================== @@ -1122,7 +1141,7 @@ async def check_reviews(request: ScrapeRequest): log.info(f"Creating Chrome with default settings") result = await asyncio.to_thread( - fast_scrape_reviews, + fast_scrape_v1_0_0, # Use v1.0.0 for validation (fastest) url=url, headless=False, # Use Xvfb display validation_only=True, # Return early after getting total_reviews @@ -1704,10 +1723,15 @@ async def run_scraping_job(job_id: UUID): # Schedule the coroutine on the event loop asyncio.run_coroutine_threadsafe(save(), loop) + # Get scraper version from metadata (frontend can specify version) + requested_version = metadata.get('scraper_version') if metadata else None + scraper_func, actual_version = get_scraper_for_version(requested_version) + log.info(f"Using scraper version {actual_version} for job {job_id}") + # Run scraping with progress callback and shared log capture # headless=False because Docker uses Xvfb virtual display result = await asyncio.to_thread( - fast_scrape_reviews, + scraper_func, url=url, headless=False, progress_callback=progress_callback,