feat: Add scraper version routing with v1.1.0 as default
- Import both v1.0.0 and v1.1.0 scraper versions - Add SCRAPER_VERSIONS registry mapping version strings to functions - Add get_scraper_for_version() to route based on job metadata - Default to v1.1.0 (multi-sort) for new jobs - Frontend can select specific version via scraper_version parameter - Validation endpoint continues using v1.0.0 for speed Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -28,7 +28,23 @@ from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from core.database import DatabaseManager, JobStatus
|
||||
from services.webhook_service import WebhookDispatcher, WebhookManager
|
||||
from utils.health_checks import HealthCheckSystem
|
||||
from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews, LogCapture, get_business_card_info # Clean scraper
|
||||
from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews as fast_scrape_v1_0_0, LogCapture, get_business_card_info
|
||||
from scrapers.google_reviews.v1_1_0 import fast_scrape_reviews as fast_scrape_v1_1_0
|
||||
|
||||
# Scraper version registry - maps version strings to scraper functions
|
||||
SCRAPER_VERSIONS = {
|
||||
"1.0.0": fast_scrape_v1_0_0,
|
||||
"v1.0.0": fast_scrape_v1_0_0,
|
||||
"1.1.0": fast_scrape_v1_1_0,
|
||||
"v1.1.0": fast_scrape_v1_1_0,
|
||||
}
|
||||
DEFAULT_SCRAPER_VERSION = "1.1.0" # Latest version as default
|
||||
|
||||
def get_scraper_for_version(version: str = None):
|
||||
"""Get the appropriate scraper function for a version string."""
|
||||
if version and version in SCRAPER_VERSIONS:
|
||||
return SCRAPER_VERSIONS[version], version
|
||||
return SCRAPER_VERSIONS[DEFAULT_SCRAPER_VERSION], DEFAULT_SCRAPER_VERSION
|
||||
from utils.crash_analyzer import analyze_crash, summarize_crash_patterns, apply_auto_fix
|
||||
from utils.logger import StructuredLogger, LogEntry
|
||||
from workers.chrome_pool import (
|
||||
@@ -44,6 +60,7 @@ from api.routes import (
|
||||
batches_router, set_batches_db,
|
||||
dashboard_router, set_dashboard_db,
|
||||
admin_router, set_admin_db,
|
||||
pipelines_router, set_pipelines_db,
|
||||
)
|
||||
|
||||
# Configure logging
|
||||
@@ -92,6 +109,7 @@ async def lifespan(app: FastAPI):
|
||||
set_batches_db(db)
|
||||
set_dashboard_db(db)
|
||||
set_admin_db(db)
|
||||
set_pipelines_db(db.pool) # Pipeline router uses raw asyncpg pool
|
||||
|
||||
# Initialize health check system with canary monitoring
|
||||
# DISABLED: Canary tests consume Google Maps requests and trigger rate limiting
|
||||
@@ -153,6 +171,7 @@ app.add_middleware(
|
||||
app.include_router(batches_router)
|
||||
app.include_router(dashboard_router)
|
||||
app.include_router(admin_router)
|
||||
app.include_router(pipelines_router)
|
||||
|
||||
|
||||
# ==================== Request/Response Models ====================
|
||||
@@ -1122,7 +1141,7 @@ async def check_reviews(request: ScrapeRequest):
|
||||
log.info(f"Creating Chrome with default settings")
|
||||
|
||||
result = await asyncio.to_thread(
|
||||
fast_scrape_reviews,
|
||||
fast_scrape_v1_0_0, # Use v1.0.0 for validation (fastest)
|
||||
url=url,
|
||||
headless=False, # Use Xvfb display
|
||||
validation_only=True, # Return early after getting total_reviews
|
||||
@@ -1704,10 +1723,15 @@ async def run_scraping_job(job_id: UUID):
|
||||
# Schedule the coroutine on the event loop
|
||||
asyncio.run_coroutine_threadsafe(save(), loop)
|
||||
|
||||
# Get scraper version from metadata (frontend can specify version)
|
||||
requested_version = metadata.get('scraper_version') if metadata else None
|
||||
scraper_func, actual_version = get_scraper_for_version(requested_version)
|
||||
log.info(f"Using scraper version {actual_version} for job {job_id}")
|
||||
|
||||
# Run scraping with progress callback and shared log capture
|
||||
# headless=False because Docker uses Xvfb virtual display
|
||||
result = await asyncio.to_thread(
|
||||
fast_scrape_reviews,
|
||||
scraper_func,
|
||||
url=url,
|
||||
headless=False,
|
||||
progress_callback=progress_callback,
|
||||
|
||||
Reference in New Issue
Block a user