Phases 5-7: Dashboard UI, Admin API, and Auth middleware

Phase 5 - Main Dashboard:
- Dashboard overview page with system health stats
- Jobs by status breakdown, success rates, top clients
- Dashboard API (/api/dashboard/overview, by-client, problems, by-version)

Phase 6 - Admin/Scraper Management:
- Scrapers management page with traffic allocation UI
- Admin API for scraper CRUD operations
- Traffic percentage updates for A/B testing
- Promote/deprecate scraper versions

Phase 7 - Authentication:
- API key authentication middleware
- SHA-256 key hashing (keys never stored in plain text)
- Scope-based authorization (jobs:read, jobs:write, admin)
- Rate limiting per API key

Also:
- Updated api_server_production.py to include new routers
- Extended core/database.py with dashboard query methods
- Added dashboard link to sidebar navigation
- Updated CONTEXT-KEEPER.md to mark all phases complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 15:43:00 +00:00
parent 788ef84756
commit 39c80fc8be
11 changed files with 3465 additions and 16 deletions
--- a/api/routes/__init__.py
+++ b/api/routes/__init__.py
@@ -4,8 +4,14 @@ API Routes for ReviewIQ.
 This module exports all route modules for easy import into the main server.
 """
 from api.routes.batches import router as batches_router, set_database as set_batches_db
+from api.routes.dashboard import router as dashboard_router, set_database as set_dashboard_db
+from api.routes.admin import router as admin_router, set_database as set_admin_db

 __all__ = [
    'batches_router',
    'set_batches_db',
+    'dashboard_router',
+    'set_dashboard_db',
+    'admin_router',
+    'set_admin_db',
 ]
--- a/api/routes/admin.py
+++ b/api/routes/admin.py
@@ -0,0 +1,756 @@
+#!/usr/bin/env python3
+"""
+Admin API routes for scraper management.
+
+Phase 6 - ReviewIQ Platform
+
+Provides endpoints for:
+- Listing registered scrapers with stats
+- Registering new scraper versions
+- Updating traffic allocation for A/B testing
+- Deprecating scrapers (soft delete)
+- Promoting scrapers to stable/default
+"""
+import json
+import logging
+from datetime import datetime, timedelta
+from typing import Optional, List, Dict, Any
+from uuid import UUID
+
+from fastapi import APIRouter, HTTPException, Query, Depends
+from pydantic import BaseModel, Field, validator
+
+from core.database import DatabaseManager
+from scrapers.registry import ScraperRegistry
+
+log = logging.getLogger(__name__)
+
+# Create router
+router = APIRouter(prefix="/api/admin", tags=["admin"])
+
+
+# ==================== Pydantic Models ====================
+
+class ScraperStatsModel(BaseModel):
+    """Statistics for a scraper over the last 24 hours."""
+    total_jobs: int = Field(default=0, description="Total jobs processed")
+    success_rate: float = Field(default=0.0, description="Success rate percentage")
+    avg_duration: float = Field(default=0.0, description="Average scrape duration in seconds")
+
+
+class ScraperInfoResponse(BaseModel):
+    """Response model for scraper information."""
+    id: str = Field(..., description="Unique scraper registry ID")
+    job_type: str = Field(..., description="Type of job this scraper handles")
+    version: str = Field(..., description="Semantic version string")
+    variant: str = Field(..., description="Release variant (stable, beta, canary)")
+    is_default: bool = Field(..., description="Whether this is the default scraper")
+    traffic_pct: int = Field(..., description="Traffic percentage for A/B testing (0-100)")
+    module_path: str = Field(..., description="Python module path")
+    function_name: Optional[str] = Field(None, description="Entry function name")
+    deprecated_at: Optional[str] = Field(None, description="Deprecation timestamp (ISO format)")
+    stats: ScraperStatsModel = Field(default_factory=ScraperStatsModel, description="Last 24h stats")
+
+
+class RegisterScraperRequest(BaseModel):
+    """Request model for registering a new scraper."""
+    job_type: str = Field(..., description="Type of job (e.g., 'google_reviews')")
+    version: str = Field(..., description="Semantic version string (e.g., '1.1.0')")
+    variant: str = Field(..., description="Release variant: stable, beta, or canary")
+    module_path: str = Field(..., description="Python module path")
+    function_name: str = Field(default="scrape", description="Entry function name")
+    traffic_pct: int = Field(default=0, description="Initial traffic percentage (0-100)", ge=0, le=100)
+    min_priority: int = Field(default=0, description="Minimum job priority required")
+    config: Optional[Dict[str, Any]] = Field(default=None, description="Optional configuration")
+
+    @validator('variant')
+    def validate_variant(cls, v):
+        if v not in ('stable', 'beta', 'canary'):
+            raise ValueError("variant must be 'stable', 'beta', or 'canary'")
+        return v
+
+    @validator('version')
+    def validate_version(cls, v):
+        # Basic semver validation
+        parts = v.split('.')
+        if len(parts) < 2:
+            raise ValueError("version must be semantic version format (e.g., '1.0.0')")
+        return v
+
+
+class RegisterScraperResponse(BaseModel):
+    """Response model for scraper registration."""
+    id: str = Field(..., description="Created scraper registry ID")
+    job_type: str = Field(..., description="Job type")
+    version: str = Field(..., description="Version string")
+    variant: str = Field(..., description="Release variant")
+    message: str = Field(..., description="Status message")
+
+
+class UpdateTrafficRequest(BaseModel):
+    """Request model for updating traffic percentage."""
+    traffic_pct: int = Field(..., description="New traffic percentage (0-100)", ge=0, le=100)
+
+
+class UpdateTrafficResponse(BaseModel):
+    """Response model for traffic update."""
+    id: str = Field(..., description="Scraper registry ID")
+    traffic_pct: int = Field(..., description="Updated traffic percentage")
+    message: str = Field(..., description="Status message")
+
+
+class DeprecateResponse(BaseModel):
+    """Response model for deprecation."""
+    id: str = Field(..., description="Scraper registry ID")
+    deprecated_at: str = Field(..., description="Deprecation timestamp")
+    message: str = Field(..., description="Status message")
+
+
+class PromoteResponse(BaseModel):
+    """Response model for promotion."""
+    id: str = Field(..., description="Scraper registry ID")
+    variant: str = Field(..., description="New variant (stable)")
+    is_default: bool = Field(..., description="Whether now default")
+    traffic_pct: int = Field(..., description="New traffic percentage")
+    message: str = Field(..., description="Status message")
+
+
+# ==================== Database Helper Functions ====================
+
+async def get_scraper_stats(
+    db: DatabaseManager,
+    scraper_id: str,
+    hours: int = 24
+) -> ScraperStatsModel:
+    """
+    Get statistics for a specific scraper over the given time period.
+
+    Args:
+        db: Database manager instance
+        scraper_id: UUID of the scraper registry entry
+        hours: Number of hours to look back (default: 24)
+
+    Returns:
+        ScraperStatsModel with job counts, success rate, and avg duration
+    """
+    try:
+        async with db.pool.acquire() as conn:
+            # Query jobs that used this scraper version in the time period
+            stats = await conn.fetchrow("""
+                SELECT
+                    COUNT(*) as total_jobs,
+                    COUNT(*) FILTER (WHERE status = 'completed') as completed_jobs,
+                    COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed_jobs,
+                    AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration
+                FROM jobs
+                WHERE created_at >= NOW() - INTERVAL '%s hours'
+                  AND (
+                      metadata->>'scraper_id' = $1
+                      OR (scraper_version IS NOT NULL AND EXISTS (
+                          SELECT 1 FROM scraper_registry sr
+                          WHERE sr.id = $2::uuid
+                            AND sr.version = jobs.scraper_version
+                            AND sr.variant = COALESCE(jobs.scraper_variant, sr.variant)
+                      ))
+                  )
+            """, hours, scraper_id, scraper_id)
+
+            if not stats or stats['total_jobs'] == 0:
+                return ScraperStatsModel()
+
+            total = stats['total_jobs']
+            completed = stats['completed_jobs'] or 0
+            success_rate = (completed / total * 100) if total > 0 else 0.0
+            avg_duration = float(stats['avg_duration']) if stats['avg_duration'] else 0.0
+
+            return ScraperStatsModel(
+                total_jobs=total,
+                success_rate=round(success_rate, 2),
+                avg_duration=round(avg_duration, 2)
+            )
+    except Exception as e:
+        log.warning(f"Error getting scraper stats for {scraper_id}: {e}")
+        return ScraperStatsModel()
+
+
+async def get_scraper_by_id_from_db(
+    db: DatabaseManager,
+    scraper_id: str
+) -> Optional[Dict[str, Any]]:
+    """
+    Get scraper by ID directly from database.
+
+    Args:
+        db: Database manager instance
+        scraper_id: UUID of the scraper registry entry
+
+    Returns:
+        Scraper dictionary or None if not found
+    """
+    async with db.pool.acquire() as conn:
+        row = await conn.fetchrow("""
+            SELECT
+                id,
+                job_type,
+                version,
+                variant,
+                module_path,
+                function_name,
+                is_default,
+                traffic_pct,
+                min_priority,
+                config,
+                deprecated_at
+            FROM scraper_registry
+            WHERE id = $1
+        """, UUID(scraper_id))
+
+        if not row:
+            return None
+
+        return dict(row)
+
+
+async def update_scraper_traffic(
+    db: DatabaseManager,
+    scraper_id: str,
+    traffic_pct: int
+) -> bool:
+    """
+    Update traffic percentage for a scraper.
+
+    Args:
+        db: Database manager instance
+        scraper_id: UUID of the scraper registry entry
+        traffic_pct: New traffic percentage (0-100)
+
+    Returns:
+        True if updated, False if not found
+    """
+    async with db.pool.acquire() as conn:
+        result = await conn.execute("""
+            UPDATE scraper_registry
+            SET traffic_pct = $2
+            WHERE id = $1 AND deprecated_at IS NULL
+        """, UUID(scraper_id), traffic_pct)
+
+        return result.split()[-1] == "1"
+
+
+async def deprecate_scraper_by_id(
+    db: DatabaseManager,
+    scraper_id: str
+) -> Optional[str]:
+    """
+    Deprecate a scraper by ID (soft delete).
+
+    Args:
+        db: Database manager instance
+        scraper_id: UUID of the scraper registry entry
+
+    Returns:
+        Deprecation timestamp as ISO string, or None if not found/already deprecated
+    """
+    async with db.pool.acquire() as conn:
+        result = await conn.fetchval("""
+            UPDATE scraper_registry
+            SET deprecated_at = NOW(), traffic_pct = 0
+            WHERE id = $1 AND deprecated_at IS NULL
+            RETURNING deprecated_at
+        """, UUID(scraper_id))
+
+        if result:
+            return result.isoformat()
+        return None
+
+
+async def promote_scraper_by_id(
+    db: DatabaseManager,
+    scraper_id: str,
+    default_traffic_pct: int = 80
+) -> Optional[Dict[str, Any]]:
+    """
+    Promote a scraper to stable variant, set as default, and give it majority traffic.
+
+    This will:
+    1. Set the scraper's variant to 'stable'
+    2. Set is_default to True
+    3. Set traffic_pct to default_traffic_pct (default: 80%)
+    4. Unset is_default on other scrapers of the same job_type
+    5. Reduce traffic_pct on other scrapers proportionally
+
+    Args:
+        db: Database manager instance
+        scraper_id: UUID of the scraper to promote
+        default_traffic_pct: Traffic percentage to assign (default: 80)
+
+    Returns:
+        Updated scraper dict or None if not found
+    """
+    async with db.pool.acquire() as conn:
+        async with conn.transaction():
+            # Get the scraper to promote
+            scraper = await conn.fetchrow("""
+                SELECT id, job_type, version, variant
+                FROM scraper_registry
+                WHERE id = $1 AND deprecated_at IS NULL
+            """, UUID(scraper_id))
+
+            if not scraper:
+                return None
+
+            job_type = scraper['job_type']
+
+            # Unset is_default on other scrapers of same job_type
+            await conn.execute("""
+                UPDATE scraper_registry
+                SET is_default = FALSE
+                WHERE job_type = $1 AND id != $2
+            """, job_type, UUID(scraper_id))
+
+            # Reduce traffic on other active scrapers proportionally
+            # Calculate remaining traffic to distribute
+            remaining_traffic = 100 - default_traffic_pct
+
+            # Get other active scrapers
+            other_scrapers = await conn.fetch("""
+                SELECT id, traffic_pct
+                FROM scraper_registry
+                WHERE job_type = $1 AND id != $2 AND deprecated_at IS NULL AND traffic_pct > 0
+            """, job_type, UUID(scraper_id))
+
+            if other_scrapers:
+                total_other_traffic = sum(s['traffic_pct'] for s in other_scrapers)
+                if total_other_traffic > 0:
+                    for s in other_scrapers:
+                        new_pct = int((s['traffic_pct'] / total_other_traffic) * remaining_traffic)
+                        await conn.execute("""
+                            UPDATE scraper_registry
+                            SET traffic_pct = $2
+                            WHERE id = $1
+                        """, s['id'], new_pct)
+
+            # Promote the target scraper
+            updated = await conn.fetchrow("""
+                UPDATE scraper_registry
+                SET
+                    variant = 'stable',
+                    is_default = TRUE,
+                    traffic_pct = $2
+                WHERE id = $1
+                RETURNING id, job_type, version, variant, is_default, traffic_pct
+            """, UUID(scraper_id), default_traffic_pct)
+
+            if updated:
+                return dict(updated)
+            return None
+
+
+# ==================== Dependency Injection ====================
+
+_db: Optional[DatabaseManager] = None
+_registry: Optional[ScraperRegistry] = None
+
+
+def set_database(db: DatabaseManager):
+    """Set the database instance for the router."""
+    global _db, _registry
+    _db = db
+    _registry = ScraperRegistry(db)
+
+
+def get_db() -> DatabaseManager:
+    """Dependency to get database instance."""
+    if _db is None:
+        raise HTTPException(status_code=500, detail="Database not initialized")
+    return _db
+
+
+def get_registry() -> ScraperRegistry:
+    """Dependency to get scraper registry instance."""
+    if _registry is None:
+        raise HTTPException(status_code=500, detail="Scraper registry not initialized")
+    return _registry
+
+
+# ==================== API Endpoints ====================
+
+@router.get(
+    "/scrapers",
+    response_model=List[ScraperInfoResponse],
+    summary="List All Scrapers",
+    description="Get a list of all registered scrapers with their stats"
+)
+async def list_scrapers(
+    job_type: Optional[str] = Query(None, description="Filter by job type"),
+    include_deprecated: bool = Query(False, description="Include deprecated scrapers"),
+    db: DatabaseManager = Depends(get_db),
+    registry: ScraperRegistry = Depends(get_registry)
+):
+    """
+    List all registered scrapers with their configuration and stats.
+
+    Returns scraper information including:
+    - Version and variant information
+    - Traffic allocation percentage
+    - Whether it's the default scraper
+    - Last 24h performance stats (total jobs, success rate, avg duration)
+
+    Use `job_type` filter to get scrapers for a specific job type.
+    Set `include_deprecated=true` to include deprecated scrapers.
+    """
+    try:
+        # Refresh cache to get latest data
+        await registry.refresh_cache()
+
+        # Get all scrapers
+        scrapers = await registry.list_scrapers(
+            job_type=job_type,
+            include_deprecated=include_deprecated
+        )
+
+        # Enrich with stats
+        result = []
+        for scraper in scrapers:
+            stats = await get_scraper_stats(db, scraper['id'])
+
+            # Get full scraper info from DB to include job_type
+            full_info = await get_scraper_by_id_from_db(db, scraper['id'])
+
+            result.append(ScraperInfoResponse(
+                id=scraper['id'],
+                job_type=full_info['job_type'] if full_info else 'unknown',
+                version=scraper['version'],
+                variant=scraper['variant'],
+                is_default=scraper['is_default'],
+                traffic_pct=scraper['traffic_pct'],
+                module_path=scraper['module_path'],
+                function_name=scraper.get('function_name'),
+                deprecated_at=str(full_info['deprecated_at']) if full_info and full_info.get('deprecated_at') else None,
+                stats=stats
+            ))
+
+        # Sort by job_type, then by version descending
+        result.sort(key=lambda x: (x.job_type, x.version), reverse=True)
+
+        return result
+
+    except Exception as e:
+        log.error(f"Error listing scrapers: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to list scrapers: {str(e)}")
+
+
+@router.post(
+    "/scrapers",
+    response_model=RegisterScraperResponse,
+    summary="Register New Scraper",
+    description="Register a new scraper version"
+)
+async def register_scraper(
+    request: RegisterScraperRequest,
+    db: DatabaseManager = Depends(get_db),
+    registry: ScraperRegistry = Depends(get_registry)
+):
+    """
+    Register a new scraper version in the registry.
+
+    This allows adding new scraper implementations that can be used for:
+    - A/B testing (set traffic_pct to allocate traffic)
+    - Canary releases (set variant to 'canary' with low traffic_pct)
+    - Beta testing (set variant to 'beta')
+
+    The scraper won't receive any traffic until traffic_pct > 0.
+
+    **Parameters:**
+    - `job_type`: Type of scraping job (e.g., 'google_reviews')
+    - `version`: Semantic version (e.g., '1.1.0')
+    - `variant`: Release channel ('stable', 'beta', 'canary')
+    - `module_path`: Python module path (e.g., 'scrapers.google_reviews.v1_1_0')
+    - `function_name`: Entry function name (default: 'scrape')
+    - `traffic_pct`: Initial traffic allocation (0-100, default: 0)
+    - `config`: Optional configuration dict passed to the scraper
+    """
+    try:
+        # Check if version already exists for this job_type
+        existing = await registry.list_scrapers(job_type=request.job_type, include_deprecated=True)
+        for scraper in existing:
+            if scraper['version'] == request.version:
+                raise HTTPException(
+                    status_code=409,
+                    detail=f"Scraper version {request.version} already exists for job_type {request.job_type}"
+                )
+
+        # Register the new scraper
+        scraper_id = await registry.register_scraper(
+            job_type=request.job_type,
+            version=request.version,
+            variant=request.variant,
+            module_path=request.module_path,
+            function_name=request.function_name,
+            is_default=False,  # Never auto-set as default
+            traffic_pct=request.traffic_pct,
+            min_priority=request.min_priority,
+            config=request.config
+        )
+
+        log.info(f"Registered new scraper: {request.job_type} v{request.version} ({request.variant})")
+
+        return RegisterScraperResponse(
+            id=scraper_id,
+            job_type=request.job_type,
+            version=request.version,
+            variant=request.variant,
+            message=f"Successfully registered scraper {request.job_type} v{request.version} ({request.variant})"
+        )
+
+    except HTTPException:
+        raise
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        log.error(f"Error registering scraper: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to register scraper: {str(e)}")
+
+
+@router.put(
+    "/scrapers/{scraper_id}/traffic",
+    response_model=UpdateTrafficResponse,
+    summary="Update Traffic Percentage",
+    description="Update the traffic allocation for a scraper"
+)
+async def update_traffic(
+    scraper_id: str,
+    request: UpdateTrafficRequest,
+    db: DatabaseManager = Depends(get_db),
+    registry: ScraperRegistry = Depends(get_registry)
+):
+    """
+    Update the traffic percentage for a specific scraper.
+
+    Traffic percentage determines what portion of requests are routed
+    to this scraper version. Used for:
+    - Gradual rollouts (start at 10%, increase to 50%, then 100%)
+    - A/B testing (set two versions to 50% each)
+    - Canary releases (set new version to 5-10%)
+
+    **Note:** Total traffic across all active scrapers of the same
+    job_type should not exceed 100%. The system uses weighted random
+    selection, so percentages are relative weights, not exact guarantees.
+    """
+    try:
+        # Validate UUID format
+        try:
+            UUID(scraper_id)
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Invalid scraper ID format")
+
+        # Check scraper exists
+        scraper = await get_scraper_by_id_from_db(db, scraper_id)
+        if not scraper:
+            raise HTTPException(status_code=404, detail="Scraper not found")
+
+        if scraper.get('deprecated_at'):
+            raise HTTPException(status_code=400, detail="Cannot update traffic for deprecated scraper")
+
+        # Update traffic
+        success = await update_scraper_traffic(db, scraper_id, request.traffic_pct)
+        if not success:
+            raise HTTPException(status_code=500, detail="Failed to update traffic allocation")
+
+        # Invalidate registry cache
+        await registry.refresh_cache()
+
+        log.info(f"Updated traffic for scraper {scraper_id} to {request.traffic_pct}%")
+
+        return UpdateTrafficResponse(
+            id=scraper_id,
+            traffic_pct=request.traffic_pct,
+            message=f"Traffic updated to {request.traffic_pct}%"
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        log.error(f"Error updating traffic for scraper {scraper_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to update traffic: {str(e)}")
+
+
+@router.post(
+    "/scrapers/{scraper_id}/deprecate",
+    response_model=DeprecateResponse,
+    summary="Deprecate Scraper",
+    description="Mark a scraper as deprecated (soft delete)"
+)
+async def deprecate_scraper(
+    scraper_id: str,
+    db: DatabaseManager = Depends(get_db),
+    registry: ScraperRegistry = Depends(get_registry)
+):
+    """
+    Deprecate a scraper version (soft delete).
+
+    This will:
+    - Set deprecated_at timestamp
+    - Set traffic_pct to 0 (no new requests)
+    - Keep the scraper in the registry for historical reference
+
+    Deprecated scrapers are excluded from normal routing but can
+    still be explicitly requested by version for debugging.
+
+    To permanently remove a scraper, use database admin tools.
+    """
+    try:
+        # Validate UUID format
+        try:
+            UUID(scraper_id)
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Invalid scraper ID format")
+
+        # Check scraper exists
+        scraper = await get_scraper_by_id_from_db(db, scraper_id)
+        if not scraper:
+            raise HTTPException(status_code=404, detail="Scraper not found")
+
+        if scraper.get('deprecated_at'):
+            raise HTTPException(status_code=400, detail="Scraper is already deprecated")
+
+        # Deprecate
+        deprecated_at = await deprecate_scraper_by_id(db, scraper_id)
+        if not deprecated_at:
+            raise HTTPException(status_code=500, detail="Failed to deprecate scraper")
+
+        # Invalidate registry cache
+        await registry.refresh_cache()
+
+        log.info(f"Deprecated scraper {scraper_id}")
+
+        return DeprecateResponse(
+            id=scraper_id,
+            deprecated_at=deprecated_at,
+            message=f"Scraper deprecated. Traffic allocation set to 0%."
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        log.error(f"Error deprecating scraper {scraper_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to deprecate scraper: {str(e)}")
+
+
+@router.post(
+    "/scrapers/{scraper_id}/promote",
+    response_model=PromoteResponse,
+    summary="Promote Scraper",
+    description="Promote scraper to stable variant and set as default"
+)
+async def promote_scraper(
+    scraper_id: str,
+    traffic_pct: int = Query(80, description="Traffic percentage to assign (0-100)", ge=0, le=100),
+    db: DatabaseManager = Depends(get_db),
+    registry: ScraperRegistry = Depends(get_registry)
+):
+    """
+    Promote a scraper to stable variant, set as default, and give it majority traffic.
+
+    This operation will:
+    1. Set the scraper's variant to 'stable'
+    2. Set is_default to True
+    3. Set traffic_pct to the specified value (default: 80%)
+    4. Unset is_default on other scrapers of the same job_type
+    5. Redistribute remaining traffic among other active scrapers
+
+    **Use cases:**
+    - Graduating a beta version to production
+    - Making a canary release the new stable version
+    - Switching to a new scraper implementation
+
+    **Parameters:**
+    - `traffic_pct`: Traffic percentage to assign (default: 80%)
+    """
+    try:
+        # Validate UUID format
+        try:
+            UUID(scraper_id)
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Invalid scraper ID format")
+
+        # Check scraper exists
+        scraper = await get_scraper_by_id_from_db(db, scraper_id)
+        if not scraper:
+            raise HTTPException(status_code=404, detail="Scraper not found")
+
+        if scraper.get('deprecated_at'):
+            raise HTTPException(status_code=400, detail="Cannot promote a deprecated scraper")
+
+        # Promote
+        result = await promote_scraper_by_id(db, scraper_id, traffic_pct)
+        if not result:
+            raise HTTPException(status_code=500, detail="Failed to promote scraper")
+
+        # Invalidate registry cache
+        await registry.refresh_cache()
+
+        log.info(f"Promoted scraper {scraper_id} to stable with {traffic_pct}% traffic")
+
+        return PromoteResponse(
+            id=scraper_id,
+            variant='stable',
+            is_default=True,
+            traffic_pct=traffic_pct,
+            message=f"Scraper promoted to stable. Now default with {traffic_pct}% traffic."
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        log.error(f"Error promoting scraper {scraper_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to promote scraper: {str(e)}")
+
+
+@router.get(
+    "/scrapers/{scraper_id}",
+    response_model=ScraperInfoResponse,
+    summary="Get Scraper Details",
+    description="Get detailed information about a specific scraper"
+)
+async def get_scraper_details(
+    scraper_id: str,
+    db: DatabaseManager = Depends(get_db),
+    registry: ScraperRegistry = Depends(get_registry)
+):
+    """
+    Get detailed information about a specific scraper including stats.
+    """
+    try:
+        # Validate UUID format
+        try:
+            UUID(scraper_id)
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Invalid scraper ID format")
+
+        # Get scraper from DB
+        scraper = await get_scraper_by_id_from_db(db, scraper_id)
+        if not scraper:
+            raise HTTPException(status_code=404, detail="Scraper not found")
+
+        # Get stats
+        stats = await get_scraper_stats(db, scraper_id)
+
+        return ScraperInfoResponse(
+            id=str(scraper['id']),
+            job_type=scraper['job_type'],
+            version=scraper['version'],
+            variant=scraper['variant'],
+            is_default=scraper['is_default'],
+            traffic_pct=scraper['traffic_pct'],
+            module_path=scraper['module_path'],
+            function_name=scraper.get('function_name'),
+            deprecated_at=str(scraper['deprecated_at']) if scraper.get('deprecated_at') else None,
+            stats=stats
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        log.error(f"Error getting scraper {scraper_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get scraper: {str(e)}")
--- a/api/routes/dashboard.py
+++ b/api/routes/dashboard.py
@@ -0,0 +1,623 @@
+#!/usr/bin/env python3
+"""
+Dashboard API for ReviewIQ Phase 5.
+
+Provides system-wide analytics and monitoring endpoints:
+- Overview statistics (jobs by status, success rates, durations)
+- Client-level aggregations
+- Problem detection (failures, slow jobs, callback issues)
+- Scraper version performance analysis
+"""
+import json
+import logging
+from datetime import datetime, timedelta
+from typing import Optional, List, Dict, Any
+from enum import Enum
+
+from fastapi import APIRouter, HTTPException, Query, Depends
+from pydantic import BaseModel, Field
+
+from core.database import DatabaseManager
+
+# Module-level logger, named after this module's import path
+log = logging.getLogger(__name__)
+
+# Create router: routes registered on it are prefixed with /api/dashboard
+# and tagged "dashboard"
+router = APIRouter(prefix="/api/dashboard", tags=["dashboard"])
+
+
+# ==================== Enums ====================
+
+class TimePeriod(str, Enum):
+    """Time period for filtering dashboard data"""
+    # The str mixin makes each member compare equal to its string value
+    # ("1h", "6h", ...), the form listed for the `period` query parameter.
+    # get_period_delta() converts each member into a timedelta window.
+    HOUR_1 = "1h"    # last 1 hour
+    HOUR_6 = "6h"    # last 6 hours
+    HOUR_24 = "24h"  # last 24 hours (the endpoints' default)
+    DAY_7 = "7d"     # last 7 days
+    DAY_30 = "30d"   # last 30 days
+
+
+# ==================== Pydantic Response Models ====================
+
+class JobsByStatus(BaseModel):
+    """Job counts grouped by status"""
+    # One counter per jobs.status value aggregated in get_overview_stats();
+    # each defaults to 0 so a missing status still serializes as a number.
+    pending: int = 0
+    running: int = 0
+    completed: int = 0
+    failed: int = 0
+    cancelled: int = 0
+    partial: int = 0
+
+
+class OverviewResponse(BaseModel):
+    """System-wide dashboard overview statistics"""
+    # Response body of GET /api/dashboard/overview; values are produced by
+    # get_overview_stats().
+    period: str = Field(..., description="Time period for the statistics (e.g., '24h')")
+    total_jobs: int = Field(..., description="Total number of jobs in the period")
+    completed_jobs: int = Field(..., description="Number of successfully completed jobs")
+    failed_jobs: int = Field(..., description="Number of failed jobs")
+    running_jobs: int = Field(..., description="Number of currently running jobs")
+    # completed / (completed + failed + partial) * 100, rounded to one decimal;
+    # 0.0 when no job has finished in the period.
+    success_rate: float = Field(..., description="Percentage of successful jobs (0-100)")
+    # Averaged over completed jobs with a recorded scrape_time; None when no
+    # average is available.
+    avg_duration_seconds: Optional[float] = Field(None, description="Average job duration in seconds")
+    jobs_by_status: JobsByStatus = Field(..., description="Job counts grouped by status")
+    # Sum of reviews_count over completed jobs only.
+    total_reviews_scraped: int = Field(0, description="Total reviews scraped in the period")
+
+
+class ClientStats(BaseModel):
+    """Job statistics for a single client"""
+    # One element of the GET /api/dashboard/by-client response; values are
+    # produced by get_stats_by_client(), grouped by (client id, source).
+    # 'unknown' is substituted when the job row has no requester_client_id.
+    client_id: str = Field(..., description="Client identifier")
+    source: Optional[str] = Field(None, description="Source of the requests (e.g., 'veritasreview.com')")
+    total_jobs: int = Field(..., description="Total jobs submitted by this client")
+    completed: int = Field(..., description="Number of completed jobs")
+    # Counts jobs whose status is 'failed' or 'partial'.
+    failed: int = Field(..., description="Number of failed jobs")
+    # completed / (completed + failed) * 100, rounded to one decimal.
+    success_rate: float = Field(..., description="Success rate percentage (0-100)")
+    # Sum of reviews_count over completed jobs only.
+    total_reviews: int = Field(0, description="Total reviews scraped for this client")
+
+
+class FailedJob(BaseModel):
+    """Details of a failed job"""
+    # Built by get_problems() from jobs whose status is 'failed' or 'partial'.
+    job_id: str = Field(..., description="Job UUID")
+    url: str = Field(..., description="URL that was being scraped")
+    # Result of categorize_error() applied to error_message.
+    error_type: Optional[str] = Field(None, description="Categorized error type")
+    error_message: Optional[str] = Field(None, description="Error message")
+    # jobs.completed_at in ISO format; get_problems() substitutes the current
+    # time when completed_at is NULL.
+    failed_at: str = Field(..., description="ISO timestamp when the job failed")
+    client_id: Optional[str] = Field(None, description="Client who submitted the job")
+
+
+class SlowJob(BaseModel):
+    """Details of a slow job (taking > 2x average duration)"""
+    # Built by get_problems() from completed jobs whose scrape_time exceeds
+    # twice the period's average scrape_time.
+    job_id: str = Field(..., description="Job UUID")
+    url: str = Field(..., description="URL that was being scraped")
+    duration_seconds: float = Field(..., description="Actual job duration in seconds")
+    avg_duration_seconds: float = Field(..., description="Average duration for comparison")
+    # scrape_time / average, rounded to one decimal.
+    ratio: float = Field(..., description="How many times slower than average")
+    # get_problems() substitutes the current time when completed_at is NULL.
+    completed_at: str = Field(..., description="ISO timestamp when the job completed")
+
+
+class CallbackFailure(BaseModel):
+    """Details of a failed webhook callback"""
+    # Built by get_problems() from jobs that have a callback_url and a
+    # callback_status of 'failed'.
+    job_id: str = Field(..., description="Job UUID")
+    callback_url: str = Field(..., description="Webhook URL that failed")
+    status: str = Field(..., description="Callback status")
+    attempts: int = Field(..., description="Number of delivery attempts")
+    # get_problems() currently always leaves this as None.
+    last_error: Optional[str] = Field(None, description="Last error message")
+
+
+class ProblemsResponse(BaseModel):
+    """Recent failures and issues"""
+    # Response body of GET /api/dashboard/problems. Each list holds at most
+    # `limit` entries, as requested by the caller.
+    failed_jobs: List[FailedJob] = Field(default_factory=list, description="Recent job failures")
+    slow_jobs: List[SlowJob] = Field(default_factory=list, description="Jobs taking > 2x average duration")
+    callback_failures: List[CallbackFailure] = Field(default_factory=list, description="Failed webhook deliveries")
+    # Sum of the three list lengths as computed in get_problems(), so it is
+    # bounded by the per-category limit rather than being a full count.
+    total_problems: int = Field(..., description="Total number of problems detected")
+
+
+class VersionStats(BaseModel):
+    """Performance statistics for a scraper version"""
+    # One element of the GET /api/dashboard/by-version response; values are
+    # produced by get_stats_by_version(), grouped by (version, variant).
+    # 'unknown' is substituted when the job row has no scraper_version.
+    version: str = Field(..., description="Scraper version string (e.g., '1.0.0')")
+    variant: Optional[str] = Field(None, description="Scraper variant (e.g., 'stable', 'stealth')")
+    total_jobs: int = Field(..., description="Total jobs run with this version")
+    # completed / (completed + failed-or-partial) * 100, rounded to one decimal.
+    success_rate: float = Field(..., description="Success rate percentage (0-100)")
+    # Averaged over completed jobs with a recorded scrape_time.
+    avg_duration: Optional[float] = Field(None, description="Average job duration in seconds")
+    # Sum of reviews_count over completed jobs only.
+    total_reviews: int = Field(0, description="Total reviews scraped with this version")
+
+
+# ==================== Helper Functions ====================
+
+def get_period_delta(period: TimePeriod) -> timedelta:
+    """
+    Translate a TimePeriod member into the length of its look-back window.
+
+    A value that is not one of the known periods yields a 24-hour window.
+    """
+    # Window length in hours for each supported period
+    window_hours = {
+        TimePeriod.HOUR_1: 1,
+        TimePeriod.HOUR_6: 6,
+        TimePeriod.HOUR_24: 24,
+        TimePeriod.DAY_7: 7 * 24,
+        TimePeriod.DAY_30: 30 * 24,
+    }
+    return timedelta(hours=window_hours.get(period, 24))
+
+
+def categorize_error(error_message: Optional[str]) -> str:
+    """
+    Map a free-form error message onto a coarse error category.
+
+    Matching is case-insensitive substring search. A missing or empty message
+    gives "unknown"; a message matching no rule gives "other".
+    """
+    if not error_message:
+        return "unknown"
+
+    text = error_message.lower()
+
+    # Rate limiting is the only rule that needs both words to be present
+    if "rate" in text and "limit" in text:
+        return "rate_limited"
+
+    # The first category with any keyword present wins, so order matters
+    keyword_rules = (
+        ("timeout", ("timeout",)),
+        ("captcha_blocked", ("captcha", "recaptcha")),
+        ("bot_detected", ("bot", "detected")),
+        ("network_error", ("network", "connection")),
+        ("selector_failed", ("element", "selector", "not found")),
+        ("navigation_error", ("navigation", "page")),
+        ("browser_error", ("browser", "playwright")),
+    )
+    for category, keywords in keyword_rules:
+        if any(keyword in text for keyword in keywords):
+            return category
+
+    return "other"
+
+
+# ==================== Database Query Functions ====================
+
+async def get_overview_stats(
+    db: DatabaseManager,
+    period: TimePeriod
+) -> Dict[str, Any]:
+    """
+    Get system-wide job statistics for the specified period.
+
+    Runs a single aggregate query over ``jobs`` rows whose ``created_at`` is
+    at or after ``now - period`` and shapes the result for OverviewResponse.
+
+    Args:
+        db: Database manager exposing ``pool.acquire()``.
+        period: Look-back window, converted with ``get_period_delta``.
+
+    Returns:
+        Dict with the keys ``period``, ``total_jobs``, ``completed_jobs``,
+        ``failed_jobs``, ``running_jobs``, ``success_rate``,
+        ``avg_duration_seconds``, ``total_reviews_scraped`` and
+        ``jobs_by_status``. ``avg_duration_seconds`` is None only when the
+        query produced no average at all.
+    """
+    # NOTE(review): the cutoff is a naive local-time datetime; confirm this
+    # matches the timezone convention of jobs.created_at.
+    cutoff = datetime.now() - get_period_delta(period)
+
+    async with db.pool.acquire() as conn:
+        # Get job counts by status
+        stats = await conn.fetchrow("""
+            SELECT
+                COUNT(*) as total_jobs,
+                COUNT(*) FILTER (WHERE status = 'pending') as pending,
+                COUNT(*) FILTER (WHERE status = 'running') as running,
+                COUNT(*) FILTER (WHERE status = 'completed') as completed,
+                COUNT(*) FILTER (WHERE status = 'failed') as failed,
+                COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled,
+                COUNT(*) FILTER (WHERE status = 'partial') as partial,
+                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
+                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
+            FROM jobs
+            WHERE created_at >= $1
+        """, cutoff)
+
+        # NULL-safe per-status counts, in the order JobsByStatus declares them
+        by_status = {
+            name: stats[name] or 0
+            for name in ('pending', 'running', 'completed', 'failed', 'cancelled', 'partial')
+        }
+        completed = by_status['completed']
+        failed = by_status['failed']
+
+        # Calculate success rate (only for finished jobs)
+        finished = completed + failed + by_status['partial']
+        success_rate = (completed / finished * 100) if finished > 0 else 0.0
+
+        # Test against None rather than truthiness so that a genuine average
+        # of 0 is reported as 0.0 instead of being turned into "no data".
+        avg_duration = stats['avg_duration']
+
+        return {
+            'period': period.value,
+            'total_jobs': stats['total_jobs'] or 0,
+            'completed_jobs': completed,
+            'failed_jobs': failed,
+            'running_jobs': by_status['running'],
+            'success_rate': round(success_rate, 1),
+            'avg_duration_seconds': round(avg_duration, 1) if avg_duration is not None else None,
+            'total_reviews_scraped': stats['total_reviews'] or 0,
+            'jobs_by_status': by_status,
+        }
+
+
+async def get_stats_by_client(
+    db: DatabaseManager,
+    period: TimePeriod,
+    limit: int = 50
+) -> List[Dict[str, Any]]:
+    """
+    Aggregate job statistics per (client id, source) pair.
+
+    Only jobs created within the look-back window are counted. At most
+    ``limit`` groups are returned, busiest first. Jobs with status 'partial'
+    are counted together with 'failed'.
+    """
+    cutoff = datetime.now() - get_period_delta(period)
+
+    def summarize(row) -> Dict[str, Any]:
+        """Turn one aggregate row into the dict shape used for ClientStats."""
+        done = row['completed'] or 0
+        not_done = row['failed'] or 0
+        finished = done + not_done
+        if finished > 0:
+            rate = done / finished * 100
+        else:
+            rate = 0.0
+        return {
+            'client_id': row['client_id'],
+            'source': row['source'],
+            'total_jobs': row['total_jobs'],
+            'completed': done,
+            'failed': not_done,
+            'success_rate': round(rate, 1),
+            'total_reviews': row['total_reviews'] or 0,
+        }
+
+    async with db.pool.acquire() as conn:
+        rows = await conn.fetch("""
+            SELECT
+                COALESCE(requester_client_id, 'unknown') as client_id,
+                requester_source as source,
+                COUNT(*) as total_jobs,
+                COUNT(*) FILTER (WHERE status = 'completed') as completed,
+                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
+                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
+            FROM jobs
+            WHERE created_at >= $1
+            GROUP BY requester_client_id, requester_source
+            ORDER BY total_jobs DESC
+            LIMIT $2
+        """, cutoff, limit)
+
+        return [summarize(row) for row in rows]
+
+
+async def get_problems(
+    db: DatabaseManager,
+    period: TimePeriod,
+    limit: int = 20
+) -> Dict[str, Any]:
+    """
+    Collect recent problem jobs in three categories.
+
+    Categories, each capped at ``limit`` rows and restricted to jobs created
+    within the look-back window:
+
+    - failed_jobs: jobs with status 'failed' or 'partial'
+    - slow_jobs: completed jobs whose scrape_time exceeds twice the window's
+      average scrape_time (skipped when there is no positive average)
+    - callback_failures: jobs with a callback_url whose callback_status is
+      'failed'
+
+    Returns a dict with those three lists plus ``total_problems``, the sum of
+    their lengths (so it is bounded by the caps, not a full count).
+    """
+    cutoff = datetime.now() - get_period_delta(period)
+
+    def iso_or_now(stamp) -> str:
+        """ISO-format a timestamp, substituting the current time for NULL."""
+        if stamp:
+            return stamp.isoformat()
+        return datetime.now().isoformat()
+
+    async with db.pool.acquire() as conn:
+        # --- failed / partial jobs ---
+        # NOTE(review): this looks like PostgreSQL, where DESC sorts NULLs
+        # first, so rows without completed_at would lead this list - confirm
+        # whether that is intended.
+        failed_rows = await conn.fetch("""
+            SELECT
+                job_id,
+                url,
+                error_message,
+                completed_at,
+                requester_client_id
+            FROM jobs
+            WHERE status IN ('failed', 'partial')
+              AND created_at >= $1
+            ORDER BY completed_at DESC
+            LIMIT $2
+        """, cutoff, limit)
+
+        failed_jobs = []
+        for row in failed_rows:
+            message = row['error_message']
+            failed_jobs.append({
+                'job_id': str(row['job_id']),
+                'url': row['url'],
+                'error_type': categorize_error(message),
+                'error_message': message,
+                'failed_at': iso_or_now(row['completed_at']),
+                'client_id': row['requester_client_id'],
+            })
+
+        # --- slow jobs: baseline is the window's average scrape_time ---
+        avg_duration = await conn.fetchval("""
+            SELECT AVG(scrape_time)
+            FROM jobs
+            WHERE status = 'completed'
+              AND scrape_time IS NOT NULL
+              AND created_at >= $1
+        """, cutoff)
+
+        slow_jobs = []
+        has_baseline = bool(avg_duration) and avg_duration > 0
+        if has_baseline:
+            # Find jobs taking > 2x average duration
+            slow_rows = await conn.fetch("""
+                SELECT
+                    job_id,
+                    url,
+                    scrape_time,
+                    completed_at
+                FROM jobs
+                WHERE status = 'completed'
+                  AND scrape_time IS NOT NULL
+                  AND scrape_time > $1 * 2
+                  AND created_at >= $2
+                ORDER BY scrape_time DESC
+                LIMIT $3
+            """, avg_duration, cutoff, limit)
+
+            for row in slow_rows:
+                took = row['scrape_time']
+                slow_jobs.append({
+                    'job_id': str(row['job_id']),
+                    'url': row['url'],
+                    'duration_seconds': round(took, 1),
+                    'avg_duration_seconds': round(avg_duration, 1),
+                    'ratio': round(took / avg_duration, 1),
+                    'completed_at': iso_or_now(row['completed_at']),
+                })
+
+        # --- failed webhook callbacks ---
+        callback_rows = await conn.fetch("""
+            SELECT
+                job_id,
+                callback_url,
+                callback_status,
+                callback_attempts
+            FROM jobs
+            WHERE callback_url IS NOT NULL
+              AND callback_status = 'failed'
+              AND created_at >= $1
+            ORDER BY completed_at DESC
+            LIMIT $2
+        """, cutoff, limit)
+
+        callback_failures = []
+        for row in callback_rows:
+            callback_failures.append({
+                'job_id': str(row['job_id']),
+                'callback_url': row['callback_url'],
+                'status': row['callback_status'] or 'failed',
+                'attempts': row['callback_attempts'] or 0,
+                'last_error': None,  # Would need to query webhook_attempts table
+            })
+
+        return {
+            'failed_jobs': failed_jobs,
+            'slow_jobs': slow_jobs,
+            'callback_failures': callback_failures,
+            'total_problems': len(failed_jobs) + len(slow_jobs) + len(callback_failures),
+        }
+
+
+async def get_stats_by_version(
+    db: DatabaseManager,
+    period: TimePeriod,
+    limit: int = 20
+) -> List[Dict[str, Any]]:
+    """
+    Get performance statistics grouped by scraper version.
+
+    Jobs created within the look-back window are grouped by
+    (scraper_version, scraper_variant); at most ``limit`` groups are returned,
+    busiest first.
+
+    Args:
+        db: Database manager exposing ``pool.acquire()``.
+        period: Look-back window, converted with ``get_period_delta``.
+        limit: Maximum number of groups to return.
+
+    Returns:
+        One dict per group with the keys ``version``, ``variant``,
+        ``total_jobs``, ``success_rate``, ``avg_duration`` and
+        ``total_reviews``. ``avg_duration`` is None only when the group has no
+        completed job with a recorded scrape_time.
+    """
+    cutoff = datetime.now() - get_period_delta(period)
+
+    async with db.pool.acquire() as conn:
+        rows = await conn.fetch("""
+            SELECT
+                COALESCE(scraper_version, 'unknown') as version,
+                scraper_variant as variant,
+                COUNT(*) as total_jobs,
+                COUNT(*) FILTER (WHERE status = 'completed') as completed,
+                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
+                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
+                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
+            FROM jobs
+            WHERE created_at >= $1
+            GROUP BY scraper_version, scraper_variant
+            ORDER BY total_jobs DESC
+            LIMIT $2
+        """, cutoff, limit)
+
+        results = []
+        for row in rows:
+            completed = row['completed'] or 0
+            failed = row['failed'] or 0
+            finished = completed + failed
+            success_rate = (completed / finished * 100) if finished > 0 else 0.0
+
+            # Test against None rather than truthiness so that a genuine
+            # average of 0 is reported as 0.0 instead of "no data".
+            avg_duration = row['avg_duration']
+
+            results.append({
+                'version': row['version'],
+                'variant': row['variant'],
+                'total_jobs': row['total_jobs'],
+                'success_rate': round(success_rate, 1),
+                'avg_duration': round(avg_duration, 1) if avg_duration is not None else None,
+                'total_reviews': row['total_reviews'] or 0,
+            })
+
+        return results
+
+
+# ==================== Dependency Injection ====================
+
+# Module-level database handle shared by all dashboard endpoints.
+# It stays None until set_database() is called; get_db() reads it.
+_db: Optional[DatabaseManager] = None
+
+
+def set_database(db: DatabaseManager) -> None:
+    """Set the database instance for the router"""
+    # Rebinds the module global so later get_db() calls return this instance
+    global _db
+    _db = db
+
+
+def get_db() -> DatabaseManager:
+    """
+    FastAPI dependency that hands out the module's database instance.
+
+    Raises:
+        HTTPException: 500 when no instance has been set yet.
+    """
+    if _db is not None:
+        return _db
+    raise HTTPException(status_code=500, detail="Database not initialized")
+
+
+# ==================== API Endpoints ====================
+
+@router.get(
+    "/overview",
+    response_model=OverviewResponse,
+    summary="Get Dashboard Overview",
+    description="Get system-wide job statistics and success rates"
+)
+async def get_overview(
+    period: TimePeriod = Query(
+        TimePeriod.HOUR_24,
+        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
+    ),
+    db: DatabaseManager = Depends(get_db)
+) -> OverviewResponse:
+    """
+    Get system-wide dashboard statistics.
+
+    Returns aggregate job counts, success rates, and average durations
+    for the specified time period.
+
+    - **period**: Time window to analyze (default: 24h)
+      - 1h: Last hour
+      - 6h: Last 6 hours
+      - 24h: Last 24 hours
+      - 7d: Last 7 days
+      - 30d: Last 30 days
+
+    Raises:
+        HTTPException: 500 when computing the statistics fails.
+    """
+    try:
+        stats = await get_overview_stats(db, period)
+
+        return OverviewResponse(
+            period=stats['period'],
+            total_jobs=stats['total_jobs'],
+            completed_jobs=stats['completed_jobs'],
+            failed_jobs=stats['failed_jobs'],
+            running_jobs=stats['running_jobs'],
+            success_rate=stats['success_rate'],
+            avg_duration_seconds=stats['avg_duration_seconds'],
+            jobs_by_status=JobsByStatus(**stats['jobs_by_status']),
+            total_reviews_scraped=stats['total_reviews_scraped'],
+        )
+
+    except Exception as e:
+        # log.exception keeps the same message but also records the traceback,
+        # which log.error with a formatted string would drop.
+        log.exception("Error getting dashboard overview: %s", e)
+        raise HTTPException(status_code=500, detail=f"Failed to get overview: {str(e)}") from e
+
+
+@router.get(
+    "/by-client",
+    response_model=List[ClientStats],
+    summary="Get Stats by Client",
+    description="Get job statistics grouped by client"
+)
+async def get_by_client(
+    period: TimePeriod = Query(
+        TimePeriod.HOUR_24,
+        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
+    ),
+    limit: int = Query(50, description="Maximum number of clients to return", ge=1, le=200),
+    db: DatabaseManager = Depends(get_db)
+) -> List[ClientStats]:
+    """
+    Get job statistics grouped by client.
+
+    Returns aggregated statistics for each client including job counts,
+    success rates, and total reviews scraped. Results are ordered by
+    total job count descending.
+
+    - **period**: Time window to analyze (default: 24h)
+    - **limit**: Maximum number of clients to return (default: 50, range 1-200)
+
+    Raises:
+        HTTPException: 500 when computing the statistics fails.
+    """
+    try:
+        stats = await get_stats_by_client(db, period, limit)
+
+        return [
+            ClientStats(
+                client_id=s['client_id'],
+                source=s['source'],
+                total_jobs=s['total_jobs'],
+                completed=s['completed'],
+                failed=s['failed'],
+                success_rate=s['success_rate'],
+                total_reviews=s['total_reviews'],
+            )
+            for s in stats
+        ]
+
+    except Exception as e:
+        # log.exception keeps the same message but also records the traceback,
+        # which log.error with a formatted string would drop.
+        log.exception("Error getting client stats: %s", e)
+        raise HTTPException(status_code=500, detail=f"Failed to get client stats: {str(e)}") from e
+
+
+@router.get(
+    "/problems",
+    response_model=ProblemsResponse,
+    summary="Get Recent Problems",
+    description="Get recent failures, slow jobs, and callback issues"
+)
+async def get_problems_endpoint(
+    period: TimePeriod = Query(
+        TimePeriod.HOUR_24,
+        description="Time period for problems (1h, 6h, 24h, 7d, 30d)"
+    ),
+    limit: int = Query(20, description="Maximum number of items per category", ge=1, le=100),
+    db: DatabaseManager = Depends(get_db)
+) -> ProblemsResponse:
+    """
+    Get recent failures and issues.
+
+    Returns three categories of problems:
+    - **failed_jobs**: Jobs that failed with errors
+    - **slow_jobs**: Jobs that took more than 2x the average duration
+    - **callback_failures**: Webhook deliveries that failed
+
+    Each category includes relevant details for debugging and resolution.
+
+    - **period**: Time window to analyze (default: 24h)
+    - **limit**: Maximum items per category (default: 20, range 1-100)
+
+    Raises:
+        HTTPException: 500 when collecting the problems fails.
+    """
+    try:
+        problems = await get_problems(db, period, limit)
+
+        return ProblemsResponse(
+            failed_jobs=[FailedJob(**fj) for fj in problems['failed_jobs']],
+            slow_jobs=[SlowJob(**sj) for sj in problems['slow_jobs']],
+            callback_failures=[CallbackFailure(**cf) for cf in problems['callback_failures']],
+            total_problems=problems['total_problems'],
+        )
+
+    except Exception as e:
+        # log.exception keeps the same message but also records the traceback,
+        # which log.error with a formatted string would drop.
+        log.exception("Error getting problems: %s", e)
+        raise HTTPException(status_code=500, detail=f"Failed to get problems: {str(e)}") from e
+
+
+@router.get(
+    "/by-version",
+    response_model=List[VersionStats],
+    summary="Get Stats by Scraper Version",
+    description="Get performance statistics grouped by scraper version"
+)
+async def get_by_version(
+    period: TimePeriod = Query(
+        TimePeriod.HOUR_24,
+        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
+    ),
+    limit: int = Query(20, description="Maximum number of versions to return", ge=1, le=100),
+    db: DatabaseManager = Depends(get_db)
+) -> List[VersionStats]:
+    """
+    Get performance statistics grouped by scraper version.
+
+    Useful for comparing the performance of different scraper versions
+    and variants (e.g., 'stable' vs 'stealth'). Results are ordered by
+    total job count descending.
+
+    - **period**: Time window to analyze (default: 24h)
+    - **limit**: Maximum number of versions to return (default: 20, range 1-100)
+
+    Raises:
+        HTTPException: 500 when computing the statistics fails.
+    """
+    try:
+        stats = await get_stats_by_version(db, period, limit)
+
+        return [
+            VersionStats(
+                version=s['version'],
+                variant=s['variant'],
+                total_jobs=s['total_jobs'],
+                success_rate=s['success_rate'],
+                avg_duration=s['avg_duration'],
+                total_reviews=s['total_reviews'],
+            )
+            for s in stats
+        ]
+
+    except Exception as e:
+        # log.exception keeps the same message but also records the traceback,
+        # which log.error with a formatted string would drop.
+        log.exception("Error getting version stats: %s", e)
+        raise HTTPException(status_code=500, detail=f"Failed to get version stats: {str(e)}") from e