Phases 5-7: Dashboard UI, Admin API, and Auth middleware

Phase 5 - Main Dashboard:
- Dashboard overview page with system health stats
- Jobs by status breakdown, success rates, top clients
- Dashboard API (/api/dashboard/overview, by-client, problems, by-version)

Phase 6 - Admin/Scraper Management:
- Scrapers management page with traffic allocation UI
- Admin API for scraper CRUD operations
- Traffic percentage updates for A/B testing
- Promote/deprecate scraper versions

Phase 7 - Authentication:
- API key authentication middleware
- SHA-256 key hashing (keys never stored in plain text)
- Scope-based authorization (jobs:read, jobs:write, admin)
- Rate limiting per API key

Also:
- Updated api_server_production.py to include new routers
- Extended core/database.py with dashboard query methods
- Added dashboard link to sidebar navigation
- Updated CONTEXT-KEEPER.md to mark all phases complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 15:43:00 +00:00
parent 788ef84756
commit 39c80fc8be
11 changed files with 3465 additions and 16 deletions

756
api/routes/admin.py Normal file
View File

@@ -0,0 +1,756 @@
#!/usr/bin/env python3
"""
Admin API routes for scraper management.
Phase 6 - ReviewIQ Platform
Provides endpoints for:
- Listing registered scrapers with stats
- Registering new scraper versions
- Updating traffic allocation for A/B testing
- Deprecating scrapers (soft delete)
- Promoting scrapers to stable/default
"""
import json
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from uuid import UUID
from fastapi import APIRouter, HTTPException, Query, Depends
from pydantic import BaseModel, Field, validator
from core.database import DatabaseManager
from scrapers.registry import ScraperRegistry
log = logging.getLogger(__name__)
# Create router
router = APIRouter(prefix="/api/admin", tags=["admin"])
# ==================== Pydantic Models ====================
class ScraperStatsModel(BaseModel):
"""Statistics for a scraper over the last 24 hours."""
total_jobs: int = Field(default=0, description="Total jobs processed")
success_rate: float = Field(default=0.0, description="Success rate percentage")
avg_duration: float = Field(default=0.0, description="Average scrape duration in seconds")
class ScraperInfoResponse(BaseModel):
"""Response model for scraper information."""
id: str = Field(..., description="Unique scraper registry ID")
job_type: str = Field(..., description="Type of job this scraper handles")
version: str = Field(..., description="Semantic version string")
variant: str = Field(..., description="Release variant (stable, beta, canary)")
is_default: bool = Field(..., description="Whether this is the default scraper")
traffic_pct: int = Field(..., description="Traffic percentage for A/B testing (0-100)")
module_path: str = Field(..., description="Python module path")
function_name: Optional[str] = Field(None, description="Entry function name")
deprecated_at: Optional[str] = Field(None, description="Deprecation timestamp (ISO format)")
stats: ScraperStatsModel = Field(default_factory=ScraperStatsModel, description="Last 24h stats")
class RegisterScraperRequest(BaseModel):
"""Request model for registering a new scraper."""
job_type: str = Field(..., description="Type of job (e.g., 'google_reviews')")
version: str = Field(..., description="Semantic version string (e.g., '1.1.0')")
variant: str = Field(..., description="Release variant: stable, beta, or canary")
module_path: str = Field(..., description="Python module path")
function_name: str = Field(default="scrape", description="Entry function name")
traffic_pct: int = Field(default=0, description="Initial traffic percentage (0-100)", ge=0, le=100)
min_priority: int = Field(default=0, description="Minimum job priority required")
config: Optional[Dict[str, Any]] = Field(default=None, description="Optional configuration")
@validator('variant')
def validate_variant(cls, v):
if v not in ('stable', 'beta', 'canary'):
raise ValueError("variant must be 'stable', 'beta', or 'canary'")
return v
@validator('version')
def validate_version(cls, v):
# Basic semver validation
parts = v.split('.')
if len(parts) < 2:
raise ValueError("version must be semantic version format (e.g., '1.0.0')")
return v
class RegisterScraperResponse(BaseModel):
"""Response model for scraper registration."""
id: str = Field(..., description="Created scraper registry ID")
job_type: str = Field(..., description="Job type")
version: str = Field(..., description="Version string")
variant: str = Field(..., description="Release variant")
message: str = Field(..., description="Status message")
class UpdateTrafficRequest(BaseModel):
"""Request model for updating traffic percentage."""
traffic_pct: int = Field(..., description="New traffic percentage (0-100)", ge=0, le=100)
class UpdateTrafficResponse(BaseModel):
"""Response model for traffic update."""
id: str = Field(..., description="Scraper registry ID")
traffic_pct: int = Field(..., description="Updated traffic percentage")
message: str = Field(..., description="Status message")
class DeprecateResponse(BaseModel):
"""Response model for deprecation."""
id: str = Field(..., description="Scraper registry ID")
deprecated_at: str = Field(..., description="Deprecation timestamp")
message: str = Field(..., description="Status message")
class PromoteResponse(BaseModel):
"""Response model for promotion."""
id: str = Field(..., description="Scraper registry ID")
variant: str = Field(..., description="New variant (stable)")
is_default: bool = Field(..., description="Whether now default")
traffic_pct: int = Field(..., description="New traffic percentage")
message: str = Field(..., description="Status message")
# ==================== Database Helper Functions ====================
async def get_scraper_stats(
db: DatabaseManager,
scraper_id: str,
hours: int = 24
) -> ScraperStatsModel:
"""
Get statistics for a specific scraper over the given time period.
Args:
db: Database manager instance
scraper_id: UUID of the scraper registry entry
hours: Number of hours to look back (default: 24)
Returns:
ScraperStatsModel with job counts, success rate, and avg duration
"""
try:
async with db.pool.acquire() as conn:
# Query jobs that used this scraper version in the time period
stats = await conn.fetchrow("""
SELECT
COUNT(*) as total_jobs,
COUNT(*) FILTER (WHERE status = 'completed') as completed_jobs,
COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed_jobs,
AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration
FROM jobs
WHERE created_at >= NOW() - INTERVAL '%s hours'
AND (
metadata->>'scraper_id' = $1
OR (scraper_version IS NOT NULL AND EXISTS (
SELECT 1 FROM scraper_registry sr
WHERE sr.id = $2::uuid
AND sr.version = jobs.scraper_version
AND sr.variant = COALESCE(jobs.scraper_variant, sr.variant)
))
)
""", hours, scraper_id, scraper_id)
if not stats or stats['total_jobs'] == 0:
return ScraperStatsModel()
total = stats['total_jobs']
completed = stats['completed_jobs'] or 0
success_rate = (completed / total * 100) if total > 0 else 0.0
avg_duration = float(stats['avg_duration']) if stats['avg_duration'] else 0.0
return ScraperStatsModel(
total_jobs=total,
success_rate=round(success_rate, 2),
avg_duration=round(avg_duration, 2)
)
except Exception as e:
log.warning(f"Error getting scraper stats for {scraper_id}: {e}")
return ScraperStatsModel()
async def get_scraper_by_id_from_db(
db: DatabaseManager,
scraper_id: str
) -> Optional[Dict[str, Any]]:
"""
Get scraper by ID directly from database.
Args:
db: Database manager instance
scraper_id: UUID of the scraper registry entry
Returns:
Scraper dictionary or None if not found
"""
async with db.pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
id,
job_type,
version,
variant,
module_path,
function_name,
is_default,
traffic_pct,
min_priority,
config,
deprecated_at
FROM scraper_registry
WHERE id = $1
""", UUID(scraper_id))
if not row:
return None
return dict(row)
async def update_scraper_traffic(
db: DatabaseManager,
scraper_id: str,
traffic_pct: int
) -> bool:
"""
Update traffic percentage for a scraper.
Args:
db: Database manager instance
scraper_id: UUID of the scraper registry entry
traffic_pct: New traffic percentage (0-100)
Returns:
True if updated, False if not found
"""
async with db.pool.acquire() as conn:
result = await conn.execute("""
UPDATE scraper_registry
SET traffic_pct = $2
WHERE id = $1 AND deprecated_at IS NULL
""", UUID(scraper_id), traffic_pct)
return result.split()[-1] == "1"
async def deprecate_scraper_by_id(
db: DatabaseManager,
scraper_id: str
) -> Optional[str]:
"""
Deprecate a scraper by ID (soft delete).
Args:
db: Database manager instance
scraper_id: UUID of the scraper registry entry
Returns:
Deprecation timestamp as ISO string, or None if not found/already deprecated
"""
async with db.pool.acquire() as conn:
result = await conn.fetchval("""
UPDATE scraper_registry
SET deprecated_at = NOW(), traffic_pct = 0
WHERE id = $1 AND deprecated_at IS NULL
RETURNING deprecated_at
""", UUID(scraper_id))
if result:
return result.isoformat()
return None
async def promote_scraper_by_id(
db: DatabaseManager,
scraper_id: str,
default_traffic_pct: int = 80
) -> Optional[Dict[str, Any]]:
"""
Promote a scraper to stable variant, set as default, and give it majority traffic.
This will:
1. Set the scraper's variant to 'stable'
2. Set is_default to True
3. Set traffic_pct to default_traffic_pct (default: 80%)
4. Unset is_default on other scrapers of the same job_type
5. Reduce traffic_pct on other scrapers proportionally
Args:
db: Database manager instance
scraper_id: UUID of the scraper to promote
default_traffic_pct: Traffic percentage to assign (default: 80)
Returns:
Updated scraper dict or None if not found
"""
async with db.pool.acquire() as conn:
async with conn.transaction():
# Get the scraper to promote
scraper = await conn.fetchrow("""
SELECT id, job_type, version, variant
FROM scraper_registry
WHERE id = $1 AND deprecated_at IS NULL
""", UUID(scraper_id))
if not scraper:
return None
job_type = scraper['job_type']
# Unset is_default on other scrapers of same job_type
await conn.execute("""
UPDATE scraper_registry
SET is_default = FALSE
WHERE job_type = $1 AND id != $2
""", job_type, UUID(scraper_id))
# Reduce traffic on other active scrapers proportionally
# Calculate remaining traffic to distribute
remaining_traffic = 100 - default_traffic_pct
# Get other active scrapers
other_scrapers = await conn.fetch("""
SELECT id, traffic_pct
FROM scraper_registry
WHERE job_type = $1 AND id != $2 AND deprecated_at IS NULL AND traffic_pct > 0
""", job_type, UUID(scraper_id))
if other_scrapers:
total_other_traffic = sum(s['traffic_pct'] for s in other_scrapers)
if total_other_traffic > 0:
for s in other_scrapers:
new_pct = int((s['traffic_pct'] / total_other_traffic) * remaining_traffic)
await conn.execute("""
UPDATE scraper_registry
SET traffic_pct = $2
WHERE id = $1
""", s['id'], new_pct)
# Promote the target scraper
updated = await conn.fetchrow("""
UPDATE scraper_registry
SET
variant = 'stable',
is_default = TRUE,
traffic_pct = $2
WHERE id = $1
RETURNING id, job_type, version, variant, is_default, traffic_pct
""", UUID(scraper_id), default_traffic_pct)
if updated:
return dict(updated)
return None
# ==================== Dependency Injection ====================
_db: Optional[DatabaseManager] = None
_registry: Optional[ScraperRegistry] = None
def set_database(db: DatabaseManager):
"""Set the database instance for the router."""
global _db, _registry
_db = db
_registry = ScraperRegistry(db)
def get_db() -> DatabaseManager:
"""Dependency to get database instance."""
if _db is None:
raise HTTPException(status_code=500, detail="Database not initialized")
return _db
def get_registry() -> ScraperRegistry:
"""Dependency to get scraper registry instance."""
if _registry is None:
raise HTTPException(status_code=500, detail="Scraper registry not initialized")
return _registry
# ==================== API Endpoints ====================
@router.get(
"/scrapers",
response_model=List[ScraperInfoResponse],
summary="List All Scrapers",
description="Get a list of all registered scrapers with their stats"
)
async def list_scrapers(
job_type: Optional[str] = Query(None, description="Filter by job type"),
include_deprecated: bool = Query(False, description="Include deprecated scrapers"),
db: DatabaseManager = Depends(get_db),
registry: ScraperRegistry = Depends(get_registry)
):
"""
List all registered scrapers with their configuration and stats.
Returns scraper information including:
- Version and variant information
- Traffic allocation percentage
- Whether it's the default scraper
- Last 24h performance stats (total jobs, success rate, avg duration)
Use `job_type` filter to get scrapers for a specific job type.
Set `include_deprecated=true` to include deprecated scrapers.
"""
try:
# Refresh cache to get latest data
await registry.refresh_cache()
# Get all scrapers
scrapers = await registry.list_scrapers(
job_type=job_type,
include_deprecated=include_deprecated
)
# Enrich with stats
result = []
for scraper in scrapers:
stats = await get_scraper_stats(db, scraper['id'])
# Get full scraper info from DB to include job_type
full_info = await get_scraper_by_id_from_db(db, scraper['id'])
result.append(ScraperInfoResponse(
id=scraper['id'],
job_type=full_info['job_type'] if full_info else 'unknown',
version=scraper['version'],
variant=scraper['variant'],
is_default=scraper['is_default'],
traffic_pct=scraper['traffic_pct'],
module_path=scraper['module_path'],
function_name=scraper.get('function_name'),
deprecated_at=str(full_info['deprecated_at']) if full_info and full_info.get('deprecated_at') else None,
stats=stats
))
# Sort by job_type, then by version descending
result.sort(key=lambda x: (x.job_type, x.version), reverse=True)
return result
except Exception as e:
log.error(f"Error listing scrapers: {e}")
raise HTTPException(status_code=500, detail=f"Failed to list scrapers: {str(e)}")
@router.post(
"/scrapers",
response_model=RegisterScraperResponse,
summary="Register New Scraper",
description="Register a new scraper version"
)
async def register_scraper(
request: RegisterScraperRequest,
db: DatabaseManager = Depends(get_db),
registry: ScraperRegistry = Depends(get_registry)
):
"""
Register a new scraper version in the registry.
This allows adding new scraper implementations that can be used for:
- A/B testing (set traffic_pct to allocate traffic)
- Canary releases (set variant to 'canary' with low traffic_pct)
- Beta testing (set variant to 'beta')
The scraper won't receive any traffic until traffic_pct > 0.
**Parameters:**
- `job_type`: Type of scraping job (e.g., 'google_reviews')
- `version`: Semantic version (e.g., '1.1.0')
- `variant`: Release channel ('stable', 'beta', 'canary')
- `module_path`: Python module path (e.g., 'scrapers.google_reviews.v1_1_0')
- `function_name`: Entry function name (default: 'scrape')
- `traffic_pct`: Initial traffic allocation (0-100, default: 0)
- `config`: Optional configuration dict passed to the scraper
"""
try:
# Check if version already exists for this job_type
existing = await registry.list_scrapers(job_type=request.job_type, include_deprecated=True)
for scraper in existing:
if scraper['version'] == request.version:
raise HTTPException(
status_code=409,
detail=f"Scraper version {request.version} already exists for job_type {request.job_type}"
)
# Register the new scraper
scraper_id = await registry.register_scraper(
job_type=request.job_type,
version=request.version,
variant=request.variant,
module_path=request.module_path,
function_name=request.function_name,
is_default=False, # Never auto-set as default
traffic_pct=request.traffic_pct,
min_priority=request.min_priority,
config=request.config
)
log.info(f"Registered new scraper: {request.job_type} v{request.version} ({request.variant})")
return RegisterScraperResponse(
id=scraper_id,
job_type=request.job_type,
version=request.version,
variant=request.variant,
message=f"Successfully registered scraper {request.job_type} v{request.version} ({request.variant})"
)
except HTTPException:
raise
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
log.error(f"Error registering scraper: {e}")
raise HTTPException(status_code=500, detail=f"Failed to register scraper: {str(e)}")
@router.put(
"/scrapers/{scraper_id}/traffic",
response_model=UpdateTrafficResponse,
summary="Update Traffic Percentage",
description="Update the traffic allocation for a scraper"
)
async def update_traffic(
scraper_id: str,
request: UpdateTrafficRequest,
db: DatabaseManager = Depends(get_db),
registry: ScraperRegistry = Depends(get_registry)
):
"""
Update the traffic percentage for a specific scraper.
Traffic percentage determines what portion of requests are routed
to this scraper version. Used for:
- Gradual rollouts (start at 10%, increase to 50%, then 100%)
- A/B testing (set two versions to 50% each)
- Canary releases (set new version to 5-10%)
**Note:** Total traffic across all active scrapers of the same
job_type should not exceed 100%. The system uses weighted random
selection, so percentages are relative weights, not exact guarantees.
"""
try:
# Validate UUID format
try:
UUID(scraper_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid scraper ID format")
# Check scraper exists
scraper = await get_scraper_by_id_from_db(db, scraper_id)
if not scraper:
raise HTTPException(status_code=404, detail="Scraper not found")
if scraper.get('deprecated_at'):
raise HTTPException(status_code=400, detail="Cannot update traffic for deprecated scraper")
# Update traffic
success = await update_scraper_traffic(db, scraper_id, request.traffic_pct)
if not success:
raise HTTPException(status_code=500, detail="Failed to update traffic allocation")
# Invalidate registry cache
await registry.refresh_cache()
log.info(f"Updated traffic for scraper {scraper_id} to {request.traffic_pct}%")
return UpdateTrafficResponse(
id=scraper_id,
traffic_pct=request.traffic_pct,
message=f"Traffic updated to {request.traffic_pct}%"
)
except HTTPException:
raise
except Exception as e:
log.error(f"Error updating traffic for scraper {scraper_id}: {e}")
raise HTTPException(status_code=500, detail=f"Failed to update traffic: {str(e)}")
@router.post(
"/scrapers/{scraper_id}/deprecate",
response_model=DeprecateResponse,
summary="Deprecate Scraper",
description="Mark a scraper as deprecated (soft delete)"
)
async def deprecate_scraper(
scraper_id: str,
db: DatabaseManager = Depends(get_db),
registry: ScraperRegistry = Depends(get_registry)
):
"""
Deprecate a scraper version (soft delete).
This will:
- Set deprecated_at timestamp
- Set traffic_pct to 0 (no new requests)
- Keep the scraper in the registry for historical reference
Deprecated scrapers are excluded from normal routing but can
still be explicitly requested by version for debugging.
To permanently remove a scraper, use database admin tools.
"""
try:
# Validate UUID format
try:
UUID(scraper_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid scraper ID format")
# Check scraper exists
scraper = await get_scraper_by_id_from_db(db, scraper_id)
if not scraper:
raise HTTPException(status_code=404, detail="Scraper not found")
if scraper.get('deprecated_at'):
raise HTTPException(status_code=400, detail="Scraper is already deprecated")
# Deprecate
deprecated_at = await deprecate_scraper_by_id(db, scraper_id)
if not deprecated_at:
raise HTTPException(status_code=500, detail="Failed to deprecate scraper")
# Invalidate registry cache
await registry.refresh_cache()
log.info(f"Deprecated scraper {scraper_id}")
return DeprecateResponse(
id=scraper_id,
deprecated_at=deprecated_at,
message=f"Scraper deprecated. Traffic allocation set to 0%."
)
except HTTPException:
raise
except Exception as e:
log.error(f"Error deprecating scraper {scraper_id}: {e}")
raise HTTPException(status_code=500, detail=f"Failed to deprecate scraper: {str(e)}")
@router.post(
"/scrapers/{scraper_id}/promote",
response_model=PromoteResponse,
summary="Promote Scraper",
description="Promote scraper to stable variant and set as default"
)
async def promote_scraper(
scraper_id: str,
traffic_pct: int = Query(80, description="Traffic percentage to assign (0-100)", ge=0, le=100),
db: DatabaseManager = Depends(get_db),
registry: ScraperRegistry = Depends(get_registry)
):
"""
Promote a scraper to stable variant, set as default, and give it majority traffic.
This operation will:
1. Set the scraper's variant to 'stable'
2. Set is_default to True
3. Set traffic_pct to the specified value (default: 80%)
4. Unset is_default on other scrapers of the same job_type
5. Redistribute remaining traffic among other active scrapers
**Use cases:**
- Graduating a beta version to production
- Making a canary release the new stable version
- Switching to a new scraper implementation
**Parameters:**
- `traffic_pct`: Traffic percentage to assign (default: 80%)
"""
try:
# Validate UUID format
try:
UUID(scraper_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid scraper ID format")
# Check scraper exists
scraper = await get_scraper_by_id_from_db(db, scraper_id)
if not scraper:
raise HTTPException(status_code=404, detail="Scraper not found")
if scraper.get('deprecated_at'):
raise HTTPException(status_code=400, detail="Cannot promote a deprecated scraper")
# Promote
result = await promote_scraper_by_id(db, scraper_id, traffic_pct)
if not result:
raise HTTPException(status_code=500, detail="Failed to promote scraper")
# Invalidate registry cache
await registry.refresh_cache()
log.info(f"Promoted scraper {scraper_id} to stable with {traffic_pct}% traffic")
return PromoteResponse(
id=scraper_id,
variant='stable',
is_default=True,
traffic_pct=traffic_pct,
message=f"Scraper promoted to stable. Now default with {traffic_pct}% traffic."
)
except HTTPException:
raise
except Exception as e:
log.error(f"Error promoting scraper {scraper_id}: {e}")
raise HTTPException(status_code=500, detail=f"Failed to promote scraper: {str(e)}")
@router.get(
"/scrapers/{scraper_id}",
response_model=ScraperInfoResponse,
summary="Get Scraper Details",
description="Get detailed information about a specific scraper"
)
async def get_scraper_details(
scraper_id: str,
db: DatabaseManager = Depends(get_db),
registry: ScraperRegistry = Depends(get_registry)
):
"""
Get detailed information about a specific scraper including stats.
"""
try:
# Validate UUID format
try:
UUID(scraper_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid scraper ID format")
# Get scraper from DB
scraper = await get_scraper_by_id_from_db(db, scraper_id)
if not scraper:
raise HTTPException(status_code=404, detail="Scraper not found")
# Get stats
stats = await get_scraper_stats(db, scraper_id)
return ScraperInfoResponse(
id=str(scraper['id']),
job_type=scraper['job_type'],
version=scraper['version'],
variant=scraper['variant'],
is_default=scraper['is_default'],
traffic_pct=scraper['traffic_pct'],
module_path=scraper['module_path'],
function_name=scraper.get('function_name'),
deprecated_at=str(scraper['deprecated_at']) if scraper.get('deprecated_at') else None,
stats=stats
)
except HTTPException:
raise
except Exception as e:
log.error(f"Error getting scraper {scraper_id}: {e}")
raise HTTPException(status_code=500, detail=f"Failed to get scraper: {str(e)}")