Phases 5-7: Dashboard UI, Admin API, and Auth middleware

Phase 5 - Main Dashboard:
- Dashboard overview page with system health stats
- Jobs by status breakdown, success rates, top clients
- Dashboard API (/api/dashboard/overview, by-client, problems, by-version)

Phase 6 - Admin/Scraper Management:
- Scrapers management page with traffic allocation UI
- Admin API for scraper CRUD operations
- Traffic percentage updates for A/B testing
- Promote/deprecate scraper versions

Phase 7 - Authentication:
- API key authentication middleware
- SHA-256 key hashing (keys never stored in plain text)
- Scope-based authorization (jobs:read, jobs:write, admin)
- Rate limiting per API key

Also:
- Updated api_server_production.py to include new routers
- Extended core/database.py with dashboard query methods
- Added dashboard link to sidebar navigation
- Updated CONTEXT-KEEPER.md to mark all phases complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 15:43:00 +00:00
parent 788ef84756
commit 39c80fc8be
11 changed files with 3465 additions and 16 deletions

View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""
API Middleware for ReviewIQ.
This module exports authentication and other middleware components.
"""
from api.middleware.auth import (
APIKeyAuth,
api_key_header,
generate_api_key,
create_auth,
AVAILABLE_SCOPES,
)
__all__ = [
"APIKeyAuth",
"api_key_header",
"generate_api_key",
"create_auth",
"AVAILABLE_SCOPES",
]

326
api/middleware/auth.py Normal file
View File

@@ -0,0 +1,326 @@
#!/usr/bin/env python3
"""
API Key Authentication Middleware for ReviewIQ Phase 7.
Security Model:
- API keys are never stored in plain text
- Only SHA-256 hashes are stored in the database
- First 8 characters (prefix) are stored for identification in logs/UI
- Keys follow format: "riq_" + 32 random alphanumeric characters
Authentication Flow:
1. Client sends API key in X-API-Key header
2. Server hashes the received key with SHA-256
3. Server looks up the hash in api_keys table
4. If found, active, and not expired, request is authenticated
5. Scopes are checked for protected endpoints
"""
import hashlib
import secrets
import string
import logging
from datetime import datetime
from functools import wraps
from typing import Optional, List, Callable
from uuid import UUID
from fastapi import Request, HTTPException, Depends
from fastapi.security import APIKeyHeader
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Security header for API key.
# auto_error=False: a missing header reaches the dependency as None instead of
# being rejected by FastAPI itself, so verify_api_key can log it and build its
# own 401 response.
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
# Key format constants
# Literal prefix that every generated key starts with and that incoming keys
# are checked against.
API_KEY_PREFIX = "riq_"
# Number of random characters appended after the prefix by generate_api_key().
API_KEY_RANDOM_LENGTH = 32
API_KEY_PREFIX_STORE_LENGTH = 8 # First 8 chars stored for identification
def generate_api_key() -> str:
    """
    Build a new plain-text API key.

    The key is API_KEY_PREFIX followed by API_KEY_RANDOM_LENGTH characters,
    each drawn independently from lowercase ASCII letters and digits with the
    ``secrets`` module (a CSPRNG, unlike ``random``).

    Returns:
        The newly generated key, e.g. "riq_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6".
    """
    charset = string.ascii_lowercase + string.digits
    picked = [secrets.choice(charset) for _ in range(API_KEY_RANDOM_LENGTH)]
    return API_KEY_PREFIX + ''.join(picked)
class APIKeyAuth:
    """
    API key authentication for FastAPI routes.

    Incoming keys are read from the X-API-Key header, hashed with SHA-256 and
    looked up through the injected database object; only the hash is handed
    to the database.

    Usage:
        from api.middleware.auth import APIKeyAuth

        # Initialize with database
        auth = APIKeyAuth(db)

        # Use as dependency
        @app.get("/protected")
        async def protected_endpoint(client: dict = Depends(auth.verify_api_key)):
            return {"client_id": client["client_id"]}

        # Require specific scope
        @app.post("/admin-only")
        async def admin_endpoint(client: dict = Depends(auth.require_scope("admin"))):
            return {"message": "Admin access granted"}
    """

    def __init__(self, db):
        """
        Initialize API key authentication.

        Args:
            db: Object exposing the async methods
                ``get_api_key_by_hash(key_hash)`` and
                ``update_api_key_last_used(key_id)``.
        """
        self.db = db

    async def verify_api_key(
        self,
        request: Request,
        api_key: Optional[str] = Depends(api_key_header)
    ) -> dict:
        """
        Verify API key and return client info.

        This is a FastAPI dependency that validates the X-API-Key header
        and returns information about the authenticated client.

        Args:
            request: FastAPI request object (used only for logging)
            api_key: API key from X-API-Key header, or None when absent

        Returns:
            dict: Client information:
                {
                    "client_id": "veritas_123",
                    "key_id": "uuid-of-key",
                    "key_prefix": "riq_a1b2",
                    "name": "Production Key",
                    "scopes": ["jobs:read", "jobs:write"],
                    "rate_limit_rpm": 60
                }

        Raises:
            HTTPException 401: If API key is missing, malformed or unknown
            HTTPException 403: If API key is inactive or expired
        """
        if not api_key:
            log.warning(f"Missing API key for request: {request.method} {request.url.path}")
            raise HTTPException(
                status_code=401,
                detail="Missing API key. Include X-API-Key header.",
                headers={"WWW-Authenticate": "ApiKey"}
            )

        # Cheap format check before touching the database.
        if not api_key.startswith(API_KEY_PREFIX):
            log.warning(f"Invalid API key format (wrong prefix): {self.get_key_prefix(api_key)}...")
            raise HTTPException(
                status_code=401,
                detail="Invalid API key format.",
                headers={"WWW-Authenticate": "ApiKey"}
            )

        # Only the hash is used for the lookup.
        key_hash = self.hash_api_key(api_key)
        key_data = await self.db.get_api_key_by_hash(key_hash)

        if not key_data:
            # Log only the storable prefix. The previous 12-character slice
            # exposed more of the secret than the prefix kept for identification.
            log.warning(f"Unknown API key attempted: {self.get_key_prefix(api_key)}...")
            raise HTTPException(
                status_code=401,
                detail="Invalid API key.",
                headers={"WWW-Authenticate": "ApiKey"}
            )

        # Revoked keys are rejected with 403 (the key is known, but not allowed).
        if not key_data.get('is_active', False):
            log.warning(f"Inactive API key used: {key_data['key_prefix']} (client: {key_data['client_id']})")
            raise HTTPException(
                status_code=403,
                detail="API key has been revoked."
            )

        # Expiration check; a missing/NULL expires_at means the key never expires.
        expires_at = key_data.get('expires_at')
        if expires_at and self._is_expired(expires_at):
            log.warning(f"Expired API key used: {key_data['key_prefix']} (client: {key_data['client_id']})")
            raise HTTPException(
                status_code=403,
                detail="API key has expired."
            )

        # Best-effort bookkeeping: the update is awaited, but a failure here
        # must never fail an otherwise valid request.
        try:
            await self.db.update_api_key_last_used(key_data['id'])
        except Exception as e:
            log.error(f"Failed to update last_used_at for key {key_data['key_prefix']}: {e}")

        # Debug level keeps successful authentications out of normal logs.
        log.debug(f"Authenticated: client={key_data['client_id']} key={key_data['key_prefix']}")

        return {
            "client_id": key_data['client_id'],
            "key_id": str(key_data['id']),
            "key_prefix": key_data['key_prefix'],
            "name": key_data['name'],
            # `or []` also covers a stored NULL, which .get()'s default does not.
            "scopes": key_data.get('scopes') or [],
            "rate_limit_rpm": key_data.get('rate_limit_rpm', 60)
        }

    def require_scope(self, scope: str) -> Callable:
        """
        Create a dependency that requires a specific scope.

        Usage:
            @app.post("/jobs")
            async def create_job(client: dict = Depends(auth.require_scope("jobs:write"))):
                # Only accessible with jobs:write scope
                pass

        Args:
            scope: Required scope string (e.g., "jobs:read", "jobs:write", "admin")

        Returns:
            FastAPI dependency function that verifies the API key, checks the
            scope and returns the client dict. It raises HTTPException 403 when
            the client has neither ``scope`` nor ``admin``.
        """
        async def scope_dependency(
            request: Request,
            api_key: Optional[str] = Depends(api_key_header)
        ) -> dict:
            # Authentication first; this raises 401/403 on its own.
            client = await self.verify_api_key(request, api_key)
            client_scopes = client.get('scopes') or []

            # Admin scope grants all permissions
            if 'admin' in client_scopes:
                return client

            if scope not in client_scopes:
                log.warning(
                    f"Scope denied: client={client['client_id']} "
                    f"required={scope} has={client_scopes}"
                )
                raise HTTPException(
                    status_code=403,
                    detail=f"Insufficient permissions. Required scope: {scope}"
                )
            return client

        return scope_dependency

    def require_any_scope(self, scopes: List[str]) -> Callable:
        """
        Create a dependency that requires any one of the specified scopes.

        Usage:
            @app.get("/jobs/{job_id}")
            async def get_job(client: dict = Depends(auth.require_any_scope(["jobs:read", "jobs:write"]))):
                pass

        Args:
            scopes: List of acceptable scopes (client needs at least one)

        Returns:
            FastAPI dependency function that returns the client dict, or raises
            HTTPException 403 when the client holds none of ``scopes`` and is
            not ``admin``.
        """
        async def scope_dependency(
            request: Request,
            api_key: Optional[str] = Depends(api_key_header)
        ) -> dict:
            client = await self.verify_api_key(request, api_key)
            client_scopes = client.get('scopes') or []

            # Admin scope grants all permissions
            if 'admin' in client_scopes:
                return client

            if not any(s in client_scopes for s in scopes):
                log.warning(
                    f"Scope denied: client={client['client_id']} "
                    f"required_any={scopes} has={client_scopes}"
                )
                raise HTTPException(
                    status_code=403,
                    detail=f"Insufficient permissions. Required one of: {', '.join(scopes)}"
                )
            return client

        return scope_dependency

    @staticmethod
    def _is_expired(expires_at: datetime) -> bool:
        """
        Tell whether ``expires_at`` lies in the past.

        Works for both naive values (interpreted as UTC, as the original
        ``utcnow()`` comparison did) and timezone-aware values; comparing an
        aware value with a naive "now" would raise TypeError.
        """
        from datetime import timezone  # local import: module only imports `datetime`

        now = datetime.now(timezone.utc)
        if expires_at.tzinfo is None or expires_at.utcoffset() is None:
            now = now.replace(tzinfo=None)
        return expires_at < now

    @staticmethod
    def hash_api_key(api_key: str) -> str:
        """
        Hash API key for storage/lookup using SHA-256.

        This is a one-way hash - the original key cannot be recovered.

        Args:
            api_key: Plain text API key

        Returns:
            64-character hexadecimal hash string
        """
        return hashlib.sha256(api_key.encode('utf-8')).hexdigest()

    @staticmethod
    def get_key_prefix(api_key: str) -> str:
        """
        Extract the identifying prefix from an API key.

        Args:
            api_key: Plain text API key

        Returns:
            First API_KEY_PREFIX_STORE_LENGTH (8) characters of the key
            (e.g., "riq_a1b2"); shorter input is returned unchanged.
        """
        return api_key[:API_KEY_PREFIX_STORE_LENGTH]
# Convenience function for creating auth instance
def create_auth(db) -> APIKeyAuth:
    """
    Build an APIKeyAuth bound to the given database object.

    Args:
        db: Database object forwarded unchanged to the APIKeyAuth constructor

    Returns:
        A new APIKeyAuth instance
    """
    auth = APIKeyAuth(db)
    return auth
# Available scopes documentation
# Maps each scope name to a human-readable description. APIKeyAuth in this
# module does not consult this mapping when checking scopes; only the "admin"
# entry has special handling there (it bypasses every scope check).
AVAILABLE_SCOPES = {
"jobs:read": "Read job status and results",
"jobs:write": "Create and cancel jobs",
"batches:read": "Read batch status and results",
"batches:write": "Create and manage batches",
"webhooks:manage": "Configure webhook endpoints",
"admin": "Full administrative access (includes all other scopes)"
}

View File

@@ -4,8 +4,14 @@ API Routes for ReviewIQ.
This module exports all route modules for easy import into the main server.
"""
from api.routes.batches import router as batches_router, set_database as set_batches_db
from api.routes.dashboard import router as dashboard_router, set_database as set_dashboard_db
from api.routes.admin import router as admin_router, set_database as set_admin_db
__all__ = [
'batches_router',
'set_batches_db',
'dashboard_router',
'set_dashboard_db',
'admin_router',
'set_admin_db',
]

756
api/routes/admin.py Normal file
View File

@@ -0,0 +1,756 @@
#!/usr/bin/env python3
"""
Admin API routes for scraper management.
Phase 6 - ReviewIQ Platform
Provides endpoints for:
- Listing registered scrapers with stats
- Registering new scraper versions
- Updating traffic allocation for A/B testing
- Deprecating scrapers (soft delete)
- Promoting scrapers to stable/default
"""
import json
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from uuid import UUID
from fastapi import APIRouter, HTTPException, Query, Depends
from pydantic import BaseModel, Field, validator
from core.database import DatabaseManager
from scrapers.registry import ScraperRegistry
log = logging.getLogger(__name__)
# Create router
router = APIRouter(prefix="/api/admin", tags=["admin"])
# ==================== Pydantic Models ====================
class ScraperStatsModel(BaseModel):
    """Statistics for a scraper over the last 24 hours."""
    # Every field has a zero default so ScraperStatsModel() means "no data".
    total_jobs: int = Field(
        description="Total jobs processed",
        default=0,
    )
    success_rate: float = Field(
        description="Success rate percentage",
        default=0.0,
    )
    avg_duration: float = Field(
        description="Average scrape duration in seconds",
        default=0.0,
    )
class ScraperInfoResponse(BaseModel):
    """Response model for scraper information."""
    # --- identity ---
    id: str = Field(
        ...,
        description="Unique scraper registry ID",
    )
    job_type: str = Field(
        ...,
        description="Type of job this scraper handles",
    )
    version: str = Field(
        ...,
        description="Semantic version string",
    )
    variant: str = Field(
        ...,
        description="Release variant (stable, beta, canary)",
    )
    # --- routing ---
    is_default: bool = Field(
        ...,
        description="Whether this is the default scraper",
    )
    traffic_pct: int = Field(
        ...,
        description="Traffic percentage for A/B testing (0-100)",
    )
    # --- implementation location ---
    module_path: str = Field(
        ...,
        description="Python module path",
    )
    function_name: Optional[str] = Field(
        None,
        description="Entry function name",
    )
    # --- lifecycle and metrics ---
    deprecated_at: Optional[str] = Field(
        None,
        description="Deprecation timestamp (ISO format)",
    )
    stats: ScraperStatsModel = Field(
        description="Last 24h stats",
        default_factory=ScraperStatsModel,
    )
class RegisterScraperRequest(BaseModel):
    """Request model for registering a new scraper."""
    job_type: str = Field(..., description="Type of job (e.g., 'google_reviews')")
    version: str = Field(..., description="Semantic version string (e.g., '1.1.0')")
    variant: str = Field(..., description="Release variant: stable, beta, or canary")
    module_path: str = Field(..., description="Python module path")
    function_name: str = Field(default="scrape", description="Entry function name")
    traffic_pct: int = Field(default=0, description="Initial traffic percentage (0-100)", ge=0, le=100)
    min_priority: int = Field(default=0, description="Minimum job priority required")
    config: Optional[Dict[str, Any]] = Field(default=None, description="Optional configuration")

    @validator('variant')
    def validate_variant(cls, v):
        """Accept only the three known release channels."""
        if v not in ('stable', 'beta', 'canary'):
            raise ValueError("variant must be 'stable', 'beta', or 'canary'")
        return v

    @validator('version')
    def validate_version(cls, v):
        """
        Require a dotted numeric version with at least MAJOR.MINOR.

        Accepted: "1.0", "1.1.0", "1.1.0-beta.1", "2.0.0+build.7".
        Rejected: ".", "1.", "a.b", "1" - the previous check only counted
        dot-separated parts, so empty or non-numeric components slipped through.
        """
        import re  # local import: `re` is not among this module's imports

        # Numeric core (two or more components), optional pre-release/build suffix.
        if not re.fullmatch(r"[0-9]+(\.[0-9]+)+([-+].+)?", v):
            raise ValueError("version must be semantic version format (e.g., '1.0.0')")
        return v
class RegisterScraperResponse(BaseModel):
    """Response model for scraper registration."""
    # All fields are required; the endpoint echoes the request values plus the new ID.
    id: str = Field(
        ...,
        description="Created scraper registry ID",
    )
    job_type: str = Field(
        ...,
        description="Job type",
    )
    version: str = Field(
        ...,
        description="Version string",
    )
    variant: str = Field(
        ...,
        description="Release variant",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
class UpdateTrafficRequest(BaseModel):
    """Request model for updating traffic percentage."""
    # Bounds are enforced by the model: 0 <= traffic_pct <= 100.
    traffic_pct: int = Field(
        ...,
        le=100,
        ge=0,
        description="New traffic percentage (0-100)",
    )
class UpdateTrafficResponse(BaseModel):
    """Response model for traffic update."""
    # All three fields are required.
    id: str = Field(
        ...,
        description="Scraper registry ID",
    )
    traffic_pct: int = Field(
        ...,
        description="Updated traffic percentage",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
class DeprecateResponse(BaseModel):
    """Response model for deprecation."""
    # deprecated_at is carried as a string, not as a datetime.
    id: str = Field(
        ...,
        description="Scraper registry ID",
    )
    deprecated_at: str = Field(
        ...,
        description="Deprecation timestamp",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
class PromoteResponse(BaseModel):
    """Response model for promotion."""
    # All fields are required.
    id: str = Field(
        ...,
        description="Scraper registry ID",
    )
    variant: str = Field(
        ...,
        description="New variant (stable)",
    )
    is_default: bool = Field(
        ...,
        description="Whether now default",
    )
    traffic_pct: int = Field(
        ...,
        description="New traffic percentage",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
# ==================== Database Helper Functions ====================
async def get_scraper_stats(
    db: DatabaseManager,
    scraper_id: str,
    hours: int = 24
) -> ScraperStatsModel:
    """
    Get statistics for a specific scraper over the given time period.

    A job counts towards the scraper when either its metadata carries this
    scraper_id, or its scraper_version (and scraper_variant, when set) match
    the registry entry with this ID.

    This is best-effort: any error (including a malformed scraper_id) is
    logged and an all-zero ScraperStatsModel is returned, so a stats failure
    never breaks the calling endpoint.

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper registry entry
        hours: Number of hours to look back (default: 24)

    Returns:
        ScraperStatsModel with job counts, success rate, and avg duration
    """
    try:
        # Bound three times on purpose: the interval, a text comparison and a
        # uuid comparison each get their own typed placeholder. The previous
        # query embedded '%s' (not a placeholder for $n-style parameters) and
        # passed three arguments for two placeholders, so it always failed and
        # fell through to the empty-stats fallback below.
        scraper_id_text = str(scraper_id)
        async with db.pool.acquire() as conn:
            stats = await conn.fetchrow("""
                SELECT
                    COUNT(*) AS total_jobs,
                    COUNT(*) FILTER (WHERE status = 'completed') AS completed_jobs,
                    COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) AS failed_jobs,
                    AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) AS avg_duration
                FROM jobs
                WHERE created_at >= NOW() - ($1::int * INTERVAL '1 hour')
                  AND (
                    metadata->>'scraper_id' = $2::text
                    OR (scraper_version IS NOT NULL AND EXISTS (
                        SELECT 1 FROM scraper_registry sr
                        WHERE sr.id = $3::uuid
                          AND sr.version = jobs.scraper_version
                          AND sr.variant = COALESCE(jobs.scraper_variant, sr.variant)
                    ))
                  )
            """, hours, scraper_id_text, scraper_id_text)

        if not stats or stats['total_jobs'] == 0:
            return ScraperStatsModel()

        total = stats['total_jobs']
        completed = stats['completed_jobs'] or 0
        success_rate = completed / total * 100
        # AVG() is NULL when no completed job has a scrape_time.
        avg_duration = float(stats['avg_duration']) if stats['avg_duration'] else 0.0

        return ScraperStatsModel(
            total_jobs=total,
            success_rate=round(success_rate, 2),
            avg_duration=round(avg_duration, 2)
        )
    except Exception as e:
        # Keep the traceback: a swallowed error here previously hid a broken query.
        log.warning(f"Error getting scraper stats for {scraper_id}: {e}", exc_info=True)
        return ScraperStatsModel()
async def get_scraper_by_id_from_db(
    db: DatabaseManager,
    scraper_id: str
) -> Optional[Dict[str, Any]]:
    """
    Fetch one scraper_registry row by primary key, deprecated or not.

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper registry entry, as a string

    Returns:
        The row as a plain dict, or None when no row has that ID

    Raises:
        ValueError: If scraper_id is not a valid UUID string
    """
    async with db.pool.acquire() as conn:
        # SQL text is reproduced verbatim, hence the left-aligned layout.
        record = await conn.fetchrow("""
SELECT
id,
job_type,
version,
variant,
module_path,
function_name,
is_default,
traffic_pct,
min_priority,
config,
deprecated_at
FROM scraper_registry
WHERE id = $1
""", UUID(scraper_id))
        return dict(record) if record else None
async def update_scraper_traffic(
    db: DatabaseManager,
    scraper_id: str,
    traffic_pct: int
) -> bool:
    """
    Set traffic_pct on one non-deprecated scraper.

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper registry entry, as a string
        traffic_pct: New traffic percentage (0-100)

    Returns:
        True when exactly one row was updated; False when the ID is unknown
        or the scraper is deprecated

    Raises:
        ValueError: If scraper_id is not a valid UUID string
    """
    async with db.pool.acquire() as conn:
        # SQL text is reproduced verbatim, hence the left-aligned layout.
        status = await conn.execute("""
UPDATE scraper_registry
SET traffic_pct = $2
WHERE id = $1 AND deprecated_at IS NULL
""", UUID(scraper_id), traffic_pct)
        # The command status ends with the affected-row count (e.g. "UPDATE 1").
        affected_rows = status.split()[-1]
        return affected_rows == "1"
async def deprecate_scraper_by_id(
    db: DatabaseManager,
    scraper_id: str
) -> Optional[str]:
    """
    Soft-delete a scraper: stamp deprecated_at and zero its traffic.

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper registry entry, as a string

    Returns:
        The new deprecation timestamp in ISO format, or None when the ID is
        unknown or the scraper was already deprecated

    Raises:
        ValueError: If scraper_id is not a valid UUID string
    """
    async with db.pool.acquire() as conn:
        # SQL text is reproduced verbatim, hence the left-aligned layout.
        stamped_at = await conn.fetchval("""
UPDATE scraper_registry
SET deprecated_at = NOW(), traffic_pct = 0
WHERE id = $1 AND deprecated_at IS NULL
RETURNING deprecated_at
""", UUID(scraper_id))
        # No matching row -> fetchval yields None.
        return stamped_at.isoformat() if stamped_at else None
def _split_traffic(weights: List[int], total: int) -> List[int]:
    """Split ``total`` whole points across ``weights`` proportionally (largest-remainder), so the shares sum to ``total``."""
    weight_sum = sum(weights)
    if weight_sum <= 0 or total <= 0:
        return [0] * len(weights)
    # Floor share for everyone first (exact integer arithmetic) ...
    shares = [w * total // weight_sum for w in weights]
    # ... then hand the few leftover points to the largest fractional parts.
    leftover = total - sum(shares)
    by_remainder = sorted(
        range(len(weights)),
        key=lambda i: (weights[i] * total) % weight_sum,
        reverse=True,
    )
    for i in by_remainder[:leftover]:
        shares[i] += 1
    return shares


async def promote_scraper_by_id(
    db: DatabaseManager,
    scraper_id: str,
    default_traffic_pct: int = 80
) -> Optional[Dict[str, Any]]:
    """
    Promote a scraper to stable variant, set as default, and give it majority traffic.

    Inside a single transaction this will:
    1. Set the scraper's variant to 'stable'
    2. Set is_default to True
    3. Set traffic_pct to default_traffic_pct (default: 80%)
    4. Unset is_default on other scrapers of the same job_type
    5. Rescale traffic_pct of the other active scrapers (those with
       traffic_pct > 0) so they share the remaining ``100 - default_traffic_pct``
       points in proportion to their current values

    The remaining points are distributed with the largest-remainder method so
    that they add up exactly; plain truncation used to drop points (e.g. 20
    points over three equal scrapers became 6+6+6 = 18).

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper to promote, as a string
        default_traffic_pct: Traffic percentage to assign (default: 80)

    Returns:
        Updated scraper dict (id, job_type, version, variant, is_default,
        traffic_pct), or None if the scraper is unknown or deprecated

    Raises:
        ValueError: If scraper_id is not a valid UUID string
    """
    target_id = UUID(scraper_id)

    async with db.pool.acquire() as conn:
        async with conn.transaction():
            # Only a non-deprecated scraper can be promoted.
            scraper = await conn.fetchrow("""
                SELECT id, job_type, version, variant
                FROM scraper_registry
                WHERE id = $1 AND deprecated_at IS NULL
            """, target_id)
            if not scraper:
                return None

            job_type = scraper['job_type']

            # There is exactly one default per job_type after this call.
            await conn.execute("""
                UPDATE scraper_registry
                SET is_default = FALSE
                WHERE job_type = $1 AND id != $2
            """, job_type, target_id)

            # Never hand out a negative budget if the caller passes > 100.
            remaining_traffic = max(0, 100 - default_traffic_pct)

            # ORDER BY makes the tie-break for leftover points deterministic.
            other_scrapers = await conn.fetch("""
                SELECT id, traffic_pct
                FROM scraper_registry
                WHERE job_type = $1 AND id != $2 AND deprecated_at IS NULL AND traffic_pct > 0
                ORDER BY traffic_pct DESC, id
            """, job_type, target_id)

            if other_scrapers:
                new_shares = _split_traffic(
                    [s['traffic_pct'] for s in other_scrapers],
                    remaining_traffic,
                )
                for s, new_pct in zip(other_scrapers, new_shares):
                    await conn.execute("""
                        UPDATE scraper_registry
                        SET traffic_pct = $2
                        WHERE id = $1
                    """, s['id'], new_pct)

            # Finally promote the target itself.
            updated = await conn.fetchrow("""
                UPDATE scraper_registry
                SET
                    variant = 'stable',
                    is_default = TRUE,
                    traffic_pct = $2
                WHERE id = $1
                RETURNING id, job_type, version, variant, is_default, traffic_pct
            """, target_id, default_traffic_pct)

            return dict(updated) if updated else None
# ==================== Dependency Injection ====================
# Module-level singletons backing the get_db / get_registry dependencies.
# Both stay None until set_database() is called; the getters answer with
# HTTP 500 while they are unset.
_db: Optional[DatabaseManager] = None
_registry: Optional[ScraperRegistry] = None
def set_database(db: DatabaseManager):
    """
    Install the database used by this router.

    Also builds the ScraperRegistry that the endpoints receive, bound to the
    same database.
    """
    global _db, _registry

    # Store the database first, then derive the registry from it.
    _db = db
    _registry = ScraperRegistry(db)
def get_db() -> DatabaseManager:
    """
    FastAPI dependency returning the configured database.

    Raises:
        HTTPException 500: If set_database() has not been called yet
    """
    if _db is not None:
        return _db
    raise HTTPException(status_code=500, detail="Database not initialized")
def get_registry() -> ScraperRegistry:
    """
    FastAPI dependency returning the configured scraper registry.

    Raises:
        HTTPException 500: If set_database() has not been called yet
    """
    if _registry is not None:
        return _registry
    raise HTTPException(status_code=500, detail="Scraper registry not initialized")
# ==================== API Endpoints ====================
def _version_sort_key(version: str) -> list:
    """Build a sort key that orders dotted versions numerically ("1.10.0" after "1.9.0")."""
    key = []
    for part in str(version).split('.'):
        # Leading ASCII digits of the component decide the order; the raw
        # text is kept as a tie-breaker (e.g. "0" vs "0-beta").
        digits = ''
        for ch in part:
            if ch not in '0123456789':
                break
            digits += ch
        key.append((int(digits) if digits else -1, part))
    return key


@router.get(
    "/scrapers",
    response_model=List[ScraperInfoResponse],
    summary="List All Scrapers",
    description="Get a list of all registered scrapers with their stats"
)
async def list_scrapers(
    job_type: Optional[str] = Query(None, description="Filter by job type"),
    include_deprecated: bool = Query(False, description="Include deprecated scrapers"),
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    List all registered scrapers with their configuration and stats.

    Returns scraper information including:
    - Version and variant information
    - Traffic allocation percentage
    - Whether it's the default scraper
    - Last 24h performance stats (total jobs, success rate, avg duration)

    Use `job_type` filter to get scrapers for a specific job type.
    Set `include_deprecated=true` to include deprecated scrapers.

    The result is ordered descending by job_type and, within a job_type,
    newest version first (versions are compared numerically per component).

    Raises:
        HTTPException 500: On any unexpected failure
    """
    try:
        # Refresh cache to get latest data
        await registry.refresh_cache()

        scrapers = await registry.list_scrapers(
            job_type=job_type,
            include_deprecated=include_deprecated
        )

        result = []
        for scraper in scrapers:
            stats = await get_scraper_stats(db, scraper['id'])
            # The registry entry is complemented from the DB row for
            # job_type and deprecated_at.
            full_info = await get_scraper_by_id_from_db(db, scraper['id'])
            deprecated_at = full_info.get('deprecated_at') if full_info else None

            result.append(ScraperInfoResponse(
                id=scraper['id'],
                job_type=full_info['job_type'] if full_info else 'unknown',
                version=scraper['version'],
                variant=scraper['variant'],
                is_default=scraper['is_default'],
                traffic_pct=scraper['traffic_pct'],
                module_path=scraper['module_path'],
                function_name=scraper.get('function_name'),
                # ISO 8601, as documented on the field (str() would use a space separator).
                deprecated_at=deprecated_at.isoformat() if deprecated_at else None,
                stats=stats
            ))

        # A plain string comparison would rank "1.9.0" above "1.10.0".
        result.sort(
            key=lambda x: (x.job_type, _version_sort_key(x.version)),
            reverse=True
        )
        return result
    except HTTPException:
        # Same convention as the other endpoints: never re-wrap an HTTP error.
        raise
    except Exception as e:
        log.error(f"Error listing scrapers: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list scrapers: {str(e)}")
@router.post(
    "/scrapers",
    response_model=RegisterScraperResponse,
    summary="Register New Scraper",
    description="Register a new scraper version"
)
async def register_scraper(
    request: RegisterScraperRequest,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Add a new scraper version to the registry.

    Typical uses:
    - A/B testing (allocate traffic through traffic_pct)
    - Canary releases (variant 'canary' with a low traffic_pct)
    - Beta testing (variant 'beta')

    The new entry is never registered as the default scraper.

    **Parameters:**
    - `job_type`: Type of scraping job (e.g., 'google_reviews')
    - `version`: Semantic version (e.g., '1.1.0')
    - `variant`: Release channel ('stable', 'beta', 'canary')
    - `module_path`: Python module path (e.g., 'scrapers.google_reviews.v1_1_0')
    - `function_name`: Entry function name (default: 'scrape')
    - `traffic_pct`: Initial traffic allocation (0-100, default: 0)
    - `config`: Optional configuration dict passed to the scraper

    Raises:
        HTTPException 409: The version already exists for this job_type
            (deprecated entries count)
        HTTPException 400: The registry rejected the values with ValueError
        HTTPException 500: Any other failure
    """
    try:
        # A version may exist only once per job_type, deprecated ones included.
        known = await registry.list_scrapers(job_type=request.job_type, include_deprecated=True)
        if any(entry['version'] == request.version for entry in known):
            raise HTTPException(
                status_code=409,
                detail=f"Scraper version {request.version} already exists for job_type {request.job_type}"
            )

        new_id = await registry.register_scraper(
            job_type=request.job_type,
            version=request.version,
            variant=request.variant,
            module_path=request.module_path,
            function_name=request.function_name,
            is_default=False,  # Never auto-set as default
            traffic_pct=request.traffic_pct,
            min_priority=request.min_priority,
            config=request.config
        )
        log.info(f"Registered new scraper: {request.job_type} v{request.version} ({request.variant})")

        response = RegisterScraperResponse(
            id=new_id,
            job_type=request.job_type,
            version=request.version,
            variant=request.variant,
            message=f"Successfully registered scraper {request.job_type} v{request.version} ({request.variant})"
        )
        return response
    except HTTPException:
        # Already a proper HTTP error (e.g. the 409 above).
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        log.error(f"Error registering scraper: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to register scraper: {str(e)}")
@router.put(
    "/scrapers/{scraper_id}/traffic",
    response_model=UpdateTrafficResponse,
    summary="Update Traffic Percentage",
    description="Update the traffic allocation for a scraper"
)
async def update_traffic(
    scraper_id: str,
    request: UpdateTrafficRequest,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Change the traffic percentage of one scraper.

    The percentage controls what share of requests is routed to this
    scraper version, e.g. for:
    - Gradual rollouts (10% -> 50% -> 100%)
    - A/B testing (two versions at 50% each)
    - Canary releases (new version at 5-10%)

    **Note:** This endpoint does not check the sum across scrapers of the
    same job_type; keeping the total at or below 100% is up to the caller.

    Raises:
        HTTPException 400: Malformed scraper ID, or the scraper is deprecated
        HTTPException 404: No scraper with that ID
        HTTPException 500: The update did not apply, or any other failure
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        current = await get_scraper_by_id_from_db(db, scraper_id)
        if not current:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if current.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Cannot update traffic for deprecated scraper")

        updated = await update_scraper_traffic(db, scraper_id, request.traffic_pct)
        if not updated:
            raise HTTPException(status_code=500, detail="Failed to update traffic allocation")

        # Make the registry see the new allocation.
        await registry.refresh_cache()
        log.info(f"Updated traffic for scraper {scraper_id} to {request.traffic_pct}%")

        response = UpdateTrafficResponse(
            id=scraper_id,
            traffic_pct=request.traffic_pct,
            message=f"Traffic updated to {request.traffic_pct}%"
        )
        return response
    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error updating traffic for scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to update traffic: {str(e)}")
@router.post(
    "/scrapers/{scraper_id}/deprecate",
    response_model=DeprecateResponse,
    summary="Deprecate Scraper",
    description="Mark a scraper as deprecated (soft delete)"
)
async def deprecate_scraper(
    scraper_id: str,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Soft-delete a scraper version.

    Effects:
    - deprecated_at is stamped
    - traffic_pct is set to 0
    - the row stays in the registry table

    Raises:
        HTTPException 400: Malformed scraper ID, or already deprecated
        HTTPException 404: No scraper with that ID
        HTTPException 500: The update did not apply, or any other failure
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        current = await get_scraper_by_id_from_db(db, scraper_id)
        if not current:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if current.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Scraper is already deprecated")

        stamped_at = await deprecate_scraper_by_id(db, scraper_id)
        if not stamped_at:
            raise HTTPException(status_code=500, detail="Failed to deprecate scraper")

        # Make the registry drop the deprecated entry from routing.
        await registry.refresh_cache()
        log.info(f"Deprecated scraper {scraper_id}")

        response = DeprecateResponse(
            id=scraper_id,
            deprecated_at=stamped_at,
            message="Scraper deprecated. Traffic allocation set to 0%."
        )
        return response
    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error deprecating scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to deprecate scraper: {str(e)}")
@router.post(
    "/scrapers/{scraper_id}/promote",
    response_model=PromoteResponse,
    summary="Promote Scraper",
    description="Promote scraper to stable variant and set as default"
)
async def promote_scraper(
    scraper_id: str,
    traffic_pct: int = Query(80, description="Traffic percentage to assign (0-100)", ge=0, le=100),
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Make a scraper the stable default for its job_type.

    The operation:
    1. Sets the scraper's variant to 'stable'
    2. Sets is_default to True
    3. Sets traffic_pct to the requested value (default: 80%)
    4. Clears is_default on the other scrapers of the same job_type
    5. Redistributes the remaining traffic among the other active scrapers

    **Use cases:**
    - Graduating a beta version to production
    - Making a canary release the new stable version
    - Switching to a new scraper implementation

    **Parameters:**
    - `traffic_pct`: Traffic percentage to assign (default: 80%)

    Raises:
        HTTPException 400: Malformed scraper ID, or the scraper is deprecated
        HTTPException 404: No scraper with that ID
        HTTPException 500: The promotion did not apply, or any other failure
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        current = await get_scraper_by_id_from_db(db, scraper_id)
        if not current:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if current.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Cannot promote a deprecated scraper")

        promoted = await promote_scraper_by_id(db, scraper_id, traffic_pct)
        if not promoted:
            raise HTTPException(status_code=500, detail="Failed to promote scraper")

        # Make the registry see the new default and allocations.
        await registry.refresh_cache()
        log.info(f"Promoted scraper {scraper_id} to stable with {traffic_pct}% traffic")

        # The response echoes the requested values, not the returned row.
        response = PromoteResponse(
            id=scraper_id,
            variant='stable',
            is_default=True,
            traffic_pct=traffic_pct,
            message=f"Scraper promoted to stable. Now default with {traffic_pct}% traffic."
        )
        return response
    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error promoting scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to promote scraper: {str(e)}")
@router.get(
    "/scrapers/{scraper_id}",
    response_model=ScraperInfoResponse,
    summary="Get Scraper Details",
    description="Get detailed information about a specific scraper"
)
async def get_scraper_details(
    scraper_id: str,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Get detailed information about a specific scraper including stats.

    The scraper is read straight from the database (deprecated entries are
    returned too) and enriched with its last-24h statistics.

    Raises:
        HTTPException 400: Malformed scraper ID
        HTTPException 404: No scraper with that ID
        HTTPException 500: Any other failure
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        scraper = await get_scraper_by_id_from_db(db, scraper_id)
        if not scraper:
            raise HTTPException(status_code=404, detail="Scraper not found")

        stats = await get_scraper_stats(db, scraper_id)
        deprecated_at = scraper.get('deprecated_at')

        return ScraperInfoResponse(
            id=str(scraper['id']),
            job_type=scraper['job_type'],
            version=scraper['version'],
            variant=scraper['variant'],
            is_default=scraper['is_default'],
            traffic_pct=scraper['traffic_pct'],
            module_path=scraper['module_path'],
            function_name=scraper.get('function_name'),
            # ISO 8601, as documented on the field and as the deprecate
            # endpoint already returns it (str() would use a space separator).
            deprecated_at=deprecated_at.isoformat() if deprecated_at else None,
            stats=stats
        )
    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error getting scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scraper: {str(e)}")

623
api/routes/dashboard.py Normal file
View File

@@ -0,0 +1,623 @@
#!/usr/bin/env python3
"""
Dashboard API for ReviewIQ Phase 5.
Provides system-wide analytics and monitoring endpoints:
- Overview statistics (jobs by status, success rates, durations)
- Client-level aggregations
- Problem detection (failures, slow jobs, callback issues)
- Scraper version performance analysis
"""
import json
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from enum import Enum
from fastapi import APIRouter, HTTPException, Query, Depends
from pydantic import BaseModel, Field
from core.database import DatabaseManager
log = logging.getLogger(__name__)
# Create router
router = APIRouter(prefix="/api/dashboard", tags=["dashboard"])
# ==================== Enums ====================
class TimePeriod(str, Enum):
"""Time period for filtering dashboard data"""
# Each member's value is the literal string used for the `period` query
# parameter of the dashboard endpoints; get_period_delta() converts a
# member into the matching timedelta.
HOUR_1 = "1h"
HOUR_6 = "6h"
HOUR_24 = "24h"
DAY_7 = "7d"
DAY_30 = "30d"
# ==================== Pydantic Response Models ====================
class JobsByStatus(BaseModel):
"""Job counts grouped by status"""
# One counter per job status. Every counter defaults to 0, so a status
# that is left out when the model is built is reported as zero.
pending: int = 0
running: int = 0
completed: int = 0
failed: int = 0
cancelled: int = 0
partial: int = 0
class OverviewResponse(BaseModel):
"""System-wide dashboard overview statistics"""
# Response model of GET /api/dashboard/overview.
# Fields declared with Field(...) are required.
period: str = Field(..., description="Time period for the statistics (e.g., '24h')")
total_jobs: int = Field(..., description="Total number of jobs in the period")
completed_jobs: int = Field(..., description="Number of successfully completed jobs")
failed_jobs: int = Field(..., description="Number of failed jobs")
running_jobs: int = Field(..., description="Number of currently running jobs")
success_rate: float = Field(..., description="Percentage of successful jobs (0-100)")
# Optional: defaults to None when no average is supplied.
avg_duration_seconds: Optional[float] = Field(None, description="Average job duration in seconds")
jobs_by_status: JobsByStatus = Field(..., description="Job counts grouped by status")
# Defaults to 0 when not supplied.
total_reviews_scraped: int = Field(0, description="Total reviews scraped in the period")
class ClientStats(BaseModel):
"""Job statistics for a single client"""
# Element type of the list returned by GET /api/dashboard/by-client.
client_id: str = Field(..., description="Client identifier")
# Optional: None when the job rows carry no source.
source: Optional[str] = Field(None, description="Source of the requests (e.g., 'veritasreview.com')")
total_jobs: int = Field(..., description="Total jobs submitted by this client")
completed: int = Field(..., description="Number of completed jobs")
# get_stats_by_client() counts both 'failed' and 'partial' jobs here.
failed: int = Field(..., description="Number of failed jobs")
success_rate: float = Field(..., description="Success rate percentage (0-100)")
# Defaults to 0 when not supplied.
total_reviews: int = Field(0, description="Total reviews scraped for this client")
class FailedJob(BaseModel):
"""Details of a failed job"""
# One entry of ProblemsResponse.failed_jobs.
job_id: str = Field(..., description="Job UUID")
url: str = Field(..., description="URL that was being scraped")
# get_problems() fills this with the result of categorize_error().
error_type: Optional[str] = Field(None, description="Categorized error type")
error_message: Optional[str] = Field(None, description="Error message")
# Carried as a string, not a datetime; get_problems() passes an
# isoformat() value.
failed_at: str = Field(..., description="ISO timestamp when the job failed")
client_id: Optional[str] = Field(None, description="Client who submitted the job")
class SlowJob(BaseModel):
"""Details of a slow job (taking > 2x average duration)"""
# One entry of ProblemsResponse.slow_jobs. All fields are required.
job_id: str = Field(..., description="Job UUID")
url: str = Field(..., description="URL that was being scraped")
duration_seconds: float = Field(..., description="Actual job duration in seconds")
avg_duration_seconds: float = Field(..., description="Average duration for comparison")
# get_problems() computes this as duration divided by the average.
ratio: float = Field(..., description="How many times slower than average")
# Carried as a string, not a datetime.
completed_at: str = Field(..., description="ISO timestamp when the job completed")
class CallbackFailure(BaseModel):
"""Details of a failed webhook callback"""
# One entry of ProblemsResponse.callback_failures.
job_id: str = Field(..., description="Job UUID")
callback_url: str = Field(..., description="Webhook URL that failed")
status: str = Field(..., description="Callback status")
attempts: int = Field(..., description="Number of delivery attempts")
# get_problems() currently always passes None for this field.
last_error: Optional[str] = Field(None, description="Last error message")
class ProblemsResponse(BaseModel):
"""Recent failures and issues"""
# Response model of GET /api/dashboard/problems.
# The three lists default to empty via default_factory, so each instance
# gets its own list object.
failed_jobs: List[FailedJob] = Field(default_factory=list, description="Recent job failures")
slow_jobs: List[SlowJob] = Field(default_factory=list, description="Jobs taking > 2x average duration")
callback_failures: List[CallbackFailure] = Field(default_factory=list, description="Failed webhook deliveries")
# Required. get_problems() sets it to the combined length of the three
# lists, each of which is capped by the query limit.
total_problems: int = Field(..., description="Total number of problems detected")
class VersionStats(BaseModel):
"""Performance statistics for a scraper version"""
# Element type of the list returned by GET /api/dashboard/by-version.
# get_stats_by_version() reports 'unknown' when a job has no version.
version: str = Field(..., description="Scraper version string (e.g., '1.0.0')")
variant: Optional[str] = Field(None, description="Scraper variant (e.g., 'stable', 'stealth')")
total_jobs: int = Field(..., description="Total jobs run with this version")
success_rate: float = Field(..., description="Success rate percentage (0-100)")
# Optional: defaults to None when no average is supplied.
avg_duration: Optional[float] = Field(None, description="Average job duration in seconds")
# Defaults to 0 when not supplied.
total_reviews: int = Field(0, description="Total reviews scraped with this version")
# ==================== Helper Functions ====================
def get_period_delta(period: TimePeriod) -> timedelta:
    """
    Translate a dashboard period into the length of its look-back window.

    Args:
        period: Dashboard time period to translate.

    Returns:
        The window length as a ``timedelta``; 24 hours when ``period`` is
        not one of the known members.
    """
    hours_by_period = {
        TimePeriod.HOUR_1: 1,
        TimePeriod.HOUR_6: 6,
        TimePeriod.HOUR_24: 24,
        TimePeriod.DAY_7: 7 * 24,
        TimePeriod.DAY_30: 30 * 24,
    }
    # Unrecognised periods fall back to the 24-hour window.
    return timedelta(hours=hours_by_period.get(period, 24))
def categorize_error(error_message: Optional[str]) -> str:
    """
    Map a free-text error message onto a coarse error category.

    Matching is case-insensitive and substring-based. Rules are tried in a
    fixed order and the first match wins.

    Args:
        error_message: Raw error text, or ``None`` / empty when unavailable.

    Returns:
        ``"unknown"`` for a missing message, ``"other"`` when no rule
        matches, otherwise the category of the first matching rule.
    """
    if not error_message:
        return "unknown"

    text = error_message.lower()

    # Rate limiting is the only rule that needs both words to be present.
    if "rate" in text and "limit" in text:
        return "rate_limited"

    # Ordered (category, keywords) rules; any keyword is enough to match.
    keyword_rules = (
        ("timeout", ("timeout",)),
        ("captcha_blocked", ("captcha", "recaptcha")),
        ("bot_detected", ("bot", "detected")),
        ("network_error", ("network", "connection")),
        ("selector_failed", ("element", "selector", "not found")),
        ("navigation_error", ("navigation", "page")),
        ("browser_error", ("browser", "playwright")),
    )
    for category, keywords in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return category

    return "other"
# ==================== Database Query Functions ====================
async def get_overview_stats(
    db: DatabaseManager,
    period: TimePeriod
) -> Dict[str, Any]:
    """
    Collect system-wide job statistics for the given period.

    Runs a single aggregate query over ``jobs`` rows whose ``created_at``
    is at or after the start of the look-back window.

    Args:
        db: Database manager whose ``pool`` provides the connection.
        period: Look-back window to aggregate over.

    Returns:
        A dict with the keys ``period``, ``total_jobs``, ``completed_jobs``,
        ``failed_jobs``, ``running_jobs``, ``success_rate``,
        ``avg_duration_seconds``, ``total_reviews_scraped`` and
        ``jobs_by_status``. ``success_rate`` is the share of completed jobs
        among completed, failed and partial jobs, rounded to one decimal.
        ``avg_duration_seconds`` is ``None`` when no completed job has a
        ``scrape_time``.
    """
    # NOTE(review): naive local time; assumes created_at is comparable to
    # it - confirm against the column type.
    cutoff = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        stats = await conn.fetchrow("""
            SELECT
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'pending') as pending,
                COUNT(*) FILTER (WHERE status = 'running') as running,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status = 'failed') as failed,
                COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled,
                COUNT(*) FILTER (WHERE status = 'partial') as partial,
                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
        """, cutoff)

    total = stats['total_jobs'] or 0
    completed = stats['completed'] or 0
    failed = stats['failed'] or 0
    partial = stats['partial'] or 0
    running = stats['running'] or 0

    # Success rate only considers jobs that reached a final outcome.
    finished = completed + failed + partial
    success_rate = (completed / finished * 100) if finished > 0 else 0.0

    # Compare against None explicitly so a genuine 0 average is reported
    # as 0 rather than as "no data".
    avg_duration = stats['avg_duration']
    avg_duration_seconds = round(avg_duration, 1) if avg_duration is not None else None

    return {
        'period': period.value,
        'total_jobs': total,
        'completed_jobs': completed,
        'failed_jobs': failed,
        'running_jobs': running,
        'success_rate': round(success_rate, 1),
        'avg_duration_seconds': avg_duration_seconds,
        'total_reviews_scraped': stats['total_reviews'] or 0,
        'jobs_by_status': {
            'pending': stats['pending'] or 0,
            'running': running,
            'completed': completed,
            'failed': failed,
            'cancelled': stats['cancelled'] or 0,
            'partial': partial,
        }
    }
async def get_stats_by_client(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 50
) -> List[Dict[str, Any]]:
    """
    Aggregate job statistics per client for the given period.

    Rows are grouped by ``requester_client_id`` and ``requester_source``
    and ordered by job count, busiest first.

    Args:
        db: Database manager whose ``pool`` provides the connection.
        period: Look-back window to aggregate over.
        limit: Maximum number of groups to return.

    Returns:
        One dict per group with the keys ``client_id``, ``source``,
        ``total_jobs``, ``completed``, ``failed``, ``success_rate`` and
        ``total_reviews``. ``failed`` includes partial jobs.
    """
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        records = await conn.fetch("""
            SELECT
                COALESCE(requester_client_id, 'unknown') as client_id,
                requester_source as source,
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
            GROUP BY requester_client_id, requester_source
            ORDER BY total_jobs DESC
            LIMIT $2
        """, window_start, limit)

    def summarize(record) -> Dict[str, Any]:
        """Turn one aggregate row into the per-client stats dict."""
        done = record['completed'] or 0
        unsuccessful = record['failed'] or 0
        settled = done + unsuccessful
        # Avoid dividing by zero when no job has reached a final outcome.
        if settled > 0:
            rate = done / settled * 100
        else:
            rate = 0.0
        return {
            'client_id': record['client_id'],
            'source': record['source'],
            'total_jobs': record['total_jobs'],
            'completed': done,
            'failed': unsuccessful,
            'success_rate': round(rate, 1),
            'total_reviews': record['total_reviews'] or 0,
        }

    return [summarize(record) for record in records]
async def get_problems(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 20
) -> Dict[str, Any]:
    """
    Collect recent failures, slow jobs and failed callbacks for the period.

    Args:
        db: Database manager whose ``pool`` provides the connection.
        period: Look-back window; only jobs created inside it are considered.
        limit: Maximum number of rows fetched per category.

    Returns:
        A dict with the keys ``failed_jobs``, ``slow_jobs``,
        ``callback_failures`` (lists of dicts) and ``total_problems`` (the
        combined length of the three lists, so it is bounded by
        ``3 * limit``).
    """
    cutoff = datetime.now() - get_period_delta(period)

    def _iso_or_now(value) -> str:
        """Format a timestamp, substituting the current time when missing."""
        # NOTE(review): a missing completed_at is reported as "now", which
        # changes on every request - confirm this fallback is intended.
        return value.isoformat() if value else datetime.now().isoformat()

    async with db.pool.acquire() as conn:
        # PostgreSQL sorts NULLs first under DESC; NULLS LAST keeps rows
        # without a completion time from displacing the most recent ones
        # once LIMIT is applied.
        failed_rows = await conn.fetch("""
            SELECT
                job_id,
                url,
                error_message,
                completed_at,
                requester_client_id
            FROM jobs
            WHERE status IN ('failed', 'partial')
              AND created_at >= $1
            ORDER BY completed_at DESC NULLS LAST
            LIMIT $2
        """, cutoff, limit)

        # Average duration of completed jobs, used as the slow-job baseline.
        avg_duration = await conn.fetchval("""
            SELECT AVG(scrape_time)
            FROM jobs
            WHERE status = 'completed'
              AND scrape_time IS NOT NULL
              AND created_at >= $1
        """, cutoff)

        # Work in float so Decimal and float values are never mixed below.
        avg_seconds = float(avg_duration) if avg_duration else 0.0

        slow_rows = []
        if avg_seconds > 0:
            # The 2x threshold is computed here and cast explicitly. In
            # "$1 * 2" PostgreSQL infers the untyped parameter as integer
            # from the literal, which does not fit a fractional average.
            slow_rows = await conn.fetch("""
                SELECT
                    job_id,
                    url,
                    scrape_time,
                    completed_at
                FROM jobs
                WHERE status = 'completed'
                  AND scrape_time IS NOT NULL
                  AND scrape_time > $1::double precision
                  AND created_at >= $2
                ORDER BY scrape_time DESC
                LIMIT $3
            """, avg_seconds * 2, cutoff, limit)

        callback_rows = await conn.fetch("""
            SELECT
                job_id,
                callback_url,
                callback_status,
                callback_attempts
            FROM jobs
            WHERE callback_url IS NOT NULL
              AND callback_status = 'failed'
              AND created_at >= $1
            ORDER BY completed_at DESC NULLS LAST
            LIMIT $2
        """, cutoff, limit)

    failed_jobs = [
        {
            'job_id': str(row['job_id']),
            'url': row['url'],
            'error_type': categorize_error(row['error_message']),
            'error_message': row['error_message'],
            'failed_at': _iso_or_now(row['completed_at']),
            'client_id': row['requester_client_id'],
        }
        for row in failed_rows
    ]

    slow_jobs = []
    for row in slow_rows:
        duration = float(row['scrape_time'])
        slow_jobs.append({
            'job_id': str(row['job_id']),
            'url': row['url'],
            'duration_seconds': round(duration, 1),
            'avg_duration_seconds': round(avg_seconds, 1),
            'ratio': round(duration / avg_seconds, 1),
            'completed_at': _iso_or_now(row['completed_at']),
        })

    callback_failures = [
        {
            'job_id': str(row['job_id']),
            'callback_url': row['callback_url'],
            'status': row['callback_status'] or 'failed',
            'attempts': row['callback_attempts'] or 0,
            'last_error': None,  # Would need to query webhook_attempts table
        }
        for row in callback_rows
    ]

    return {
        'failed_jobs': failed_jobs,
        'slow_jobs': slow_jobs,
        'callback_failures': callback_failures,
        'total_problems': len(failed_jobs) + len(slow_jobs) + len(callback_failures),
    }
async def get_stats_by_version(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    Aggregate job performance per scraper version for the given period.

    Rows are grouped by ``scraper_version`` and ``scraper_variant`` and
    ordered by job count, busiest first.

    Args:
        db: Database manager whose ``pool`` provides the connection.
        period: Look-back window to aggregate over.
        limit: Maximum number of groups to return.

    Returns:
        One dict per group with the keys ``version``, ``variant``,
        ``total_jobs``, ``success_rate``, ``avg_duration`` and
        ``total_reviews``. ``avg_duration`` is ``None`` when the group has
        no completed job with a ``scrape_time``.
    """
    cutoff = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                COALESCE(scraper_version, 'unknown') as version,
                scraper_variant as variant,
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
            GROUP BY scraper_version, scraper_variant
            ORDER BY total_jobs DESC
            LIMIT $2
        """, cutoff, limit)

    results = []
    for row in rows:
        completed = row['completed'] or 0
        failed = row['failed'] or 0
        finished = completed + failed
        success_rate = (completed / finished * 100) if finished > 0 else 0.0

        # Compare against None explicitly so a genuine 0 average is
        # reported as 0 rather than as "no data".
        avg_duration = row['avg_duration']

        results.append({
            'version': row['version'],
            'variant': row['variant'],
            'total_jobs': row['total_jobs'],
            'success_rate': round(success_rate, 1),
            'avg_duration': round(avg_duration, 1) if avg_duration is not None else None,
            'total_reviews': row['total_reviews'] or 0,
        })
    return results
# ==================== Dependency Injection ====================
# Module-level handle to the shared DatabaseManager; stays None until
# set_database() has been called.
_db: Optional[DatabaseManager] = None


def set_database(db: DatabaseManager) -> None:
    """Register the DatabaseManager that this router's endpoints will use."""
    global _db
    _db = db


def get_db() -> DatabaseManager:
    """
    FastAPI dependency that hands out the registered DatabaseManager.

    Raises:
        HTTPException: 500 when no database has been registered yet.
    """
    if _db is not None:
        return _db
    raise HTTPException(status_code=500, detail="Database not initialized")
# ==================== API Endpoints ====================
@router.get(
    "/overview",
    response_model=OverviewResponse,
    summary="Get Dashboard Overview",
    description="Get system-wide job statistics and success rates"
)
async def get_overview(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    db: DatabaseManager = Depends(get_db)
) -> OverviewResponse:
    """
    Return system-wide dashboard statistics for the requested period.

    Args:
        period: Look-back window to analyse; defaults to the last 24 hours.
        db: Database manager resolved through ``get_db``.

    Returns:
        An ``OverviewResponse`` built from ``get_overview_stats``: job
        counts, success rate, average duration and reviews scraped.

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        stats = await get_overview_stats(db, period)
        return OverviewResponse(
            period=stats['period'],
            total_jobs=stats['total_jobs'],
            completed_jobs=stats['completed_jobs'],
            failed_jobs=stats['failed_jobs'],
            running_jobs=stats['running_jobs'],
            success_rate=stats['success_rate'],
            avg_duration_seconds=stats['avg_duration_seconds'],
            jobs_by_status=JobsByStatus(**stats['jobs_by_status']),
            total_reviews_scraped=stats['total_reviews_scraped'],
        )
    except HTTPException:
        # Pass deliberate HTTP errors through instead of rewrapping them.
        raise
    except Exception as e:
        # log.exception keeps the traceback, which the bare message loses.
        log.exception("Error getting dashboard overview: %s", e)
        # NOTE(review): the exception text is returned to the client here.
        raise HTTPException(status_code=500, detail=f"Failed to get overview: {e}") from e
@router.get(
    "/by-client",
    response_model=List[ClientStats],
    summary="Get Stats by Client",
    description="Get job statistics grouped by client"
)
async def get_by_client(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(50, description="Maximum number of clients to return", ge=1, le=200),
    db: DatabaseManager = Depends(get_db)
) -> List[ClientStats]:
    """
    Return job statistics grouped by client, busiest client first.

    Args:
        period: Look-back window to analyse; defaults to the last 24 hours.
        limit: Maximum number of clients to return (1-200, default 50).
        db: Database manager resolved through ``get_db``.

    Returns:
        One ``ClientStats`` per entry produced by ``get_stats_by_client``.

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        stats = await get_stats_by_client(db, period, limit)
        return [
            ClientStats(
                client_id=s['client_id'],
                source=s['source'],
                total_jobs=s['total_jobs'],
                completed=s['completed'],
                failed=s['failed'],
                success_rate=s['success_rate'],
                total_reviews=s['total_reviews'],
            )
            for s in stats
        ]
    except HTTPException:
        # Pass deliberate HTTP errors through instead of rewrapping them.
        raise
    except Exception as e:
        # log.exception keeps the traceback, which the bare message loses.
        log.exception("Error getting client stats: %s", e)
        # NOTE(review): the exception text is returned to the client here.
        raise HTTPException(status_code=500, detail=f"Failed to get client stats: {e}") from e
@router.get(
    "/problems",
    response_model=ProblemsResponse,
    summary="Get Recent Problems",
    description="Get recent failures, slow jobs, and callback issues"
)
async def get_problems_endpoint(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for problems (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(20, description="Maximum number of items per category", ge=1, le=100),
    db: DatabaseManager = Depends(get_db)
) -> ProblemsResponse:
    """
    Return recent failed jobs, slow jobs and failed webhook callbacks.

    Args:
        period: Look-back window to analyse; defaults to the last 24 hours.
        limit: Maximum items per category (1-100, default 20).
        db: Database manager resolved through ``get_db``.

    Returns:
        A ``ProblemsResponse`` holding the three problem lists from
        ``get_problems`` and their combined count.

    Raises:
        HTTPException: 500 when the problems cannot be collected.
    """
    try:
        problems = await get_problems(db, period, limit)
        return ProblemsResponse(
            failed_jobs=[FailedJob(**fj) for fj in problems['failed_jobs']],
            slow_jobs=[SlowJob(**sj) for sj in problems['slow_jobs']],
            callback_failures=[CallbackFailure(**cf) for cf in problems['callback_failures']],
            total_problems=problems['total_problems'],
        )
    except HTTPException:
        # Pass deliberate HTTP errors through instead of rewrapping them.
        raise
    except Exception as e:
        # log.exception keeps the traceback, which the bare message loses.
        log.exception("Error getting problems: %s", e)
        # NOTE(review): the exception text is returned to the client here.
        raise HTTPException(status_code=500, detail=f"Failed to get problems: {e}") from e
@router.get(
    "/by-version",
    response_model=List[VersionStats],
    summary="Get Stats by Scraper Version",
    description="Get performance statistics grouped by scraper version"
)
async def get_by_version(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(20, description="Maximum number of versions to return", ge=1, le=100),
    db: DatabaseManager = Depends(get_db)
) -> List[VersionStats]:
    """
    Return performance statistics grouped by scraper version and variant.

    Args:
        period: Look-back window to analyse; defaults to the last 24 hours.
        limit: Maximum number of versions to return (1-100, default 20).
        db: Database manager resolved through ``get_db``.

    Returns:
        One ``VersionStats`` per entry produced by ``get_stats_by_version``,
        ordered by job count, busiest first.

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        stats = await get_stats_by_version(db, period, limit)
        return [
            VersionStats(
                version=s['version'],
                variant=s['variant'],
                total_jobs=s['total_jobs'],
                success_rate=s['success_rate'],
                avg_duration=s['avg_duration'],
                total_reviews=s['total_reviews'],
            )
            for s in stats
        ]
    except HTTPException:
        # Pass deliberate HTTP errors through instead of rewrapping them.
        raise
    except Exception as e:
        # log.exception keeps the traceback, which the bare message loses.
        log.exception("Error getting version stats: %s", e)
        # NOTE(review): the exception text is returned to the client here.
        raise HTTPException(status_code=500, detail=f"Failed to get version stats: {e}") from e