Phases 5-7: Dashboard UI, Admin API, and Auth middleware
Phase 5 - Main Dashboard:
- Dashboard overview page with system health stats
- Jobs by status breakdown, success rates, top clients
- Dashboard API (/api/dashboard/overview, by-client, problems, by-version)

Phase 6 - Admin/Scraper Management:
- Scrapers management page with traffic allocation UI
- Admin API for scraper CRUD operations
- Traffic percentage updates for A/B testing
- Promote/deprecate scraper versions

Phase 7 - Authentication:
- API key authentication middleware
- SHA-256 key hashing (keys never stored in plain text)
- Scope-based authorization (jobs:read, jobs:write, admin)
- Rate limiting per API key

Also:
- Updated api_server_production.py to include new routers
- Extended core/database.py with dashboard query methods
- Added dashboard link to sidebar navigation
- Updated CONTEXT-KEEPER.md to mark all phases complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
API Middleware for ReviewIQ.
|
||||
|
||||
This module exports authentication and other middleware components.
|
||||
"""
|
||||
|
||||
from api.middleware.auth import (
|
||||
APIKeyAuth,
|
||||
api_key_header,
|
||||
generate_api_key,
|
||||
create_auth,
|
||||
AVAILABLE_SCOPES,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"APIKeyAuth",
|
||||
"api_key_header",
|
||||
"generate_api_key",
|
||||
"create_auth",
|
||||
"AVAILABLE_SCOPES",
|
||||
]
|
||||
|
||||
326
api/middleware/auth.py
Normal file
326
api/middleware/auth.py
Normal file
@@ -0,0 +1,326 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
API Key Authentication Middleware for ReviewIQ Phase 7.
|
||||
|
||||
Security Model:
|
||||
- API keys are never stored in plain text
|
||||
- Only SHA-256 hashes are stored in the database
|
||||
- First 8 characters (prefix) are stored for identification in logs/UI
|
||||
- Keys follow format: "riq_" + 32 random alphanumeric characters
|
||||
|
||||
Authentication Flow:
|
||||
1. Client sends API key in X-API-Key header
|
||||
2. Server hashes the received key with SHA-256
|
||||
3. Server looks up the hash in api_keys table
|
||||
4. If found, active, and not expired, request is authenticated
|
||||
5. Scopes are checked for protected endpoints
|
||||
"""
|
||||
import hashlib
|
||||
import secrets
|
||||
import string
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from functools import wraps
|
||||
from typing import Optional, List, Callable
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import Request, HTTPException, Depends
|
||||
from fastapi.security import APIKeyHeader
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Security header for API key
|
||||
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
|
||||
|
||||
# Key format constants
|
||||
API_KEY_PREFIX = "riq_"
API_KEY_RANDOM_LENGTH = 32
API_KEY_PREFIX_STORE_LENGTH = 8  # Leading characters kept for identification


def generate_api_key() -> str:
    """
    Build a new API key from the fixed prefix and a random suffix.

    The suffix is API_KEY_RANDOM_LENGTH characters drawn from lowercase
    ASCII letters and digits using the ``secrets`` module.

    Example: "riq_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"

    Returns:
        The prefix followed by the random suffix.
    """
    charset = string.ascii_lowercase + string.digits
    # secrets.choice draws from the OS CSPRNG, unlike random.choice
    picked = [secrets.choice(charset) for _ in range(API_KEY_RANDOM_LENGTH)]
    return API_KEY_PREFIX + "".join(picked)
|
||||
|
||||
|
||||
class APIKeyAuth:
    """
    API Key authentication middleware.

    Validates the X-API-Key header against hashed keys held by the
    database object and exposes FastAPI dependencies for plain
    authentication and for scope-restricted endpoints. The per-key
    ``rate_limit_rpm`` value is returned to callers; this class does not
    enforce it.

    Usage:
        from api.middleware.auth import APIKeyAuth

        # Initialize with database
        auth = APIKeyAuth(db)

        # Use as dependency
        @app.get("/protected")
        async def protected_endpoint(client: dict = Depends(auth.verify_api_key)):
            return {"client_id": client["client_id"]}

        # Require specific scope
        @app.post("/admin-only")
        async def admin_endpoint(client: dict = Depends(auth.require_scope("admin"))):
            return {"message": "Admin access granted"}
    """

    def __init__(self, db):
        """
        Initialize API key authentication.

        Args:
            db: Object providing the awaitable methods
                ``get_api_key_by_hash(key_hash)`` and
                ``update_api_key_last_used(key_id)``.
        """
        self.db = db

    async def verify_api_key(
        self,
        request: Request,
        api_key: Optional[str] = Depends(api_key_header)
    ) -> dict:
        """
        Verify API key and return client info.

        This is a FastAPI dependency that validates the X-API-Key header
        and returns information about the authenticated client.

        Args:
            request: FastAPI request object (used only for logging)
            api_key: API key from X-API-Key header

        Returns:
            dict: Client information:
                {
                    "client_id": "veritas_123",
                    "key_id": "uuid-of-key",
                    "key_prefix": "riq_a1b2",
                    "name": "Production Key",
                    "scopes": ["jobs:read", "jobs:write"],
                    "rate_limit_rpm": 60
                }

        Raises:
            HTTPException 401: If API key is missing or invalid
            HTTPException 403: If API key is inactive or expired
        """
        if not api_key:
            log.warning(f"Missing API key for request: {request.method} {request.url.path}")
            raise HTTPException(
                status_code=401,
                detail="Missing API key. Include X-API-Key header.",
                headers={"WWW-Authenticate": "ApiKey"}
            )

        # Validate key format before touching the database
        if not api_key.startswith(API_KEY_PREFIX):
            log.warning(f"Invalid API key format (wrong prefix): {api_key[:8]}...")
            raise HTTPException(
                status_code=401,
                detail="Invalid API key format.",
                headers={"WWW-Authenticate": "ApiKey"}
            )

        # Only the hash is ever used for lookup
        key_hash = self.hash_api_key(api_key)
        key_data = await self.db.get_api_key_by_hash(key_hash)

        if not key_data:
            # Log no more than the identification prefix; logging extra
            # characters would expose more of a near-miss key than the
            # security model allows.
            log.warning(f"Unknown API key attempted: {self.get_key_prefix(api_key)}...")
            raise HTTPException(
                status_code=401,
                detail="Invalid API key.",
                headers={"WWW-Authenticate": "ApiKey"}
            )

        # Check if key is active
        if not key_data.get('is_active', False):
            log.warning(f"Inactive API key used: {key_data['key_prefix']} (client: {key_data['client_id']})")
            raise HTTPException(
                status_code=403,
                detail="API key has been revoked."
            )

        # Check expiration (handles both naive and timezone-aware values)
        if self._is_expired(key_data.get('expires_at')):
            log.warning(f"Expired API key used: {key_data['key_prefix']} (client: {key_data['client_id']})")
            raise HTTPException(
                status_code=403,
                detail="API key has expired."
            )

        # Update last_used_at. The call is awaited, so it does add latency,
        # but a failure here is best-effort and must not reject the request.
        try:
            await self.db.update_api_key_last_used(key_data['id'])
        except Exception as e:
            log.error(f"Failed to update last_used_at for key {key_data['key_prefix']}: {e}")

        # Log successful authentication (at debug level to avoid log spam)
        log.debug(f"Authenticated: client={key_data['client_id']} key={key_data['key_prefix']}")

        return {
            "client_id": key_data['client_id'],
            "key_id": str(key_data['id']),
            "key_prefix": key_data['key_prefix'],
            "name": key_data['name'],
            # A NULL scopes column must behave like "no scopes", not crash
            # the membership checks in the scope dependencies.
            "scopes": key_data.get('scopes') or [],
            "rate_limit_rpm": key_data.get('rate_limit_rpm', 60)
        }

    def require_scope(self, scope: str) -> Callable:
        """
        Create a dependency that requires a specific scope.

        Usage:
            @app.post("/jobs")
            async def create_job(client: dict = Depends(auth.require_scope("jobs:write"))):
                # Only accessible with jobs:write scope
                pass

        Args:
            scope: Required scope string (e.g., "jobs:read", "jobs:write", "admin")

        Returns:
            FastAPI dependency function that verifies the API key and checks
            scope. It raises HTTPException 403 when the scope is missing.
        """
        async def scope_dependency(
            request: Request,
            api_key: Optional[str] = Depends(api_key_header)
        ) -> dict:
            # First verify the API key
            client = await self.verify_api_key(request, api_key)
            client_scopes = client.get('scopes') or []

            # Admin scope grants all permissions
            if 'admin' in client_scopes:
                return client

            if scope not in client_scopes:
                log.warning(
                    f"Scope denied: client={client['client_id']} "
                    f"required={scope} has={client_scopes}"
                )
                raise HTTPException(
                    status_code=403,
                    detail=f"Insufficient permissions. Required scope: {scope}"
                )

            return client

        return scope_dependency

    def require_any_scope(self, scopes: List[str]) -> Callable:
        """
        Create a dependency that requires any one of the specified scopes.

        Usage:
            @app.get("/jobs/{job_id}")
            async def get_job(client: dict = Depends(auth.require_any_scope(["jobs:read", "jobs:write"]))):
                pass

        Args:
            scopes: List of acceptable scopes (client needs at least one)

        Returns:
            FastAPI dependency function. It raises HTTPException 403 when
            the client holds none of the listed scopes.
        """
        async def scope_dependency(
            request: Request,
            api_key: Optional[str] = Depends(api_key_header)
        ) -> dict:
            client = await self.verify_api_key(request, api_key)
            client_scopes = client.get('scopes') or []

            # Admin scope grants all permissions
            if 'admin' in client_scopes:
                return client

            # Check if client has any of the required scopes
            if not any(s in client_scopes for s in scopes):
                log.warning(
                    f"Scope denied: client={client['client_id']} "
                    f"required_any={scopes} has={client_scopes}"
                )
                raise HTTPException(
                    status_code=403,
                    detail=f"Insufficient permissions. Required one of: {', '.join(scopes)}"
                )

            return client

        return scope_dependency

    @staticmethod
    def _is_expired(expires_at) -> bool:
        """
        Report whether an expiry timestamp lies in the past.

        Args:
            expires_at: ``datetime`` or a falsy value meaning "never expires".
                Naive values are interpreted as UTC; aware values are
                compared as-is.

        Returns:
            True when ``expires_at`` is set and earlier than the current time.
        """
        from datetime import timezone

        if not expires_at:
            return False
        if expires_at.tzinfo is None:
            # Comparing a naive and an aware datetime raises TypeError, so
            # normalise naive values to UTC first.
            expires_at = expires_at.replace(tzinfo=timezone.utc)
        return expires_at < datetime.now(timezone.utc)

    @staticmethod
    def hash_api_key(api_key: str) -> str:
        """
        Hash API key for storage/lookup using SHA-256.

        Args:
            api_key: Plain text API key

        Returns:
            64-character hexadecimal hash string
        """
        return hashlib.sha256(api_key.encode('utf-8')).hexdigest()

    @staticmethod
    def get_key_prefix(api_key: str) -> str:
        """
        Extract the identifying prefix from an API key.

        Args:
            api_key: Plain text API key

        Returns:
            First API_KEY_PREFIX_STORE_LENGTH characters of the key
            (e.g., "riq_a1b2")
        """
        return api_key[:API_KEY_PREFIX_STORE_LENGTH]
|
||||
|
||||
|
||||
# Convenience function for creating auth instance
|
||||
def create_auth(db) -> APIKeyAuth:
    """
    Build an APIKeyAuth bound to the given database object.

    Args:
        db: Database object handed straight to the APIKeyAuth constructor

    Returns:
        The newly constructed APIKeyAuth instance
    """
    auth = APIKeyAuth(db)
    return auth
|
||||
|
||||
|
||||
# Available scopes documentation
|
||||
AVAILABLE_SCOPES = {
    # Job access
    "jobs:read": "Read job status and results",
    "jobs:write": "Create and cancel jobs",
    # Batch access
    "batches:read": "Read batch status and results",
    "batches:write": "Create and manage batches",
    # Webhook configuration
    "webhooks:manage": "Configure webhook endpoints",
    # Superuser scope
    "admin": "Full administrative access (includes all other scopes)",
}
|
||||
@@ -4,8 +4,14 @@ API Routes for ReviewIQ.
|
||||
This module exports all route modules for easy import into the main server.
|
||||
"""
|
||||
from api.routes.batches import router as batches_router, set_database as set_batches_db
|
||||
from api.routes.dashboard import router as dashboard_router, set_database as set_dashboard_db
|
||||
from api.routes.admin import router as admin_router, set_database as set_admin_db
|
||||
|
||||
__all__ = [
|
||||
'batches_router',
|
||||
'set_batches_db',
|
||||
'dashboard_router',
|
||||
'set_dashboard_db',
|
||||
'admin_router',
|
||||
'set_admin_db',
|
||||
]
|
||||
|
||||
756
api/routes/admin.py
Normal file
756
api/routes/admin.py
Normal file
@@ -0,0 +1,756 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Admin API routes for scraper management.
|
||||
|
||||
Phase 6 - ReviewIQ Platform
|
||||
|
||||
Provides endpoints for:
|
||||
- Listing registered scrapers with stats
|
||||
- Registering new scraper versions
|
||||
- Updating traffic allocation for A/B testing
|
||||
- Deprecating scrapers (soft delete)
|
||||
- Promoting scrapers to stable/default
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List, Dict, Any
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Depends
|
||||
from pydantic import BaseModel, Field, validator
|
||||
|
||||
from core.database import DatabaseManager
|
||||
from scrapers.registry import ScraperRegistry
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Create router
|
||||
router = APIRouter(prefix="/api/admin", tags=["admin"])
|
||||
|
||||
|
||||
# ==================== Pydantic Models ====================
|
||||
|
||||
class ScraperStatsModel(BaseModel):
    """Statistics for a scraper over the last 24 hours."""
    # All fields default to zero so an empty model means "no data".
    total_jobs: int = Field(0, description="Total jobs processed")
    success_rate: float = Field(0.0, description="Success rate percentage")
    avg_duration: float = Field(0.0, description="Average scrape duration in seconds")
|
||||
|
||||
|
||||
class ScraperInfoResponse(BaseModel):
    """Response model for scraper information."""
    # Identity of the registry entry
    id: str = Field(..., description="Unique scraper registry ID")
    job_type: str = Field(..., description="Type of job this scraper handles")
    version: str = Field(..., description="Semantic version string")
    variant: str = Field(..., description="Release variant (stable, beta, canary)")

    # Routing configuration
    is_default: bool = Field(..., description="Whether this is the default scraper")
    traffic_pct: int = Field(..., description="Traffic percentage for A/B testing (0-100)")

    # Implementation location
    module_path: str = Field(..., description="Python module path")
    function_name: Optional[str] = Field(default=None, description="Entry function name")

    # Lifecycle and performance
    deprecated_at: Optional[str] = Field(default=None, description="Deprecation timestamp (ISO format)")
    stats: ScraperStatsModel = Field(default_factory=ScraperStatsModel, description="Last 24h stats")
|
||||
|
||||
|
||||
class RegisterScraperRequest(BaseModel):
    """Request model for registering a new scraper."""
    job_type: str = Field(..., description="Type of job (e.g., 'google_reviews')")
    version: str = Field(..., description="Semantic version string (e.g., '1.1.0')")
    variant: str = Field(..., description="Release variant: stable, beta, or canary")
    module_path: str = Field(..., description="Python module path")
    function_name: str = Field(default="scrape", description="Entry function name")
    traffic_pct: int = Field(default=0, description="Initial traffic percentage (0-100)", ge=0, le=100)
    min_priority: int = Field(default=0, description="Minimum job priority required")
    config: Optional[Dict[str, Any]] = Field(default=None, description="Optional configuration")

    @validator('variant')
    def validate_variant(cls, v):
        """Reject any variant other than 'stable', 'beta' or 'canary'."""
        if v not in ('stable', 'beta', 'canary'):
            raise ValueError("variant must be 'stable', 'beta', or 'canary'")
        return v

    @validator('version')
    def validate_version(cls, v):
        """
        Basic semver validation.

        Requires at least two dot-separated components, none of them empty,
        with numeric major and minor components. Later components are left
        unconstrained so suffixes such as '1.0.0-beta' are still accepted.
        A bare length check would let '1.', '..' and 'abc.def' through.
        """
        parts = v.split('.')
        well_formed = (
            len(parts) >= 2
            and all(parts)
            and parts[0].isdecimal()
            and parts[1].isdecimal()
        )
        if not well_formed:
            raise ValueError("version must be semantic version format (e.g., '1.0.0')")
        return v
|
||||
|
||||
|
||||
class RegisterScraperResponse(BaseModel):
    """Response model for scraper registration."""
    # Identifier assigned to the new registry entry
    id: str = Field(..., description="Created scraper registry ID")

    # Values echoed back from the registration request
    job_type: str = Field(..., description="Job type")
    version: str = Field(..., description="Version string")
    variant: str = Field(..., description="Release variant")

    # Human-readable outcome
    message: str = Field(..., description="Status message")
|
||||
|
||||
|
||||
class UpdateTrafficRequest(BaseModel):
    """Request model for updating traffic percentage."""
    # Bounded to the inclusive range 0..100 by the ge/le constraints.
    traffic_pct: int = Field(
        ...,
        ge=0,
        le=100,
        description="New traffic percentage (0-100)",
    )
|
||||
|
||||
|
||||
class UpdateTrafficResponse(BaseModel):
    """Response model for traffic update."""
    # Registry entry that was modified
    id: str = Field(..., description="Scraper registry ID")
    # Value now stored for that entry
    traffic_pct: int = Field(..., description="Updated traffic percentage")
    # Human-readable outcome
    message: str = Field(..., description="Status message")
|
||||
|
||||
|
||||
class DeprecateResponse(BaseModel):
    """Response model for deprecation."""
    # Registry entry that was deprecated
    id: str = Field(..., description="Scraper registry ID")
    # When the deprecation was recorded, as a string
    deprecated_at: str = Field(..., description="Deprecation timestamp")
    # Human-readable outcome
    message: str = Field(..., description="Status message")
|
||||
|
||||
|
||||
class PromoteResponse(BaseModel):
    """Response model for promotion."""
    # Registry entry that was promoted
    id: str = Field(..., description="Scraper registry ID")

    # State of the entry after promotion
    variant: str = Field(..., description="New variant (stable)")
    is_default: bool = Field(..., description="Whether now default")
    traffic_pct: int = Field(..., description="New traffic percentage")

    # Human-readable outcome
    message: str = Field(..., description="Status message")
|
||||
|
||||
|
||||
# ==================== Database Helper Functions ====================
|
||||
|
||||
async def get_scraper_stats(
    db: DatabaseManager,
    scraper_id: str,
    hours: int = 24
) -> ScraperStatsModel:
    """
    Get statistics for a specific scraper over the given time period.

    A job is attributed to the scraper when its metadata carries the
    scraper's ID, or when its recorded scraper version (and variant, if
    recorded) matches the registry entry.

    This is best-effort: any failure is logged and reported as empty stats
    so that a stats problem never breaks the caller.

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper registry entry
        hours: Number of hours to look back (default: 24)

    Returns:
        ScraperStatsModel with job counts, success rate, and avg duration;
        an all-zero model when there are no jobs or the query fails.
    """
    try:
        scraper_key = str(scraper_id)
        async with db.pool.acquire() as conn:
            # The look-back window is bound as a real parameter ($1).
            # Placeholders inside a quoted literal ('%s hours') are never
            # substituted, and every argument passed must have a matching
            # $n in the query text.
            # The ID is bound twice because it is compared once as text
            # ($2) and once as a uuid ($3).
            stats = await conn.fetchrow("""
                SELECT
                    COUNT(*) as total_jobs,
                    COUNT(*) FILTER (WHERE status = 'completed') as completed_jobs,
                    COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed_jobs,
                    AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration
                FROM jobs
                WHERE created_at >= NOW() - ($1::int * INTERVAL '1 hour')
                AND (
                    metadata->>'scraper_id' = $2
                    OR (scraper_version IS NOT NULL AND EXISTS (
                        SELECT 1 FROM scraper_registry sr
                        WHERE sr.id = $3::uuid
                        AND sr.version = jobs.scraper_version
                        AND sr.variant = COALESCE(jobs.scraper_variant, sr.variant)
                    ))
                )
            """, hours, scraper_key, scraper_key)

        if not stats or stats['total_jobs'] == 0:
            return ScraperStatsModel()

        total = stats['total_jobs']
        completed = stats['completed_jobs'] or 0
        success_rate = (completed / total * 100) if total > 0 else 0.0
        # AVG() is NULL when no completed job has a scrape_time
        avg_duration = float(stats['avg_duration']) if stats['avg_duration'] else 0.0

        return ScraperStatsModel(
            total_jobs=total,
            success_rate=round(success_rate, 2),
            avg_duration=round(avg_duration, 2)
        )
    except Exception as e:
        # Keep the traceback: swallowing it silently makes a permanently
        # broken query indistinguishable from "no jobs yet".
        log.warning(f"Error getting scraper stats for {scraper_id}: {e}", exc_info=True)
        return ScraperStatsModel()
|
||||
|
||||
|
||||
async def get_scraper_by_id_from_db(
    db: DatabaseManager,
    scraper_id: str
) -> Optional[Dict[str, Any]]:
    """
    Fetch one scraper_registry row by primary key.

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper registry entry; a malformed
            value makes ``UUID()`` raise ValueError

    Returns:
        The row as a plain dict, or None when no row matches
    """
    select_by_id = """
        SELECT
            id,
            job_type,
            version,
            variant,
            module_path,
            function_name,
            is_default,
            traffic_pct,
            min_priority,
            config,
            deprecated_at
        FROM scraper_registry
        WHERE id = $1
    """
    async with db.pool.acquire() as conn:
        record = await conn.fetchrow(select_by_id, UUID(scraper_id))
        if record:
            return dict(record)
        return None
|
||||
|
||||
|
||||
async def update_scraper_traffic(
    db: DatabaseManager,
    scraper_id: str,
    traffic_pct: int
) -> bool:
    """
    Set traffic_pct on a non-deprecated scraper_registry row.

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper registry entry
        traffic_pct: New traffic percentage (0-100)

    Returns:
        True when the command status reports exactly one affected row,
        False otherwise (unknown ID or deprecated scraper)
    """
    update_sql = """
        UPDATE scraper_registry
        SET traffic_pct = $2
        WHERE id = $1 AND deprecated_at IS NULL
    """
    async with db.pool.acquire() as conn:
        status = await conn.execute(update_sql, UUID(scraper_id), traffic_pct)
        # The status string ends with the affected-row count, e.g. "UPDATE 1"
        tokens = status.split()
        return tokens[-1] == "1"
|
||||
|
||||
|
||||
async def deprecate_scraper_by_id(
    db: DatabaseManager,
    scraper_id: str
) -> Optional[str]:
    """
    Soft-delete a scraper: stamp deprecated_at and zero its traffic.

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper registry entry

    Returns:
        The new deprecation timestamp in ISO format, or None when the ID
        is unknown or the scraper was already deprecated
    """
    deprecate_sql = """
        UPDATE scraper_registry
        SET deprecated_at = NOW(), traffic_pct = 0
        WHERE id = $1 AND deprecated_at IS NULL
        RETURNING deprecated_at
    """
    async with db.pool.acquire() as conn:
        stamp = await conn.fetchval(deprecate_sql, UUID(scraper_id))
        # No row matched -> fetchval yields None
        return stamp.isoformat() if stamp else None
|
||||
|
||||
|
||||
async def promote_scraper_by_id(
    db: DatabaseManager,
    scraper_id: str,
    default_traffic_pct: int = 80
) -> Optional[Dict[str, Any]]:
    """
    Promote a scraper to stable variant, set as default, and give it majority traffic.

    Inside a single transaction this will:
    1. Unset is_default on every other scraper of the same job_type
    2. Rescale traffic_pct of the other active scrapers so that, in
       proportion to their current shares, they split the traffic left
       over after default_traffic_pct (rounded down per scraper)
    3. Set the target's variant to 'stable', is_default to TRUE and
       traffic_pct to default_traffic_pct

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper to promote
        default_traffic_pct: Traffic percentage to assign (default: 80)

    Returns:
        Updated scraper dict (id, job_type, version, variant, is_default,
        traffic_pct) or None if the scraper is unknown or deprecated
    """
    target_id = UUID(scraper_id)

    async with db.pool.acquire() as conn:
        async with conn.transaction():
            # Get the scraper to promote
            scraper = await conn.fetchrow("""
                SELECT id, job_type, version, variant
                FROM scraper_registry
                WHERE id = $1 AND deprecated_at IS NULL
            """, target_id)

            if not scraper:
                return None

            job_type = scraper['job_type']

            # Unset is_default on other scrapers of same job_type
            await conn.execute("""
                UPDATE scraper_registry
                SET is_default = FALSE
                WHERE job_type = $1 AND id != $2
            """, job_type, target_id)

            # Traffic left for everyone else once the target takes its share
            remaining_traffic = 100 - default_traffic_pct

            # Get other active scrapers that currently receive traffic
            other_scrapers = await conn.fetch("""
                SELECT id, traffic_pct
                FROM scraper_registry
                WHERE job_type = $1 AND id != $2 AND deprecated_at IS NULL AND traffic_pct > 0
            """, job_type, target_id)

            total_other_traffic = sum(s['traffic_pct'] for s in other_scrapers)
            if total_other_traffic > 0:
                for s in other_scrapers:
                    # Integer arithmetic: going through a float ratio can
                    # land just below a whole number (29 / 100 * 100 ==
                    # 28.999...), which int() would then truncate.
                    new_pct = (s['traffic_pct'] * remaining_traffic) // total_other_traffic
                    await conn.execute("""
                        UPDATE scraper_registry
                        SET traffic_pct = $2
                        WHERE id = $1
                    """, s['id'], new_pct)

            # Promote the target scraper
            updated = await conn.fetchrow("""
                UPDATE scraper_registry
                SET
                    variant = 'stable',
                    is_default = TRUE,
                    traffic_pct = $2
                WHERE id = $1
                RETURNING id, job_type, version, variant, is_default, traffic_pct
            """, target_id, default_traffic_pct)

            if updated:
                return dict(updated)
            return None
|
||||
|
||||
|
||||
# ==================== Dependency Injection ====================
|
||||
|
||||
# Module-level singletons; both stay None until set_database() is called.
_db: Optional[DatabaseManager] = None
_registry: Optional[ScraperRegistry] = None


def set_database(db: DatabaseManager):
    """
    Install the database used by this router.

    Also builds the ScraperRegistry on top of the same database, replacing
    any previously installed pair.
    """
    global _db, _registry
    _db = db
    # The registry wraps the same database instance
    _registry = ScraperRegistry(db)
|
||||
|
||||
|
||||
def get_db() -> DatabaseManager:
    """
    FastAPI dependency returning the installed database.

    Raises:
        HTTPException 500: when set_database() has not been called yet
    """
    if _db is not None:
        return _db
    raise HTTPException(status_code=500, detail="Database not initialized")
|
||||
|
||||
|
||||
def get_registry() -> ScraperRegistry:
    """
    FastAPI dependency returning the installed scraper registry.

    Raises:
        HTTPException 500: when set_database() has not been called yet
    """
    if _registry is not None:
        return _registry
    raise HTTPException(status_code=500, detail="Scraper registry not initialized")
|
||||
|
||||
|
||||
# ==================== API Endpoints ====================
|
||||
|
||||
def _version_sort_key(version: str) -> tuple:
    """Return a key that orders dotted versions numerically ('1.10.0' > '1.9.0')."""
    key = []
    for part in version.split('.'):
        if part.isdecimal():
            key.append((1, int(part), ''))
        else:
            # Non-numeric components fall back to text order and sort
            # below any numeric component in the same position.
            key.append((0, 0, part))
    return tuple(key)


@router.get(
    "/scrapers",
    response_model=List[ScraperInfoResponse],
    summary="List All Scrapers",
    description="Get a list of all registered scrapers with their stats"
)
async def list_scrapers(
    job_type: Optional[str] = Query(None, description="Filter by job type"),
    include_deprecated: bool = Query(False, description="Include deprecated scrapers"),
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    List all registered scrapers with their configuration and stats.

    Returns scraper information including:
    - Version and variant information
    - Traffic allocation percentage
    - Whether it's the default scraper
    - Last 24h performance stats (total jobs, success rate, avg duration)

    Use `job_type` filter to get scrapers for a specific job type.
    Set `include_deprecated=true` to include deprecated scrapers.

    Results are ordered by job_type descending, then by version descending
    using numeric comparison of the dotted components.

    Raises:
        HTTPException 500: on any failure while collecting the data
    """
    try:
        # Refresh cache to get latest data
        await registry.refresh_cache()

        # Get all scrapers
        scrapers = await registry.list_scrapers(
            job_type=job_type,
            include_deprecated=include_deprecated
        )

        # Enrich with stats
        result = []
        for scraper in scrapers:
            stats = await get_scraper_stats(db, scraper['id'])

            # Get full scraper info from DB to include job_type
            full_info = await get_scraper_by_id_from_db(db, scraper['id'])

            deprecated_at = None
            if full_info and full_info.get('deprecated_at'):
                raw = full_info['deprecated_at']
                # The response model documents ISO format; str() on a
                # datetime yields a space-separated form instead.
                deprecated_at = raw.isoformat() if hasattr(raw, 'isoformat') else str(raw)

            result.append(ScraperInfoResponse(
                id=scraper['id'],
                job_type=full_info['job_type'] if full_info else 'unknown',
                version=scraper['version'],
                variant=scraper['variant'],
                is_default=scraper['is_default'],
                traffic_pct=scraper['traffic_pct'],
                module_path=scraper['module_path'],
                function_name=scraper.get('function_name'),
                deprecated_at=deprecated_at,
                stats=stats
            ))

        # Sort by job_type, then by version descending. Versions are
        # compared component-wise as numbers; plain string comparison
        # would rank "1.9.0" above "1.10.0".
        result.sort(
            key=lambda item: (item.job_type, _version_sort_key(item.version)),
            reverse=True
        )

        return result

    except Exception as e:
        log.error(f"Error listing scrapers: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list scrapers: {str(e)}")
|
||||
|
||||
|
||||
@router.post(
    "/scrapers",
    response_model=RegisterScraperResponse,
    summary="Register New Scraper",
    description="Register a new scraper version"
)
async def register_scraper(
    request: RegisterScraperRequest,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Add a new scraper version to the registry.

    Registered versions can then be used for:
    - A/B testing (set traffic_pct to allocate traffic)
    - Canary releases (set variant to 'canary' with low traffic_pct)
    - Beta testing (set variant to 'beta')

    The new entry is always registered with is_default=False. A version
    string that already exists for the job_type (deprecated entries
    included) is rejected with 409.

    **Parameters:**
    - `job_type`: Type of scraping job (e.g., 'google_reviews')
    - `version`: Semantic version (e.g., '1.1.0')
    - `variant`: Release channel ('stable', 'beta', 'canary')
    - `module_path`: Python module path (e.g., 'scrapers.google_reviews.v1_1_0')
    - `function_name`: Entry function name (default: 'scrape')
    - `traffic_pct`: Initial traffic allocation (0-100, default: 0)
    - `config`: Optional configuration dict passed to the scraper

    Raises:
        HTTPException 409: version already registered for this job_type
        HTTPException 400: the registry raised ValueError
        HTTPException 500: any other failure
    """
    try:
        # Reject duplicates, looking at deprecated entries as well
        known = await registry.list_scrapers(job_type=request.job_type, include_deprecated=True)
        if any(entry['version'] == request.version for entry in known):
            raise HTTPException(
                status_code=409,
                detail=f"Scraper version {request.version} already exists for job_type {request.job_type}"
            )

        registration = dict(
            job_type=request.job_type,
            version=request.version,
            variant=request.variant,
            module_path=request.module_path,
            function_name=request.function_name,
            is_default=False,  # Never auto-set as default
            traffic_pct=request.traffic_pct,
            min_priority=request.min_priority,
            config=request.config,
        )
        new_id = await registry.register_scraper(**registration)

        log.info(f"Registered new scraper: {request.job_type} v{request.version} ({request.variant})")

        return RegisterScraperResponse(
            id=new_id,
            job_type=request.job_type,
            version=request.version,
            variant=request.variant,
            message=f"Successfully registered scraper {request.job_type} v{request.version} ({request.variant})"
        )

    except HTTPException:
        # Already carries the intended status code
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        log.error(f"Error registering scraper: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to register scraper: {str(e)}")
|
||||
|
||||
|
||||
@router.put(
    "/scrapers/{scraper_id}/traffic",
    response_model=UpdateTrafficResponse,
    summary="Update Traffic Percentage",
    description="Update the traffic allocation for a scraper"
)
async def update_traffic(
    scraper_id: str,
    request: UpdateTrafficRequest,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Store a new traffic percentage for one scraper.

    Flow:
    1. Reject IDs that do not parse as a UUID (400).
    2. Load the scraper row; 404 when it does not exist.
    3. Refuse to touch a scraper that has ``deprecated_at`` set (400).
    4. Persist ``request.traffic_pct``; a falsy result is reported as 500.
    5. Refresh the registry cache so the new value is picked up.

    This handler does not check that the percentages of all scrapers of the
    same job_type add up to 100.

    Raises:
        HTTPException: 400, 404 or 500 as described above; any unexpected
            error is logged and converted to a 500.
    """
    try:
        # Syntax check only; the parsed UUID object itself is not needed.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        record = await get_scraper_by_id_from_db(db, scraper_id)
        if not record:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if record.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Cannot update traffic for deprecated scraper")

        new_pct = request.traffic_pct
        updated = await update_scraper_traffic(db, scraper_id, new_pct)
        if not updated:
            raise HTTPException(status_code=500, detail="Failed to update traffic allocation")

        # The registry keeps a cache; reload it after the write.
        await registry.refresh_cache()

        log.info(f"Updated traffic for scraper {scraper_id} to {new_pct}%")

        return UpdateTrafficResponse(
            id=scraper_id,
            traffic_pct=new_pct,
            message=f"Traffic updated to {new_pct}%"
        )

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        log.error(f"Error updating traffic for scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to update traffic: {str(e)}")
|
||||
|
||||
|
||||
@router.post(
    "/scrapers/{scraper_id}/deprecate",
    response_model=DeprecateResponse,
    summary="Deprecate Scraper",
    description="Mark a scraper as deprecated (soft delete)"
)
async def deprecate_scraper(
    scraper_id: str,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Soft-delete a scraper version by marking it deprecated.

    Flow:
    1. Reject IDs that do not parse as a UUID (400).
    2. Load the scraper row; 404 when it does not exist.
    3. Reject a scraper that already has ``deprecated_at`` set (400).
    4. Call ``deprecate_scraper_by_id``; a falsy result is reported as 500.
    5. Refresh the registry cache.

    The response message states that traffic is set to 0%; the actual
    database change happens inside ``deprecate_scraper_by_id``.
    NOTE(review): confirm that helper really zeroes ``traffic_pct``.

    Raises:
        HTTPException: 400, 404 or 500 as described above; any unexpected
            error is logged and converted to a 500.
    """
    try:
        # Syntax check only; the parsed UUID object itself is not needed.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        record = await get_scraper_by_id_from_db(db, scraper_id)
        if not record:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if record.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Scraper is already deprecated")

        # The helper returns the deprecation timestamp on success.
        deprecated_at = await deprecate_scraper_by_id(db, scraper_id)
        if not deprecated_at:
            raise HTTPException(status_code=500, detail="Failed to deprecate scraper")

        # The registry keeps a cache; reload it after the write.
        await registry.refresh_cache()

        log.info(f"Deprecated scraper {scraper_id}")

        return DeprecateResponse(
            id=scraper_id,
            deprecated_at=deprecated_at,
            message="Scraper deprecated. Traffic allocation set to 0%."
        )

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        log.error(f"Error deprecating scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to deprecate scraper: {str(e)}")
|
||||
|
||||
|
||||
@router.post(
    "/scrapers/{scraper_id}/promote",
    response_model=PromoteResponse,
    summary="Promote Scraper",
    description="Promote scraper to stable variant and set as default"
)
async def promote_scraper(
    scraper_id: str,
    traffic_pct: int = Query(80, description="Traffic percentage to assign (0-100)", ge=0, le=100),
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Promote a scraper and report it as the stable default.

    Flow:
    1. Reject IDs that do not parse as a UUID (400).
    2. Load the scraper row; 404 when it does not exist.
    3. Refuse to promote a scraper that has ``deprecated_at`` set (400).
    4. Call ``promote_scraper_by_id`` with ``traffic_pct``; a falsy result
       is reported as 500.
    5. Refresh the registry cache.

    The response always reports ``variant='stable'`` and ``is_default=True``
    with the requested ``traffic_pct``. The database changes themselves are
    made by ``promote_scraper_by_id``.
    NOTE(review): confirm that helper also unsets the previous default and
    rebalances traffic for other scrapers of the same job_type.

    **Parameters:**
    - `traffic_pct`: Traffic percentage to assign (0-100, default: 80)

    Raises:
        HTTPException: 400, 404 or 500 as described above; any unexpected
            error is logged and converted to a 500.
    """
    try:
        # Syntax check only; the parsed UUID object itself is not needed.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        record = await get_scraper_by_id_from_db(db, scraper_id)
        if not record:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if record.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Cannot promote a deprecated scraper")

        outcome = await promote_scraper_by_id(db, scraper_id, traffic_pct)
        if not outcome:
            raise HTTPException(status_code=500, detail="Failed to promote scraper")

        # The registry keeps a cache; reload it after the write.
        await registry.refresh_cache()

        log.info(f"Promoted scraper {scraper_id} to stable with {traffic_pct}% traffic")

        return PromoteResponse(
            id=scraper_id,
            variant='stable',
            is_default=True,
            traffic_pct=traffic_pct,
            message=f"Scraper promoted to stable. Now default with {traffic_pct}% traffic."
        )

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        log.error(f"Error promoting scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to promote scraper: {str(e)}")
|
||||
|
||||
|
||||
@router.get(
    "/scrapers/{scraper_id}",
    response_model=ScraperInfoResponse,
    summary="Get Scraper Details",
    description="Get detailed information about a specific scraper"
)
async def get_scraper_details(
    scraper_id: str,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Return one scraper's stored attributes together with its stats.

    The ID must parse as a UUID (400 otherwise) and must match a stored
    scraper (404 otherwise). Stats come from ``get_scraper_stats``.
    ``registry`` is injected but not used by this handler.

    Raises:
        HTTPException: 400 or 404 as described above; any unexpected error
            is logged and converted to a 500.
    """
    try:
        # Syntax check only; the parsed UUID object itself is not needed.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        record = await get_scraper_by_id_from_db(db, scraper_id)
        if not record:
            raise HTTPException(status_code=404, detail="Scraper not found")

        stats = await get_scraper_stats(db, scraper_id)

        # Only stringify the deprecation timestamp when one is set.
        if record.get('deprecated_at'):
            deprecated_at = str(record['deprecated_at'])
        else:
            deprecated_at = None

        return ScraperInfoResponse(
            id=str(record['id']),
            job_type=record['job_type'],
            version=record['version'],
            variant=record['variant'],
            is_default=record['is_default'],
            traffic_pct=record['traffic_pct'],
            module_path=record['module_path'],
            function_name=record.get('function_name'),
            deprecated_at=deprecated_at,
            stats=stats
        )

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        log.error(f"Error getting scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scraper: {str(e)}")
|
||||
623
api/routes/dashboard.py
Normal file
623
api/routes/dashboard.py
Normal file
@@ -0,0 +1,623 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dashboard API for ReviewIQ Phase 5.
|
||||
|
||||
Provides system-wide analytics and monitoring endpoints:
|
||||
- Overview statistics (jobs by status, success rates, durations)
|
||||
- Client-level aggregations
|
||||
- Problem detection (failures, slow jobs, callback issues)
|
||||
- Scraper version performance analysis
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List, Dict, Any
|
||||
from enum import Enum
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Depends
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from core.database import DatabaseManager
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Router that groups every dashboard endpoint under the /api/dashboard prefix.
router = APIRouter(
    prefix="/api/dashboard",
    tags=["dashboard"],
)
|
||||
|
||||
|
||||
# ==================== Enums ====================
|
||||
|
||||
class TimePeriod(str, Enum):
    """Look-back windows accepted by the dashboard endpoints.

    Members are strings, so the raw values ("1h", "24h", ...) can be used
    directly as query-parameter values.
    """

    # Hour-based windows
    HOUR_1 = "1h"
    HOUR_6 = "6h"
    HOUR_24 = "24h"

    # Day-based windows
    DAY_7 = "7d"
    DAY_30 = "30d"
|
||||
|
||||
|
||||
# ==================== Pydantic Response Models ====================
|
||||
|
||||
class JobsByStatus(BaseModel):
    """Number of jobs in each status; every counter defaults to zero."""

    # Not yet finished
    pending: int = 0
    running: int = 0

    # Finished, in one way or another
    completed: int = 0
    failed: int = 0
    cancelled: int = 0
    partial: int = 0
|
||||
|
||||
|
||||
class OverviewResponse(BaseModel):
    """Response body of the dashboard overview endpoint."""

    period: str = Field(
        ...,
        description="Time period for the statistics (e.g., '24h')",
    )

    # Headline counters
    total_jobs: int = Field(..., description="Total number of jobs in the period")
    completed_jobs: int = Field(..., description="Number of successfully completed jobs")
    failed_jobs: int = Field(..., description="Number of failed jobs")
    running_jobs: int = Field(..., description="Number of currently running jobs")

    # Derived metrics
    success_rate: float = Field(..., description="Percentage of successful jobs (0-100)")
    avg_duration_seconds: Optional[float] = Field(
        default=None,
        description="Average job duration in seconds",
    )

    # Full breakdown and volume
    jobs_by_status: JobsByStatus = Field(..., description="Job counts grouped by status")
    total_reviews_scraped: int = Field(
        default=0,
        description="Total reviews scraped in the period",
    )
|
||||
|
||||
|
||||
class ClientStats(BaseModel):
    """Aggregated job figures for one client/source pair."""

    # Who submitted the jobs
    client_id: str = Field(..., description="Client identifier")
    source: Optional[str] = Field(
        default=None,
        description="Source of the requests (e.g., 'veritasreview.com')",
    )

    # Job counters
    total_jobs: int = Field(..., description="Total jobs submitted by this client")
    completed: int = Field(..., description="Number of completed jobs")
    failed: int = Field(..., description="Number of failed jobs")

    # Derived metrics
    success_rate: float = Field(..., description="Success rate percentage (0-100)")
    total_reviews: int = Field(
        default=0,
        description="Total reviews scraped for this client",
    )
|
||||
|
||||
|
||||
class FailedJob(BaseModel):
    """One entry in the list of failed jobs."""

    # Which job
    job_id: str = Field(..., description="Job UUID")
    url: str = Field(..., description="URL that was being scraped")

    # What went wrong
    error_type: Optional[str] = Field(default=None, description="Categorized error type")
    error_message: Optional[str] = Field(default=None, description="Error message")

    # When and for whom
    failed_at: str = Field(..., description="ISO timestamp when the job failed")
    client_id: Optional[str] = Field(
        default=None,
        description="Client who submitted the job",
    )
|
||||
|
||||
|
||||
class SlowJob(BaseModel):
    """One entry in the list of slow jobs (duration above 2x the average)."""

    # Which job
    job_id: str = Field(..., description="Job UUID")
    url: str = Field(..., description="URL that was being scraped")

    # How slow, in absolute and relative terms
    duration_seconds: float = Field(..., description="Actual job duration in seconds")
    avg_duration_seconds: float = Field(..., description="Average duration for comparison")
    ratio: float = Field(..., description="How many times slower than average")

    completed_at: str = Field(
        ...,
        description="ISO timestamp when the job completed",
    )
|
||||
|
||||
|
||||
class CallbackFailure(BaseModel):
    """One entry in the list of failed webhook callbacks."""

    # Which job and which endpoint
    job_id: str = Field(..., description="Job UUID")
    callback_url: str = Field(..., description="Webhook URL that failed")

    # Delivery state
    status: str = Field(..., description="Callback status")
    attempts: int = Field(..., description="Number of delivery attempts")
    last_error: Optional[str] = Field(default=None, description="Last error message")
|
||||
|
||||
|
||||
class ProblemsResponse(BaseModel):
    """Response body of the problems endpoint: three problem lists plus a total."""

    # Each list defaults to a fresh empty list.
    failed_jobs: List[FailedJob] = Field(
        default_factory=list,
        description="Recent job failures",
    )
    slow_jobs: List[SlowJob] = Field(
        default_factory=list,
        description="Jobs taking > 2x average duration",
    )
    callback_failures: List[CallbackFailure] = Field(
        default_factory=list,
        description="Failed webhook deliveries",
    )

    total_problems: int = Field(..., description="Total number of problems detected")
|
||||
|
||||
|
||||
class VersionStats(BaseModel):
    """Aggregated job figures for one scraper version/variant pair."""

    # Which scraper build
    version: str = Field(..., description="Scraper version string (e.g., '1.0.0')")
    variant: Optional[str] = Field(
        default=None,
        description="Scraper variant (e.g., 'stable', 'stealth')",
    )

    # How it performed
    total_jobs: int = Field(..., description="Total jobs run with this version")
    success_rate: float = Field(..., description="Success rate percentage (0-100)")
    avg_duration: Optional[float] = Field(
        default=None,
        description="Average job duration in seconds",
    )
    total_reviews: int = Field(
        default=0,
        description="Total reviews scraped with this version",
    )
|
||||
|
||||
|
||||
# ==================== Helper Functions ====================
|
||||
|
||||
def get_period_delta(period: TimePeriod) -> timedelta:
    """Translate a ``TimePeriod`` into the matching ``timedelta``.

    Any value that is not one of the known periods falls back to 24 hours.
    """
    windows = {
        TimePeriod.HOUR_1: timedelta(hours=1),
        TimePeriod.HOUR_6: timedelta(hours=6),
        TimePeriod.HOUR_24: timedelta(hours=24),
        TimePeriod.DAY_7: timedelta(days=7),
        TimePeriod.DAY_30: timedelta(days=30),
    }
    try:
        return windows[period]
    except KeyError:
        # Unknown period: use the same default as the endpoints (24h).
        return timedelta(hours=24)
|
||||
|
||||
|
||||
def categorize_error(error_message: Optional[str]) -> str:
    """Map a free-text error message onto a coarse error category.

    Matching is case-insensitive substring search. Rules are tried in a
    fixed order and the first hit wins, so a message such as
    "Browser page closed" is a navigation error, not a browser error.

    Returns:
        "unknown" for a missing or empty message, "other" when no rule
        matches, otherwise the category of the first matching rule.
    """
    if not error_message:
        return "unknown"

    text = error_message.lower()

    # The only rule that needs two keywords at once.
    if "rate" in text and "limit" in text:
        return "rate_limited"

    # (keywords, category): any keyword selects the category; order matters.
    # "captcha" also covers "recaptcha" because it is a substring of it.
    rules = (
        (("timeout",), "timeout"),
        (("captcha",), "captcha_blocked"),
        (("bot", "detected"), "bot_detected"),
        (("network", "connection"), "network_error"),
        (("element", "selector", "not found"), "selector_failed"),
        (("navigation", "page"), "navigation_error"),
        (("browser", "playwright"), "browser_error"),
    )
    for keywords, category in rules:
        if any(word in text for word in keywords):
            return category

    return "other"
|
||||
|
||||
|
||||
# ==================== Database Query Functions ====================
|
||||
|
||||
async def get_overview_stats(
    db: DatabaseManager,
    period: TimePeriod
) -> Dict[str, Any]:
    """
    Aggregate system-wide job figures for jobs created inside ``period``.

    Args:
        db: Database manager; a connection is taken from ``db.pool``.
        period: Look-back window, converted with ``get_period_delta``.

    Returns:
        Dict with the keys ``period``, ``total_jobs``, ``completed_jobs``,
        ``failed_jobs``, ``running_jobs``, ``success_rate``,
        ``avg_duration_seconds``, ``total_reviews_scraped`` and
        ``jobs_by_status``. ``success_rate`` is completed / (completed +
        failed + partial) as a percentage, or 0.0 when nothing finished.
    """
    # The cutoff uses the local clock without timezone information.
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        # One pass over the table: per-status counts, average scrape time
        # and review total, the last two over completed jobs only.
        row = await conn.fetchrow("""
            SELECT
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'pending') as pending,
                COUNT(*) FILTER (WHERE status = 'running') as running,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status = 'failed') as failed,
                COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled,
                COUNT(*) FILTER (WHERE status = 'partial') as partial,
                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
        """, window_start)

    # Missing counters are treated as zero.
    status_names = ('pending', 'running', 'completed', 'failed', 'cancelled', 'partial')
    by_status = {name: row[name] or 0 for name in status_names}

    # Only finished jobs take part in the success rate.
    finished = by_status['completed'] + by_status['failed'] + by_status['partial']
    if finished > 0:
        success_rate = by_status['completed'] / finished * 100
    else:
        success_rate = 0.0

    mean_duration = row['avg_duration']
    avg_duration_seconds = round(mean_duration, 1) if mean_duration else None

    return {
        'period': period.value,
        'total_jobs': row['total_jobs'] or 0,
        'completed_jobs': by_status['completed'],
        'failed_jobs': by_status['failed'],
        'running_jobs': by_status['running'],
        'success_rate': round(success_rate, 1),
        'avg_duration_seconds': avg_duration_seconds,
        'total_reviews_scraped': row['total_reviews'] or 0,
        'jobs_by_status': by_status,
    }
|
||||
|
||||
|
||||
async def get_stats_by_client(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 50
) -> List[Dict[str, Any]]:
    """
    Aggregate job figures per (client, source) for jobs created inside ``period``.

    Args:
        db: Database manager; a connection is taken from ``db.pool``.
        period: Look-back window, converted with ``get_period_delta``.
        limit: Maximum number of groups returned, busiest first.

    Returns:
        One dict per group with the keys ``client_id``, ``source``,
        ``total_jobs``, ``completed``, ``failed``, ``success_rate`` and
        ``total_reviews``. Here ``failed`` counts both 'failed' and
        'partial' jobs, and a missing client id is reported as 'unknown'.
    """
    # The cutoff uses the local clock without timezone information.
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                COALESCE(requester_client_id, 'unknown') as client_id,
                requester_source as source,
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
            GROUP BY requester_client_id, requester_source
            ORDER BY total_jobs DESC
            LIMIT $2
        """, window_start, limit)

    def _summarize(row) -> Dict[str, Any]:
        # Turn one aggregate row into the dict shape returned to callers.
        done = row['completed'] or 0
        unsuccessful = row['failed'] or 0
        finished = done + unsuccessful
        rate = (done / finished * 100) if finished > 0 else 0.0
        return {
            'client_id': row['client_id'],
            'source': row['source'],
            'total_jobs': row['total_jobs'],
            'completed': done,
            'failed': unsuccessful,
            'success_rate': round(rate, 1),
            'total_reviews': row['total_reviews'] or 0,
        }

    return [_summarize(row) for row in rows]
|
||||
|
||||
|
||||
async def get_problems(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 20
) -> Dict[str, Any]:
    """
    Collect recent problem jobs: failures, unusually slow runs and failed callbacks.

    Only jobs created inside ``period`` are considered, and each category is
    capped at ``limit`` rows.

    Args:
        db: Database manager; a connection is taken from ``db.pool``.
        period: Look-back window, converted with ``get_period_delta``.
        limit: Maximum number of rows per category.

    Returns:
        Dict with the keys ``failed_jobs`` (status 'failed' or 'partial'),
        ``slow_jobs`` (completed jobs whose scrape time exceeds twice the
        average of completed jobs in the window), ``callback_failures``
        (jobs whose callback status is 'failed') and ``total_problems``
        (the sum of the three list lengths).
    """
    # The cutoff uses the local clock without timezone information.
    cutoff = datetime.now() - get_period_delta(period)

    def _iso_or_now(timestamp) -> str:
        # Rows without a completion time are reported with the current time
        # so the response field is always populated.
        return timestamp.isoformat() if timestamp else datetime.now().isoformat()

    async with db.pool.acquire() as conn:
        # --- Failed / partial jobs ---
        failed_rows = await conn.fetch("""
            SELECT
                job_id,
                url,
                error_message,
                completed_at,
                requester_client_id
            FROM jobs
            WHERE status IN ('failed', 'partial')
              AND created_at >= $1
            ORDER BY completed_at DESC
            LIMIT $2
        """, cutoff, limit)

        failed_jobs = [
            {
                'job_id': str(row['job_id']),
                'url': row['url'],
                'error_type': categorize_error(row['error_message']),
                'error_message': row['error_message'],
                'failed_at': _iso_or_now(row['completed_at']),
                'client_id': row['requester_client_id'],
            }
            for row in failed_rows
        ]

        # --- Slow jobs ---
        # Baseline: average scrape time of completed jobs in the window.
        avg_duration = await conn.fetchval("""
            SELECT AVG(scrape_time)
            FROM jobs
            WHERE status = 'completed'
              AND scrape_time IS NOT NULL
              AND created_at >= $1
        """, cutoff)

        slow_jobs = []
        if avg_duration and avg_duration > 0:
            # AVG may come back as float or Decimal depending on the column
            # type; use a plain float for the query and the ratio below.
            avg_seconds = float(avg_duration)

            # The explicit cast matters: an untyped "$1 * 2" is resolved by
            # PostgreSQL as integer * integer, so the driver would refuse to
            # encode a fractional average as the argument.
            slow_rows = await conn.fetch("""
                SELECT
                    job_id,
                    url,
                    scrape_time,
                    completed_at
                FROM jobs
                WHERE status = 'completed'
                  AND scrape_time IS NOT NULL
                  AND scrape_time > $1::double precision * 2
                  AND created_at >= $2
                ORDER BY scrape_time DESC
                LIMIT $3
            """, avg_seconds, cutoff, limit)

            for row in slow_rows:
                duration = float(row['scrape_time'])
                slow_jobs.append({
                    'job_id': str(row['job_id']),
                    'url': row['url'],
                    'duration_seconds': round(duration, 1),
                    'avg_duration_seconds': round(avg_seconds, 1),
                    'ratio': round(duration / avg_seconds, 1),
                    'completed_at': _iso_or_now(row['completed_at']),
                })

        # --- Failed webhook callbacks ---
        callback_rows = await conn.fetch("""
            SELECT
                job_id,
                callback_url,
                callback_status,
                callback_attempts
            FROM jobs
            WHERE callback_url IS NOT NULL
              AND callback_status = 'failed'
              AND created_at >= $1
            ORDER BY completed_at DESC
            LIMIT $2
        """, cutoff, limit)

        callback_failures = [
            {
                'job_id': str(row['job_id']),
                'callback_url': row['callback_url'],
                'status': row['callback_status'] or 'failed',
                'attempts': row['callback_attempts'] or 0,
                'last_error': None,  # Would need to query webhook_attempts table
            }
            for row in callback_rows
        ]

    total_problems = len(failed_jobs) + len(slow_jobs) + len(callback_failures)

    return {
        'failed_jobs': failed_jobs,
        'slow_jobs': slow_jobs,
        'callback_failures': callback_failures,
        'total_problems': total_problems,
    }
|
||||
|
||||
|
||||
async def get_stats_by_version(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    Aggregate job figures per (scraper version, variant) for jobs created inside ``period``.

    Args:
        db: Database manager; a connection is taken from ``db.pool``.
        period: Look-back window, converted with ``get_period_delta``.
        limit: Maximum number of groups returned, busiest first.

    Returns:
        One dict per group with the keys ``version``, ``variant``,
        ``total_jobs``, ``success_rate``, ``avg_duration`` and
        ``total_reviews``. A missing version is reported as 'unknown', and
        'partial' jobs count as unsuccessful in the success rate.
    """
    # The cutoff uses the local clock without timezone information.
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                COALESCE(scraper_version, 'unknown') as version,
                scraper_variant as variant,
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
            GROUP BY scraper_version, scraper_variant
            ORDER BY total_jobs DESC
            LIMIT $2
        """, window_start, limit)

    def _summarize(row) -> Dict[str, Any]:
        # Turn one aggregate row into the dict shape returned to callers.
        done = row['completed'] or 0
        unsuccessful = row['failed'] or 0
        finished = done + unsuccessful
        rate = (done / finished * 100) if finished > 0 else 0.0
        mean_duration = row['avg_duration']
        return {
            'version': row['version'],
            'variant': row['variant'],
            'total_jobs': row['total_jobs'],
            'success_rate': round(rate, 1),
            'avg_duration': round(mean_duration, 1) if mean_duration else None,
            'total_reviews': row['total_reviews'] or 0,
        }

    return [_summarize(row) for row in rows]
|
||||
|
||||
|
||||
# ==================== Dependency Injection ====================
|
||||
|
||||
# Module-wide database handle; stays None until set_database() is called.
_db: Optional[DatabaseManager] = None


def set_database(db: DatabaseManager) -> None:
    """Store ``db`` as the database instance used by this router's endpoints."""
    global _db
    _db = db
|
||||
|
||||
|
||||
def get_db() -> DatabaseManager:
    """Return the module-wide database instance for use as a dependency.

    Raises:
        HTTPException: 500 when no database instance has been set yet.
    """
    if _db is not None:
        return _db
    raise HTTPException(status_code=500, detail="Database not initialized")
|
||||
|
||||
|
||||
# ==================== API Endpoints ====================
|
||||
|
||||
@router.get(
    "/overview",
    response_model=OverviewResponse,
    summary="Get Dashboard Overview",
    description="Get system-wide job statistics and success rates"
)
async def get_overview(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    db: DatabaseManager = Depends(get_db)
) -> OverviewResponse:
    """
    Serve the system-wide overview for the chosen time window.

    The figures come from ``get_overview_stats`` and are wrapped in an
    ``OverviewResponse``.

    - **period**: Time window to analyze (default: 24h); one of
      1h, 6h, 24h, 7d, 30d

    Raises:
        HTTPException: 500 when the statistics cannot be produced.
    """
    try:
        stats = await get_overview_stats(db, period)

        # Build the nested model first, then the response around it.
        status_counts = JobsByStatus(**stats['jobs_by_status'])

        return OverviewResponse(
            period=stats['period'],
            total_jobs=stats['total_jobs'],
            completed_jobs=stats['completed_jobs'],
            failed_jobs=stats['failed_jobs'],
            running_jobs=stats['running_jobs'],
            success_rate=stats['success_rate'],
            avg_duration_seconds=stats['avg_duration_seconds'],
            jobs_by_status=status_counts,
            total_reviews_scraped=stats['total_reviews_scraped'],
        )

    except Exception as e:
        log.error(f"Error getting dashboard overview: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get overview: {str(e)}")
|
||||
|
||||
|
||||
@router.get(
    "/by-client",
    response_model=List[ClientStats],
    summary="Get Stats by Client",
    description="Get job statistics grouped by client"
)
async def get_by_client(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(50, description="Maximum number of clients to return", ge=1, le=200),
    db: DatabaseManager = Depends(get_db)
) -> List[ClientStats]:
    """
    Serve per-client job statistics for the chosen time window.

    The figures come from ``get_stats_by_client``, which orders clients by
    total job count, highest first.

    - **period**: Time window to analyze (default: 24h)
    - **limit**: Maximum number of clients to return (default: 50, 1-200)

    Raises:
        HTTPException: 500 when the statistics cannot be produced.
    """
    # Exactly the keys copied from each stats dict into the response model.
    field_names = (
        'client_id',
        'source',
        'total_jobs',
        'completed',
        'failed',
        'success_rate',
        'total_reviews',
    )

    try:
        entries = await get_stats_by_client(db, period, limit)

        return [
            ClientStats(**{name: entry[name] for name in field_names})
            for entry in entries
        ]

    except Exception as e:
        log.error(f"Error getting client stats: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get client stats: {str(e)}")
|
||||
|
||||
|
||||
@router.get(
    "/problems",
    response_model=ProblemsResponse,
    summary="Get Recent Problems",
    description="Get recent failures, slow jobs, and callback issues"
)
async def get_problems_endpoint(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for problems (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(20, description="Maximum number of items per category", ge=1, le=100),
    db: DatabaseManager = Depends(get_db)
) -> ProblemsResponse:
    """
    Serve the recent problems for the chosen time window.

    The data comes from ``get_problems`` and is split into three lists:
    - **failed_jobs**: jobs that ended in failure
    - **slow_jobs**: jobs that took more than 2x the average duration
    - **callback_failures**: webhook deliveries that failed

    - **period**: Time window to analyze (default: 24h)
    - **limit**: Maximum items per category (default: 20, 1-100)

    Raises:
        HTTPException: 500 when the problem lists cannot be produced.
    """
    try:
        problems = await get_problems(db, period, limit)

        # Convert each raw dict into its response model.
        failed = [FailedJob(**item) for item in problems['failed_jobs']]
        slow = [SlowJob(**item) for item in problems['slow_jobs']]
        callbacks = [CallbackFailure(**item) for item in problems['callback_failures']]

        return ProblemsResponse(
            failed_jobs=failed,
            slow_jobs=slow,
            callback_failures=callbacks,
            total_problems=problems['total_problems'],
        )

    except Exception as e:
        log.error(f"Error getting problems: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get problems: {str(e)}")
|
||||
|
||||
|
||||
@router.get(
    "/by-version",
    response_model=List[VersionStats],
    summary="Get Stats by Scraper Version",
    description="Get performance statistics grouped by scraper version"
)
async def get_by_version(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(20, description="Maximum number of versions to return", ge=1, le=100),
    db: DatabaseManager = Depends(get_db)
) -> List[VersionStats]:
    """
    Serve per-version scraper statistics for the chosen time window.

    The figures come from ``get_stats_by_version``, which orders
    version/variant groups by total job count, highest first.

    - **period**: Time window to analyze (default: 24h)
    - **limit**: Maximum number of versions to return (default: 20, 1-100)

    Raises:
        HTTPException: 500 when the statistics cannot be produced.
    """
    # Exactly the keys copied from each stats dict into the response model.
    field_names = (
        'version',
        'variant',
        'total_jobs',
        'success_rate',
        'avg_duration',
        'total_reviews',
    )

    try:
        entries = await get_stats_by_version(db, period, limit)

        return [
            VersionStats(**{name: entry[name] for name in field_names})
            for entry in entries
        ]

    except Exception as e:
        log.error(f"Error getting version stats: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get version stats: {str(e)}")
|
||||
Reference in New Issue
Block a user