Phases 5-7: Dashboard UI, Admin API, and Auth middleware
Phase 5 - Main Dashboard:
- Dashboard overview page with system health stats
- Jobs by status breakdown, success rates, top clients
- Dashboard API (/api/dashboard/overview, by-client, problems, by-version)

Phase 6 - Admin/Scraper Management:
- Scrapers management page with traffic allocation UI
- Admin API for scraper CRUD operations
- Traffic percentage updates for A/B testing
- Promote/deprecate scraper versions

Phase 7 - Authentication:
- API key authentication middleware
- SHA-256 key hashing (keys never stored in plain text)
- Scope-based authorization (jobs:read, jobs:write, admin)
- Rate limiting per API key

Also:
- Updated api_server_production.py to include new routers
- Extended core/database.py with dashboard query methods
- Added dashboard link to sidebar navigation
- Updated CONTEXT-KEEPER.md to mark all phases complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -4,8 +4,14 @@ API Routes for ReviewIQ.
|
||||
This module exports all route modules for easy import into the main server.
|
||||
"""
|
||||
from api.routes.batches import router as batches_router, set_database as set_batches_db
|
||||
from api.routes.dashboard import router as dashboard_router, set_database as set_dashboard_db
|
||||
from api.routes.admin import router as admin_router, set_database as set_admin_db
|
||||
|
||||
# Public names of the routes package: every router is listed together with
# the setter that hands it a database instance.
__all__ = [
    # Batch routes
    "batches_router",
    "set_batches_db",
    # Dashboard routes
    "dashboard_router",
    "set_dashboard_db",
    # Admin routes
    "admin_router",
    "set_admin_db",
]
|
||||
|
||||
756
api/routes/admin.py
Normal file
756
api/routes/admin.py
Normal file
@@ -0,0 +1,756 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Admin API routes for scraper management.
|
||||
|
||||
Phase 6 - ReviewIQ Platform
|
||||
|
||||
Provides endpoints for:
|
||||
- Listing registered scrapers with stats
|
||||
- Registering new scraper versions
|
||||
- Updating traffic allocation for A/B testing
|
||||
- Deprecating scrapers (soft delete)
|
||||
- Promoting scrapers to stable/default
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List, Dict, Any
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Depends
|
||||
from pydantic import BaseModel, Field, validator
|
||||
|
||||
from core.database import DatabaseManager
|
||||
from scrapers.registry import ScraperRegistry
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Create router
|
||||
router = APIRouter(prefix="/api/admin", tags=["admin"])
|
||||
|
||||
|
||||
# ==================== Pydantic Models ====================
|
||||
|
||||
class ScraperStatsModel(BaseModel):
    """Statistics for a scraper over the last 24 hours."""

    # Every field has a zero default, so ScraperStatsModel() is a valid
    # "no data" value.
    total_jobs: int = Field(
        0,
        description="Total jobs processed",
    )
    success_rate: float = Field(
        0.0,
        description="Success rate percentage",
    )
    avg_duration: float = Field(
        0.0,
        description="Average scrape duration in seconds",
    )
|
||||
|
||||
|
||||
class ScraperInfoResponse(BaseModel):
    """Response model for scraper information."""

    # --- Identity (all required) ---
    id: str = Field(
        ...,
        description="Unique scraper registry ID",
    )
    job_type: str = Field(
        ...,
        description="Type of job this scraper handles",
    )
    version: str = Field(
        ...,
        description="Semantic version string",
    )
    variant: str = Field(
        ...,
        description="Release variant (stable, beta, canary)",
    )

    # --- Routing (all required) ---
    is_default: bool = Field(
        ...,
        description="Whether this is the default scraper",
    )
    traffic_pct: int = Field(
        ...,
        description="Traffic percentage for A/B testing (0-100)",
    )
    module_path: str = Field(
        ...,
        description="Python module path",
    )

    # --- Optional details ---
    function_name: Optional[str] = Field(
        default=None,
        description="Entry function name",
    )
    deprecated_at: Optional[str] = Field(
        default=None,
        description="Deprecation timestamp (ISO format)",
    )
    # Falls back to an all-zero stats object when none is supplied.
    stats: ScraperStatsModel = Field(
        default_factory=ScraperStatsModel,
        description="Last 24h stats",
    )
|
||||
|
||||
|
||||
class RegisterScraperRequest(BaseModel):
    """Request model for registering a new scraper."""

    # --- Required fields ---
    job_type: str = Field(
        ...,
        description="Type of job (e.g., 'google_reviews')",
    )
    version: str = Field(
        ...,
        description="Semantic version string (e.g., '1.1.0')",
    )
    variant: str = Field(
        ...,
        description="Release variant: stable, beta, or canary",
    )
    module_path: str = Field(
        ...,
        description="Python module path",
    )

    # --- Optional fields with defaults ---
    function_name: str = Field(
        "scrape",
        description="Entry function name",
    )
    traffic_pct: int = Field(
        0,
        description="Initial traffic percentage (0-100)",
        ge=0,
        le=100,
    )
    min_priority: int = Field(
        0,
        description="Minimum job priority required",
    )
    config: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional configuration",
    )

    @validator('variant')
    def validate_variant(cls, v):
        # Accept only the three known release channels.
        if v in ('stable', 'beta', 'canary'):
            return v
        raise ValueError("variant must be 'stable', 'beta', or 'canary'")

    @validator('version')
    def validate_version(cls, v):
        # Basic check only: the string must contain at least one dot, i.e.
        # split into two or more components.
        if '.' not in v:
            raise ValueError("version must be semantic version format (e.g., '1.0.0')")
        return v
|
||||
|
||||
|
||||
class RegisterScraperResponse(BaseModel):
    """Response model for scraper registration."""

    # All fields are required.
    id: str = Field(
        ...,
        description="Created scraper registry ID",
    )
    job_type: str = Field(
        ...,
        description="Job type",
    )
    version: str = Field(
        ...,
        description="Version string",
    )
    variant: str = Field(
        ...,
        description="Release variant",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
|
||||
|
||||
|
||||
class UpdateTrafficRequest(BaseModel):
    """Request model for updating traffic percentage."""

    # Required; constrained to the inclusive range 0..100.
    traffic_pct: int = Field(
        ...,
        ge=0,
        le=100,
        description="New traffic percentage (0-100)",
    )
|
||||
|
||||
|
||||
class UpdateTrafficResponse(BaseModel):
    """Response model for traffic update."""

    # All fields are required.
    id: str = Field(
        ...,
        description="Scraper registry ID",
    )
    traffic_pct: int = Field(
        ...,
        description="Updated traffic percentage",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
|
||||
|
||||
|
||||
class DeprecateResponse(BaseModel):
    """Response model for deprecation."""

    # All fields are required.
    id: str = Field(
        ...,
        description="Scraper registry ID",
    )
    deprecated_at: str = Field(
        ...,
        description="Deprecation timestamp",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
|
||||
|
||||
|
||||
class PromoteResponse(BaseModel):
    """Response model for promotion."""

    # All fields are required.
    id: str = Field(
        ...,
        description="Scraper registry ID",
    )
    variant: str = Field(
        ...,
        description="New variant (stable)",
    )
    is_default: bool = Field(
        ...,
        description="Whether now default",
    )
    traffic_pct: int = Field(
        ...,
        description="New traffic percentage",
    )
    message: str = Field(
        ...,
        description="Status message",
    )
|
||||
|
||||
|
||||
# ==================== Database Helper Functions ====================
|
||||
|
||||
async def get_scraper_stats(
    db: DatabaseManager,
    scraper_id: str,
    hours: int = 24
) -> ScraperStatsModel:
    """
    Get statistics for a specific scraper over the given time period.

    A job is attributed to the scraper when its ``metadata->>'scraper_id'``
    equals ``scraper_id``, or when its ``scraper_version`` (and
    ``scraper_variant``, if set) match the registry row with that ID.

    Args:
        db: Database manager instance
        scraper_id: UUID of the scraper registry entry
        hours: Number of hours to look back (default: 24)

    Returns:
        ScraperStatsModel with job counts, success rate, and avg duration.
        An all-zero model is returned when no jobs match or when the query
        fails (the failure is logged as a warning, not raised).
    """
    # The ID is bound twice: once compared as text against the JSON field
    # and once cast to uuid, so each placeholder has a single type.
    scraper_id_text = str(scraper_id)

    try:
        async with db.pool.acquire() as conn:
            # The look-back window is a real bind parameter ($1). A '%s'
            # inside a quoted SQL literal is never substituted, and the
            # number of arguments must match the number of $n placeholders.
            stats = await conn.fetchrow("""
                SELECT
                    COUNT(*) as total_jobs,
                    COUNT(*) FILTER (WHERE status = 'completed') as completed_jobs,
                    COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed_jobs,
                    AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration
                FROM jobs
                WHERE created_at >= NOW() - ($1::int * INTERVAL '1 hour')
                AND (
                    metadata->>'scraper_id' = $2
                    OR (scraper_version IS NOT NULL AND EXISTS (
                        SELECT 1 FROM scraper_registry sr
                        WHERE sr.id = $3::uuid
                        AND sr.version = jobs.scraper_version
                        AND sr.variant = COALESCE(jobs.scraper_variant, sr.variant)
                    ))
                )
            """, hours, scraper_id_text, scraper_id_text)

            if not stats or stats['total_jobs'] == 0:
                return ScraperStatsModel()

            total = stats['total_jobs']
            completed = stats['completed_jobs'] or 0
            success_rate = (completed / total * 100) if total > 0 else 0.0
            # AVG() is NULL when no completed job has a scrape_time.
            avg_duration = float(stats['avg_duration']) if stats['avg_duration'] else 0.0

            return ScraperStatsModel(
                total_jobs=total,
                success_rate=round(success_rate, 2),
                avg_duration=round(avg_duration, 2)
            )
    except Exception as e:
        # Best effort: stats are an enrichment, so a failure here degrades
        # to empty stats instead of failing the calling endpoint.
        log.warning(f"Error getting scraper stats for {scraper_id}: {e}")
        return ScraperStatsModel()
|
||||
|
||||
|
||||
async def get_scraper_by_id_from_db(
    db: DatabaseManager,
    scraper_id: str
) -> Optional[Dict[str, Any]]:
    """
    Fetch a single scraper_registry row by its ID.

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper registry entry; it is parsed
            with ``UUID(...)``, so a malformed value raises ValueError.

    Returns:
        The row as a plain dict, or None when no row has that ID.
    """
    query = """
        SELECT
            id,
            job_type,
            version,
            variant,
            module_path,
            function_name,
            is_default,
            traffic_pct,
            min_priority,
            config,
            deprecated_at
        FROM scraper_registry
        WHERE id = $1
    """
    async with db.pool.acquire() as conn:
        record = await conn.fetchrow(query, UUID(scraper_id))
        return dict(record) if record else None
|
||||
|
||||
|
||||
async def update_scraper_traffic(
    db: DatabaseManager,
    scraper_id: str,
    traffic_pct: int
) -> bool:
    """
    Set the traffic percentage of a non-deprecated scraper.

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper registry entry
        traffic_pct: New traffic percentage (0-100)

    Returns:
        True when exactly one row was updated, False otherwise (for
        example when the ID is unknown or the scraper is deprecated).
    """
    statement = """
        UPDATE scraper_registry
        SET traffic_pct = $2
        WHERE id = $1 AND deprecated_at IS NULL
    """
    async with db.pool.acquire() as conn:
        status = await conn.execute(statement, UUID(scraper_id), traffic_pct)
        # The last whitespace-separated token of the status string is
        # compared against "1" as the affected-row count.
        affected = status.split()[-1]
        return affected == "1"
|
||||
|
||||
|
||||
async def deprecate_scraper_by_id(
    db: DatabaseManager,
    scraper_id: str
) -> Optional[str]:
    """
    Soft-delete a scraper: stamp deprecated_at and zero its traffic.

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper registry entry

    Returns:
        The new deprecation timestamp as an ISO string, or None when no
        row was updated (unknown ID or already deprecated).
    """
    statement = """
        UPDATE scraper_registry
        SET deprecated_at = NOW(), traffic_pct = 0
        WHERE id = $1 AND deprecated_at IS NULL
        RETURNING deprecated_at
    """
    async with db.pool.acquire() as conn:
        stamped_at = await conn.fetchval(statement, UUID(scraper_id))
        if not stamped_at:
            return None
        return stamped_at.isoformat()
|
||||
|
||||
|
||||
async def promote_scraper_by_id(
    db: DatabaseManager,
    scraper_id: str,
    default_traffic_pct: int = 80
) -> Optional[Dict[str, Any]]:
    """
    Promote a scraper to the stable default for its job_type.

    Inside a single transaction this:
    1. Looks up the (non-deprecated) scraper to promote
    2. Clears is_default on every other scraper of the same job_type
    3. Rescales traffic_pct of the other active scrapers so they share
       ``100 - default_traffic_pct`` in proportion to their current weights
       (each share is truncated to an integer)
    4. Sets the target to variant 'stable', is_default TRUE and
       traffic_pct = default_traffic_pct

    Args:
        db: Database manager instance
        scraper_id: UUID string of the scraper to promote
        default_traffic_pct: Traffic percentage to assign (default: 80)

    Returns:
        Dict of the updated row (id, job_type, version, variant,
        is_default, traffic_pct), or None if the scraper was not found or
        is deprecated.
    """
    target_id = UUID(scraper_id)

    async with db.pool.acquire() as conn:
        async with conn.transaction():
            target = await conn.fetchrow("""
                SELECT id, job_type, version, variant
                FROM scraper_registry
                WHERE id = $1 AND deprecated_at IS NULL
            """, target_id)
            if not target:
                return None

            job_type = target['job_type']

            # Only one default per job_type: clear the flag on all siblings.
            await conn.execute("""
                UPDATE scraper_registry
                SET is_default = FALSE
                WHERE job_type = $1 AND id != $2
            """, job_type, target_id)

            # Traffic left over for the siblings once the target has its share.
            leftover = 100 - default_traffic_pct

            siblings = await conn.fetch("""
                SELECT id, traffic_pct
                FROM scraper_registry
                WHERE job_type = $1 AND id != $2 AND deprecated_at IS NULL AND traffic_pct > 0
            """, job_type, target_id)

            # An empty sibling list sums to 0 and skips the rescale entirely.
            sibling_total = sum(row['traffic_pct'] for row in siblings)
            if sibling_total > 0:
                for row in siblings:
                    share = int((row['traffic_pct'] / sibling_total) * leftover)
                    await conn.execute("""
                        UPDATE scraper_registry
                        SET traffic_pct = $2
                        WHERE id = $1
                    """, row['id'], share)

            promoted = await conn.fetchrow("""
                UPDATE scraper_registry
                SET
                    variant = 'stable',
                    is_default = TRUE,
                    traffic_pct = $2
                WHERE id = $1
                RETURNING id, job_type, version, variant, is_default, traffic_pct
            """, target_id, default_traffic_pct)

            return dict(promoted) if promoted else None
|
||||
|
||||
|
||||
# ==================== Dependency Injection ====================
|
||||
|
||||
# Module-level singletons shared by all admin endpoints. Both stay None
# until set_database() is called; get_db() / get_registry() answer with a
# 500 error while they are unset.
_db: Optional[DatabaseManager] = None
_registry: Optional[ScraperRegistry] = None
|
||||
|
||||
|
||||
def set_database(db: DatabaseManager):
    """Store the database for this router and build its scraper registry."""
    global _db, _registry
    # The registry is constructed from the same database instance.
    _db, _registry = db, ScraperRegistry(db)
|
||||
|
||||
|
||||
def get_db() -> DatabaseManager:
    """FastAPI dependency: return the configured database instance.

    Raises:
        HTTPException: 500 when set_database() has not been called yet.
    """
    if _db is not None:
        return _db
    raise HTTPException(status_code=500, detail="Database not initialized")
|
||||
|
||||
|
||||
def get_registry() -> ScraperRegistry:
    """FastAPI dependency: return the configured scraper registry.

    Raises:
        HTTPException: 500 when set_database() has not been called yet.
    """
    if _registry is not None:
        return _registry
    raise HTTPException(status_code=500, detail="Scraper registry not initialized")
|
||||
|
||||
|
||||
# ==================== API Endpoints ====================
|
||||
|
||||
def _version_sort_key(version: str) -> tuple:
    """Build a sort key that orders dotted versions numerically per component.

    Each dot-separated component becomes ``(number, is_plain, suffix)``,
    where ``number`` is its leading ASCII-digit run (-1 if there is none),
    ``is_plain`` is True when nothing follows the digits, and ``suffix`` is
    the remaining text. So "1.10.0" sorts after "1.9.0", and "1.0.0" sorts
    after "1.0.0-beta".
    """
    key = []
    for part in str(version).split('.'):
        idx = 0
        while idx < len(part) and part[idx] in '0123456789':
            idx += 1
        number = int(part[:idx]) if idx else -1
        suffix = part[idx:]
        key.append((number, suffix == '', suffix))
    return tuple(key)


@router.get(
    "/scrapers",
    response_model=List[ScraperInfoResponse],
    summary="List All Scrapers",
    description="Get a list of all registered scrapers with their stats"
)
async def list_scrapers(
    job_type: Optional[str] = Query(None, description="Filter by job type"),
    include_deprecated: bool = Query(False, description="Include deprecated scrapers"),
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    List all registered scrapers with their configuration and stats.

    Returns scraper information including:
    - Version and variant information
    - Traffic allocation percentage
    - Whether it's the default scraper
    - Last 24h performance stats (total jobs, success rate, avg duration)

    Use `job_type` filter to get scrapers for a specific job type.
    Set `include_deprecated=true` to include deprecated scrapers.

    Results are sorted by (job_type, version) in descending order, with
    version components compared numerically.

    Raises:
        HTTPException: 500 if the registry or database lookup fails.
    """
    try:
        # Refresh cache to get latest data
        await registry.refresh_cache()

        scrapers = await registry.list_scrapers(
            job_type=job_type,
            include_deprecated=include_deprecated
        )

        result = []
        for scraper in scrapers:
            scraper_id = str(scraper['id'])
            stats = await get_scraper_stats(db, scraper_id)

            # The registry entry is re-read from the DB because job_type and
            # deprecated_at are taken from the full row.
            full_info = await get_scraper_by_id_from_db(db, scraper_id)

            # Emit the timestamp in ISO format, as the response field
            # documents and as the deprecate endpoint already does.
            deprecated_at = full_info.get('deprecated_at') if full_info else None
            if not deprecated_at:
                deprecated_at = None
            elif hasattr(deprecated_at, 'isoformat'):
                deprecated_at = deprecated_at.isoformat()
            else:
                deprecated_at = str(deprecated_at)

            result.append(ScraperInfoResponse(
                id=scraper_id,
                job_type=full_info['job_type'] if full_info else 'unknown',
                version=scraper['version'],
                variant=scraper['variant'],
                is_default=scraper['is_default'],
                traffic_pct=scraper['traffic_pct'],
                module_path=scraper['module_path'],
                function_name=scraper.get('function_name'),
                deprecated_at=deprecated_at,
                stats=stats
            ))

        # Descending by job_type, then by version. Versions use a numeric
        # key because plain string comparison puts "1.10.0" before "1.9.0".
        result.sort(
            key=lambda x: (x.job_type, _version_sort_key(x.version)),
            reverse=True
        )

        return result

    except Exception as e:
        log.error(f"Error listing scrapers: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list scrapers: {str(e)}")
|
||||
|
||||
|
||||
@router.post(
    "/scrapers",
    response_model=RegisterScraperResponse,
    summary="Register New Scraper",
    description="Register a new scraper version"
)
async def register_scraper(
    request: RegisterScraperRequest,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Add a new scraper version to the registry.

    Typical uses:
    - A/B testing (allocate traffic through traffic_pct)
    - Canary releases (variant 'canary' with a low traffic_pct)
    - Beta testing (variant 'beta')

    The new entry is always registered with is_default=False, and receives
    no traffic until traffic_pct > 0.

    **Parameters:**
    - `job_type`: Type of scraping job (e.g., 'google_reviews')
    - `version`: Semantic version (e.g., '1.1.0')
    - `variant`: Release channel ('stable', 'beta', 'canary')
    - `module_path`: Python module path (e.g., 'scrapers.google_reviews.v1_1_0')
    - `function_name`: Entry function name (default: 'scrape')
    - `traffic_pct`: Initial traffic allocation (0-100, default: 0)
    - `config`: Optional configuration dict passed to the scraper

    Raises:
        HTTPException: 409 if the version already exists for the job_type
            (deprecated entries included), 400 on a ValueError from the
            registry, 500 on any other failure.
    """
    try:
        # Reject duplicates: the version must be unused for this job_type,
        # deprecated entries included.
        known = await registry.list_scrapers(job_type=request.job_type, include_deprecated=True)
        if any(entry['version'] == request.version for entry in known):
            raise HTTPException(
                status_code=409,
                detail=f"Scraper version {request.version} already exists for job_type {request.job_type}"
            )

        new_id = await registry.register_scraper(
            job_type=request.job_type,
            version=request.version,
            variant=request.variant,
            module_path=request.module_path,
            function_name=request.function_name,
            is_default=False,  # Never auto-set as default
            traffic_pct=request.traffic_pct,
            min_priority=request.min_priority,
            config=request.config
        )

        log.info(f"Registered new scraper: {request.job_type} v{request.version} ({request.variant})")

        return RegisterScraperResponse(
            id=new_id,
            job_type=request.job_type,
            version=request.version,
            variant=request.variant,
            message=f"Successfully registered scraper {request.job_type} v{request.version} ({request.variant})"
        )

    except HTTPException:
        # Pass deliberate HTTP errors (e.g. the 409 above) through untouched.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        log.error(f"Error registering scraper: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to register scraper: {str(e)}")
|
||||
|
||||
|
||||
@router.put(
    "/scrapers/{scraper_id}/traffic",
    response_model=UpdateTrafficResponse,
    summary="Update Traffic Percentage",
    description="Update the traffic allocation for a scraper"
)
async def update_traffic(
    scraper_id: str,
    request: UpdateTrafficRequest,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Change the traffic percentage of one scraper.

    The percentage controls what share of requests is routed to this
    scraper version. Typical uses:
    - Gradual rollouts (start at 10%, increase to 50%, then 100%)
    - A/B testing (set two versions to 50% each)
    - Canary releases (set new version to 5-10%)

    **Note:** Total traffic across all active scrapers of the same
    job_type should not exceed 100%. The system uses weighted random
    selection, so percentages are relative weights, not exact guarantees.

    Raises:
        HTTPException: 400 for a malformed ID or a deprecated scraper,
            404 when the scraper does not exist, 500 when the update fails.
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        current = await get_scraper_by_id_from_db(db, scraper_id)
        if not current:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if current.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Cannot update traffic for deprecated scraper")

        new_pct = request.traffic_pct
        updated = await update_scraper_traffic(db, scraper_id, new_pct)
        if not updated:
            raise HTTPException(status_code=500, detail="Failed to update traffic allocation")

        # Reload the registry cache after the change.
        await registry.refresh_cache()

        log.info(f"Updated traffic for scraper {scraper_id} to {new_pct}%")

        return UpdateTrafficResponse(
            id=scraper_id,
            traffic_pct=new_pct,
            message=f"Traffic updated to {new_pct}%"
        )

    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error updating traffic for scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to update traffic: {str(e)}")
|
||||
|
||||
|
||||
@router.post(
    "/scrapers/{scraper_id}/deprecate",
    response_model=DeprecateResponse,
    summary="Deprecate Scraper",
    description="Mark a scraper as deprecated (soft delete)"
)
async def deprecate_scraper(
    scraper_id: str,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Soft-delete a scraper version.

    Effects:
    - deprecated_at is set to the current time
    - traffic_pct is set to 0 (no new requests)
    - The row stays in the registry for historical reference

    Deprecated scrapers are excluded from normal routing but can
    still be explicitly requested by version for debugging.

    To permanently remove a scraper, use database admin tools.

    Raises:
        HTTPException: 400 for a malformed ID or an already deprecated
            scraper, 404 when the scraper does not exist, 500 when the
            update fails.
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        current = await get_scraper_by_id_from_db(db, scraper_id)
        if not current:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if current.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Scraper is already deprecated")

        stamped_at = await deprecate_scraper_by_id(db, scraper_id)
        if not stamped_at:
            raise HTTPException(status_code=500, detail="Failed to deprecate scraper")

        # Reload the registry cache after the change.
        await registry.refresh_cache()

        log.info(f"Deprecated scraper {scraper_id}")

        return DeprecateResponse(
            id=scraper_id,
            deprecated_at=stamped_at,
            message="Scraper deprecated. Traffic allocation set to 0%."
        )

    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error deprecating scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to deprecate scraper: {str(e)}")
|
||||
|
||||
|
||||
@router.post(
    "/scrapers/{scraper_id}/promote",
    response_model=PromoteResponse,
    summary="Promote Scraper",
    description="Promote scraper to stable variant and set as default"
)
async def promote_scraper(
    scraper_id: str,
    traffic_pct: int = Query(80, description="Traffic percentage to assign (0-100)", ge=0, le=100),
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Make a scraper the stable default for its job_type.

    The operation:
    1. Sets the scraper's variant to 'stable'
    2. Sets is_default to True
    3. Sets traffic_pct to the requested value (default: 80%)
    4. Clears is_default on other scrapers of the same job_type
    5. Redistributes the remaining traffic among other active scrapers

    **Use cases:**
    - Graduating a beta version to production
    - Making a canary release the new stable version
    - Switching to a new scraper implementation

    **Parameters:**
    - `traffic_pct`: Traffic percentage to assign (default: 80%)

    Raises:
        HTTPException: 400 for a malformed ID or a deprecated scraper,
            404 when the scraper does not exist, 500 when the promotion
            fails.
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        current = await get_scraper_by_id_from_db(db, scraper_id)
        if not current:
            raise HTTPException(status_code=404, detail="Scraper not found")
        if current.get('deprecated_at'):
            raise HTTPException(status_code=400, detail="Cannot promote a deprecated scraper")

        promoted = await promote_scraper_by_id(db, scraper_id, traffic_pct)
        if not promoted:
            raise HTTPException(status_code=500, detail="Failed to promote scraper")

        # Reload the registry cache after the change.
        await registry.refresh_cache()

        log.info(f"Promoted scraper {scraper_id} to stable with {traffic_pct}% traffic")

        # The response echoes the requested values, not the returned row.
        return PromoteResponse(
            id=scraper_id,
            variant='stable',
            is_default=True,
            traffic_pct=traffic_pct,
            message=f"Scraper promoted to stable. Now default with {traffic_pct}% traffic."
        )

    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error promoting scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to promote scraper: {str(e)}")
|
||||
|
||||
|
||||
@router.get(
    "/scrapers/{scraper_id}",
    response_model=ScraperInfoResponse,
    summary="Get Scraper Details",
    description="Get detailed information about a specific scraper"
)
async def get_scraper_details(
    scraper_id: str,
    db: DatabaseManager = Depends(get_db),
    registry: ScraperRegistry = Depends(get_registry)
):
    """
    Get detailed information about a specific scraper including stats.

    The scraper row is read directly from the database and enriched with
    the stats returned by get_scraper_stats().

    Raises:
        HTTPException: 400 for a malformed ID, 404 when the scraper does
            not exist, 500 on any other failure.
    """
    try:
        # Reject malformed IDs before touching the database.
        try:
            UUID(scraper_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid scraper ID format")

        scraper = await get_scraper_by_id_from_db(db, scraper_id)
        if not scraper:
            raise HTTPException(status_code=404, detail="Scraper not found")

        stats = await get_scraper_stats(db, scraper_id)

        # Emit the timestamp in ISO format, as the response field documents
        # and as the deprecate endpoint already does; str() on a datetime
        # uses a space instead of the 'T' separator.
        deprecated_at = scraper.get('deprecated_at')
        if not deprecated_at:
            deprecated_at = None
        elif hasattr(deprecated_at, 'isoformat'):
            deprecated_at = deprecated_at.isoformat()
        else:
            deprecated_at = str(deprecated_at)

        return ScraperInfoResponse(
            id=str(scraper['id']),
            job_type=scraper['job_type'],
            version=scraper['version'],
            variant=scraper['variant'],
            is_default=scraper['is_default'],
            traffic_pct=scraper['traffic_pct'],
            module_path=scraper['module_path'],
            function_name=scraper.get('function_name'),
            deprecated_at=deprecated_at,
            stats=stats
        )

    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Error getting scraper {scraper_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scraper: {str(e)}")
|
||||
623
api/routes/dashboard.py
Normal file
623
api/routes/dashboard.py
Normal file
@@ -0,0 +1,623 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dashboard API for ReviewIQ Phase 5.
|
||||
|
||||
Provides system-wide analytics and monitoring endpoints:
|
||||
- Overview statistics (jobs by status, success rates, durations)
|
||||
- Client-level aggregations
|
||||
- Problem detection (failures, slow jobs, callback issues)
|
||||
- Scraper version performance analysis
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List, Dict, Any
|
||||
from enum import Enum
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Depends
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from core.database import DatabaseManager
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Create router
|
||||
router = APIRouter(prefix="/api/dashboard", tags=["dashboard"])
|
||||
|
||||
|
||||
# ==================== Enums ====================
|
||||
|
||||
class TimePeriod(str, Enum):
    """Time period for filtering dashboard data"""

    # Hour-based windows
    HOUR_1 = "1h"
    HOUR_6 = "6h"
    HOUR_24 = "24h"

    # Day-based windows
    DAY_7 = "7d"
    DAY_30 = "30d"
|
||||
|
||||
|
||||
# ==================== Pydantic Response Models ====================
|
||||
|
||||
class JobsByStatus(BaseModel):
    """Job counts grouped by status"""

    # One counter per job status; each defaults to zero.
    pending: int = 0
    running: int = 0
    completed: int = 0
    failed: int = 0
    cancelled: int = 0
    partial: int = 0
|
||||
|
||||
|
||||
class OverviewResponse(BaseModel):
    """System-wide dashboard overview statistics"""

    # --- Required fields ---
    period: str = Field(
        ...,
        description="Time period for the statistics (e.g., '24h')",
    )
    total_jobs: int = Field(
        ...,
        description="Total number of jobs in the period",
    )
    completed_jobs: int = Field(
        ...,
        description="Number of successfully completed jobs",
    )
    failed_jobs: int = Field(
        ...,
        description="Number of failed jobs",
    )
    running_jobs: int = Field(
        ...,
        description="Number of currently running jobs",
    )
    success_rate: float = Field(
        ...,
        description="Percentage of successful jobs (0-100)",
    )
    # Optional: defaults to None when not supplied.
    avg_duration_seconds: Optional[float] = Field(
        default=None,
        description="Average job duration in seconds",
    )
    jobs_by_status: JobsByStatus = Field(
        ...,
        description="Job counts grouped by status",
    )
    total_reviews_scraped: int = Field(
        default=0,
        description="Total reviews scraped in the period",
    )
|
||||
|
||||
|
||||
class ClientStats(BaseModel):
    """One row of the per-client breakdown: job counts and success rate for a client."""

    client_id: str = Field(
        ...,
        description="Client identifier",
    )
    source: Optional[str] = Field(
        default=None,
        description="Source of the requests (e.g., 'veritasreview.com')",
    )
    total_jobs: int = Field(
        ...,
        description="Total jobs submitted by this client",
    )
    completed: int = Field(
        ...,
        description="Number of completed jobs",
    )
    failed: int = Field(
        ...,
        description="Number of failed jobs",
    )
    success_rate: float = Field(
        ...,
        description="Success rate percentage (0-100)",
    )
    total_reviews: int = Field(
        default=0,
        description="Total reviews scraped for this client",
    )
|
||||
|
||||
|
||||
class FailedJob(BaseModel):
    """A single failed job as reported by the problems endpoint."""

    job_id: str = Field(
        ...,
        description="Job UUID",
    )
    url: str = Field(
        ...,
        description="URL that was being scraped",
    )
    error_type: Optional[str] = Field(
        default=None,
        description="Categorized error type",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message",
    )
    failed_at: str = Field(
        ...,
        description="ISO timestamp when the job failed",
    )
    client_id: Optional[str] = Field(
        default=None,
        description="Client who submitted the job",
    )
|
||||
|
||||
|
||||
class SlowJob(BaseModel):
    """A completed job whose duration exceeded twice the period average."""

    job_id: str = Field(
        ...,
        description="Job UUID",
    )
    url: str = Field(
        ...,
        description="URL that was being scraped",
    )
    duration_seconds: float = Field(
        ...,
        description="Actual job duration in seconds",
    )
    avg_duration_seconds: float = Field(
        ...,
        description="Average duration for comparison",
    )
    ratio: float = Field(
        ...,
        description="How many times slower than average",
    )
    completed_at: str = Field(
        ...,
        description="ISO timestamp when the job completed",
    )
|
||||
|
||||
|
||||
class CallbackFailure(BaseModel):
    """A webhook callback that could not be delivered for a job."""

    job_id: str = Field(
        ...,
        description="Job UUID",
    )
    callback_url: str = Field(
        ...,
        description="Webhook URL that failed",
    )
    status: str = Field(
        ...,
        description="Callback status",
    )
    attempts: int = Field(
        ...,
        description="Number of delivery attempts",
    )
    last_error: Optional[str] = Field(
        default=None,
        description="Last error message",
    )
|
||||
|
||||
|
||||
class ProblemsResponse(BaseModel):
    """Response body of the problems endpoint: three problem lists plus their combined size."""

    failed_jobs: List[FailedJob] = Field(
        default_factory=list,
        description="Recent job failures",
    )
    slow_jobs: List[SlowJob] = Field(
        default_factory=list,
        description="Jobs taking > 2x average duration",
    )
    callback_failures: List[CallbackFailure] = Field(
        default_factory=list,
        description="Failed webhook deliveries",
    )
    total_problems: int = Field(
        ...,
        description="Total number of problems detected",
    )
|
||||
|
||||
|
||||
class VersionStats(BaseModel):
    """One row of the per-version breakdown: metrics for a scraper version/variant pair."""

    version: str = Field(
        ...,
        description="Scraper version string (e.g., '1.0.0')",
    )
    variant: Optional[str] = Field(
        default=None,
        description="Scraper variant (e.g., 'stable', 'stealth')",
    )
    total_jobs: int = Field(
        ...,
        description="Total jobs run with this version",
    )
    success_rate: float = Field(
        ...,
        description="Success rate percentage (0-100)",
    )
    avg_duration: Optional[float] = Field(
        default=None,
        description="Average job duration in seconds",
    )
    total_reviews: int = Field(
        default=0,
        description="Total reviews scraped with this version",
    )
|
||||
|
||||
|
||||
# ==================== Helper Functions ====================
|
||||
|
||||
def get_period_delta(period: TimePeriod) -> timedelta:
    """Translate a ``TimePeriod`` into the length of its look-back window.

    Any value that is not one of the known periods falls back to 24 hours.
    """
    windows = {
        TimePeriod.HOUR_1: timedelta(hours=1),
        TimePeriod.HOUR_6: timedelta(hours=6),
        TimePeriod.HOUR_24: timedelta(hours=24),
        TimePeriod.DAY_7: timedelta(days=7),
        TimePeriod.DAY_30: timedelta(days=30),
    }
    if period in windows:
        return windows[period]
    return timedelta(hours=24)
|
||||
|
||||
|
||||
def categorize_error(error_message: Optional[str]) -> str:
    """Map a free-form error message to a coarse error category.

    Matching is case-insensitive substring search. Rules are tried in a
    fixed order and the first match wins, so a message that mentions both
    "timeout" and "page" is reported as ``"timeout"``.

    Returns ``"unknown"`` for a missing or empty message and ``"other"``
    when no rule matches.
    """
    if not error_message:
        return "unknown"

    text = error_message.lower()

    # The rate-limit rule is the only one that needs *both* words present.
    if "rate" in text and "limit" in text:
        return "rate_limited"

    # Remaining rules: the category applies when any keyword occurs.
    # Order is significant and mirrors the precedence callers rely on.
    keyword_rules = (
        ("timeout", ("timeout",)),
        ("captcha_blocked", ("captcha", "recaptcha")),
        ("bot_detected", ("bot", "detected")),
        ("network_error", ("network", "connection")),
        ("selector_failed", ("element", "selector", "not found")),
        ("navigation_error", ("navigation", "page")),
        ("browser_error", ("browser", "playwright")),
    )
    for category, keywords in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return category

    return "other"
|
||||
|
||||
|
||||
# ==================== Database Query Functions ====================
|
||||
|
||||
async def get_overview_stats(
    db: DatabaseManager,
    period: TimePeriod
) -> Dict[str, Any]:
    """
    Aggregate system-wide job statistics for the requested period.

    Runs one query against ``jobs`` restricted to rows whose ``created_at``
    is at or after the start of the window, and returns a plain dict with
    the totals, the per-status counts, the success rate, the average
    duration of completed jobs, and the number of reviews scraped.
    """
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        # One pass over the table: a filtered count per status
        row = await conn.fetchrow("""
            SELECT
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'pending') as pending,
                COUNT(*) FILTER (WHERE status = 'running') as running,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status = 'failed') as failed,
                COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled,
                COUNT(*) FILTER (WHERE status = 'partial') as partial,
                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
        """, window_start)

    def count_of(column: str) -> int:
        """Read a counter column, treating a missing value as zero."""
        return row[column] or 0

    status_names = ('pending', 'running', 'completed', 'failed', 'cancelled', 'partial')
    by_status = {name: count_of(name) for name in status_names}

    completed_count = by_status['completed']
    failed_count = by_status['failed']

    # Success rate is measured against finished jobs only
    # (completed + failed + partial); pending/running/cancelled are excluded.
    finished_count = completed_count + failed_count + by_status['partial']
    if finished_count > 0:
        success_rate = completed_count / finished_count * 100
    else:
        success_rate = 0.0

    mean_duration = row['avg_duration']

    return {
        'period': period.value,
        'total_jobs': count_of('total_jobs'),
        'completed_jobs': completed_count,
        'failed_jobs': failed_count,
        'running_jobs': by_status['running'],
        'success_rate': round(success_rate, 1),
        'avg_duration_seconds': round(mean_duration, 1) if mean_duration else None,
        'total_reviews_scraped': count_of('total_reviews'),
        'jobs_by_status': by_status,
    }
|
||||
|
||||
|
||||
async def get_stats_by_client(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 50
) -> List[Dict[str, Any]]:
    """
    Aggregate job statistics per client for the requested period.

    Rows are grouped by ``requester_client_id`` and ``requester_source``,
    ordered by job count descending and capped at ``limit``. A NULL client
    id is reported as ``'unknown'``. In this breakdown both ``failed`` and
    ``partial`` jobs count as failures.
    """
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        client_rows = await conn.fetch("""
            SELECT
                COALESCE(requester_client_id, 'unknown') as client_id,
                requester_source as source,
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
            GROUP BY requester_client_id, requester_source
            ORDER BY total_jobs DESC
            LIMIT $2
        """, window_start, limit)

    def summarize(record) -> Dict[str, Any]:
        """Turn one aggregated row into the dict shape of ``ClientStats``."""
        done = record['completed'] or 0
        errored = record['failed'] or 0
        finished = done + errored
        # Rate is relative to finished jobs; 0.0 when nothing has finished.
        rate = (done / finished * 100) if finished > 0 else 0.0
        return {
            'client_id': record['client_id'],
            'source': record['source'],
            'total_jobs': record['total_jobs'],
            'completed': done,
            'failed': errored,
            'success_rate': round(rate, 1),
            'total_reviews': record['total_reviews'] or 0,
        }

    return [summarize(record) for record in client_rows]
|
||||
|
||||
|
||||
async def get_problems(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 20
) -> Dict[str, Any]:
    """
    Get recent failures and issues.

    Collects three lists from the ``jobs`` table, each restricted to rows
    created inside the period and capped at ``limit`` entries:

    - ``failed_jobs``: jobs with status ``failed`` or ``partial``, newest
      ``completed_at`` first, each tagged with a categorized error type.
    - ``slow_jobs``: completed jobs whose ``scrape_time`` exceeds twice the
      period's average ``scrape_time``, slowest first. Empty when there is
      no positive average to compare against.
    - ``callback_failures``: jobs that have a callback URL and a callback
      status of ``failed``.

    Returns a dict with those three lists plus ``total_problems``, the sum
    of their lengths.
    """
    delta = get_period_delta(period)
    cutoff = datetime.now() - delta

    async with db.pool.acquire() as conn:
        # Get failed jobs.
        # NULLS LAST: PostgreSQL places NULLs first under DESC, which would
        # let rows without a completion timestamp crowd the genuinely most
        # recent failures out of the LIMIT.
        failed_rows = await conn.fetch("""
            SELECT
                job_id,
                url,
                error_message,
                completed_at,
                requester_client_id
            FROM jobs
            WHERE status IN ('failed', 'partial')
              AND created_at >= $1
            ORDER BY completed_at DESC NULLS LAST
            LIMIT $2
        """, cutoff, limit)

        failed_jobs = [
            {
                'job_id': str(row['job_id']),
                'url': row['url'],
                'error_type': categorize_error(row['error_message']),
                'error_message': row['error_message'],
                # Falls back to the current time when no completion timestamp was recorded
                'failed_at': row['completed_at'].isoformat() if row['completed_at'] else datetime.now().isoformat(),
                'client_id': row['requester_client_id'],
            }
            for row in failed_rows
        ]

        # Get average duration for slow job detection
        avg_duration = await conn.fetchval("""
            SELECT AVG(scrape_time)
            FROM jobs
            WHERE status = 'completed'
              AND scrape_time IS NOT NULL
              AND created_at >= $1
        """, cutoff)

        slow_jobs = []
        if avg_duration and avg_duration > 0:
            # Find jobs taking > 2x average duration.
            # The threshold is computed here and bound with an explicit
            # cast: writing "$1 * 2" in SQL lets the server resolve the
            # untyped parameter against the integer literal, so a
            # fractional average would not be accepted as the argument.
            slow_threshold = float(avg_duration) * 2
            slow_rows = await conn.fetch("""
                SELECT
                    job_id,
                    url,
                    scrape_time,
                    completed_at
                FROM jobs
                WHERE status = 'completed'
                  AND scrape_time IS NOT NULL
                  AND scrape_time > $1::double precision
                  AND created_at >= $2
                ORDER BY scrape_time DESC
                LIMIT $3
            """, slow_threshold, cutoff, limit)

            slow_jobs = [
                {
                    'job_id': str(row['job_id']),
                    'url': row['url'],
                    'duration_seconds': round(row['scrape_time'], 1),
                    'avg_duration_seconds': round(avg_duration, 1),
                    'ratio': round(row['scrape_time'] / avg_duration, 1),
                    'completed_at': row['completed_at'].isoformat() if row['completed_at'] else datetime.now().isoformat(),
                }
                for row in slow_rows
            ]

        # Get callback failures (same NULLS LAST reasoning as above)
        callback_rows = await conn.fetch("""
            SELECT
                job_id,
                callback_url,
                callback_status,
                callback_attempts
            FROM jobs
            WHERE callback_url IS NOT NULL
              AND callback_status = 'failed'
              AND created_at >= $1
            ORDER BY completed_at DESC NULLS LAST
            LIMIT $2
        """, cutoff, limit)

        callback_failures = [
            {
                'job_id': str(row['job_id']),
                'callback_url': row['callback_url'],
                'status': row['callback_status'] or 'failed',
                'attempts': row['callback_attempts'] or 0,
                'last_error': None,  # Would need to query webhook_attempts table
            }
            for row in callback_rows
        ]

    total_problems = len(failed_jobs) + len(slow_jobs) + len(callback_failures)

    return {
        'failed_jobs': failed_jobs,
        'slow_jobs': slow_jobs,
        'callback_failures': callback_failures,
        'total_problems': total_problems,
    }
|
||||
|
||||
|
||||
async def get_stats_by_version(
    db: DatabaseManager,
    period: TimePeriod,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    Aggregate performance statistics per scraper version for the period.

    Rows are grouped by ``scraper_version`` and ``scraper_variant``, ordered
    by job count descending and capped at ``limit``. A NULL version is
    reported as ``'unknown'``. Both ``failed`` and ``partial`` jobs count
    as failures when computing the success rate.
    """
    window_start = datetime.now() - get_period_delta(period)

    async with db.pool.acquire() as conn:
        version_rows = await conn.fetch("""
            SELECT
                COALESCE(scraper_version, 'unknown') as version,
                scraper_variant as variant,
                COUNT(*) as total_jobs,
                COUNT(*) FILTER (WHERE status = 'completed') as completed,
                COUNT(*) FILTER (WHERE status IN ('failed', 'partial')) as failed,
                AVG(scrape_time) FILTER (WHERE status = 'completed' AND scrape_time IS NOT NULL) as avg_duration,
                COALESCE(SUM(reviews_count) FILTER (WHERE status = 'completed'), 0) as total_reviews
            FROM jobs
            WHERE created_at >= $1
            GROUP BY scraper_version, scraper_variant
            ORDER BY total_jobs DESC
            LIMIT $2
        """, window_start, limit)

    def summarize(record) -> Dict[str, Any]:
        """Turn one aggregated row into the dict shape of ``VersionStats``."""
        done = record['completed'] or 0
        errored = record['failed'] or 0
        finished = done + errored
        # Rate is relative to finished jobs; 0.0 when nothing has finished.
        rate = (done / finished * 100) if finished > 0 else 0.0
        mean_duration = record['avg_duration']
        return {
            'version': record['version'],
            'variant': record['variant'],
            'total_jobs': record['total_jobs'],
            'success_rate': round(rate, 1),
            'avg_duration': round(mean_duration, 1) if mean_duration else None,
            'total_reviews': record['total_reviews'] or 0,
        }

    return [summarize(record) for record in version_rows]
|
||||
|
||||
|
||||
# ==================== Dependency Injection ====================
|
||||
|
||||
# Database handle shared by every endpoint of this router.
# Stays None until set_database() is called.
_db: Optional[DatabaseManager] = None


def set_database(db: DatabaseManager):
    """Install ``db`` as the database instance this router's endpoints use."""
    global _db
    _db = db
|
||||
|
||||
|
||||
def get_db() -> DatabaseManager:
    """Dependency that returns the database instance installed for this router.

    Raises:
        HTTPException: 500 when no database instance has been set yet.
    """
    if _db is not None:
        return _db
    raise HTTPException(status_code=500, detail="Database not initialized")
|
||||
|
||||
|
||||
# ==================== API Endpoints ====================
|
||||
|
||||
@router.get(
    "/overview",
    response_model=OverviewResponse,
    summary="Get Dashboard Overview",
    description="Get system-wide job statistics and success rates"
)
async def get_overview(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    db: DatabaseManager = Depends(get_db)
) -> OverviewResponse:
    """
    Get system-wide dashboard statistics.

    Returns aggregate job counts, success rates, and average durations
    for the specified time period.

    - **period**: Time window to analyze (default: 24h)
        - 1h: Last hour
        - 6h: Last 6 hours
        - 24h: Last 24 hours
        - 7d: Last 7 days
        - 30d: Last 30 days

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        stats = await get_overview_stats(db, period)

        return OverviewResponse(
            period=stats['period'],
            total_jobs=stats['total_jobs'],
            completed_jobs=stats['completed_jobs'],
            failed_jobs=stats['failed_jobs'],
            running_jobs=stats['running_jobs'],
            success_rate=stats['success_rate'],
            avg_duration_seconds=stats['avg_duration_seconds'],
            jobs_by_status=JobsByStatus(**stats['jobs_by_status']),
            total_reviews_scraped=stats['total_reviews_scraped'],
        )

    except Exception as e:
        # log.exception records the traceback; a plain error message alone
        # does not show where the failure happened.
        log.exception("Error getting dashboard overview: %s", e)
        # Chain the original error so it stays attached to the HTTP error.
        raise HTTPException(status_code=500, detail=f"Failed to get overview: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get(
    "/by-client",
    response_model=List[ClientStats],
    summary="Get Stats by Client",
    description="Get job statistics grouped by client"
)
async def get_by_client(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(50, description="Maximum number of clients to return", ge=1, le=200),
    db: DatabaseManager = Depends(get_db)
) -> List[ClientStats]:
    """
    Get job statistics grouped by client.

    Returns aggregated statistics for each client including job counts,
    success rates, and total reviews scraped. Results are ordered by
    total job count descending.

    - **period**: Time window to analyze (default: 24h)
    - **limit**: Maximum number of clients to return (default: 50)

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        stats = await get_stats_by_client(db, period, limit)

        return [
            ClientStats(
                client_id=s['client_id'],
                source=s['source'],
                total_jobs=s['total_jobs'],
                completed=s['completed'],
                failed=s['failed'],
                success_rate=s['success_rate'],
                total_reviews=s['total_reviews'],
            )
            for s in stats
        ]

    except Exception as e:
        # log.exception records the traceback; a plain error message alone
        # does not show where the failure happened.
        log.exception("Error getting client stats: %s", e)
        # Chain the original error so it stays attached to the HTTP error.
        raise HTTPException(status_code=500, detail=f"Failed to get client stats: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get(
    "/problems",
    response_model=ProblemsResponse,
    summary="Get Recent Problems",
    description="Get recent failures, slow jobs, and callback issues"
)
async def get_problems_endpoint(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for problems (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(20, description="Maximum number of items per category", ge=1, le=100),
    db: DatabaseManager = Depends(get_db)
) -> ProblemsResponse:
    """
    Get recent failures and issues.

    Returns three categories of problems:
    - **failed_jobs**: Jobs that failed with errors
    - **slow_jobs**: Jobs that took more than 2x the average duration
    - **callback_failures**: Webhook deliveries that failed

    Each category includes relevant details for debugging and resolution.

    - **period**: Time window to analyze (default: 24h)
    - **limit**: Maximum items per category (default: 20)

    Raises:
        HTTPException: 500 when the problem lists cannot be computed.
    """
    try:
        problems = await get_problems(db, period, limit)

        return ProblemsResponse(
            failed_jobs=[FailedJob(**fj) for fj in problems['failed_jobs']],
            slow_jobs=[SlowJob(**sj) for sj in problems['slow_jobs']],
            callback_failures=[CallbackFailure(**cf) for cf in problems['callback_failures']],
            total_problems=problems['total_problems'],
        )

    except Exception as e:
        # log.exception records the traceback; a plain error message alone
        # does not show where the failure happened.
        log.exception("Error getting problems: %s", e)
        # Chain the original error so it stays attached to the HTTP error.
        raise HTTPException(status_code=500, detail=f"Failed to get problems: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get(
    "/by-version",
    response_model=List[VersionStats],
    summary="Get Stats by Scraper Version",
    description="Get performance statistics grouped by scraper version"
)
async def get_by_version(
    period: TimePeriod = Query(
        TimePeriod.HOUR_24,
        description="Time period for statistics (1h, 6h, 24h, 7d, 30d)"
    ),
    limit: int = Query(20, description="Maximum number of versions to return", ge=1, le=100),
    db: DatabaseManager = Depends(get_db)
) -> List[VersionStats]:
    """
    Get performance statistics grouped by scraper version.

    Useful for comparing the performance of different scraper versions
    and variants (e.g., 'stable' vs 'stealth'). Results are ordered by
    total job count descending.

    - **period**: Time window to analyze (default: 24h)
    - **limit**: Maximum number of versions to return (default: 20)

    Raises:
        HTTPException: 500 when the statistics cannot be computed.
    """
    try:
        stats = await get_stats_by_version(db, period, limit)

        return [
            VersionStats(
                version=s['version'],
                variant=s['variant'],
                total_jobs=s['total_jobs'],
                success_rate=s['success_rate'],
                avg_duration=s['avg_duration'],
                total_reviews=s['total_reviews'],
            )
            for s in stats
        ]

    except Exception as e:
        # log.exception records the traceback; a plain error message alone
        # does not show where the failure happened.
        log.exception("Error getting version stats: %s", e)
        # Chain the original error so it stays attached to the HTTP error.
        raise HTTPException(status_code=500, detail=f"Failed to get version stats: {str(e)}") from e
|
||||
Reference in New Issue
Block a user