Wave 4: JobDevTools UI components and crash report API

- Task #5: Create JobDevTools container component
  (tabs: All/Scraper/Browser/Network/System, level filters, count badges)
- Task #11: Add crash report API endpoints
  (GET /jobs/{id}/crash-report, POST /jobs/{id}/retry?apply_fix=true, GET /crashes/stats)
- Task #14: Create SessionPanel component
  (fingerprint display, bot detection indicators, collapsible sections)
- Task #15: Create MetricsDashboard with recharts
  (extraction rate, cumulative reviews, memory usage, scroll progress)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 12:37:56 +00:00
parent 9515dd2d42
commit 2637d982e0
4 changed files with 1331 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ import logging
import os
import time
from contextlib import asynccontextmanager
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from uuid import UUID
@@ -23,6 +24,7 @@ from modules.database import DatabaseManager, JobStatus
from modules.webhooks import WebhookDispatcher, WebhookManager
from modules.health_checks import HealthCheckSystem
from modules.scraper_clean import fast_scrape_reviews, LogCapture, get_business_card_info # Clean scraper
from modules.crash_analyzer import analyze_crash, summarize_crash_patterns, apply_auto_fix
from modules.structured_logger import StructuredLogger, LogEntry
from modules.chrome_pool import (
start_worker_pools,
@@ -207,6 +209,51 @@ class StatsResponse(BaseModel):
total_reviews: Optional[int] = None
class CrashAnalysisModel(BaseModel):
"""Crash analysis details"""
pattern: str = Field(..., description="Identified crash pattern type")
confidence: float = Field(..., description="Confidence score 0.0 to 1.0")
description: str = Field(..., description="Description of the crash cause")
suggested_fix: str = Field(..., description="Recommended fix action")
auto_fix_params: Optional[Dict[str, Any]] = Field(None, description="Parameters for auto-fix")
class CrashReportResponse(BaseModel):
"""Response model for crash report"""
crash_id: str
job_id: str
crash_type: str
error_message: Optional[str] = None
analysis: Optional[CrashAnalysisModel] = None
metrics_history: Optional[List[Dict[str, Any]]] = None
logs_before_crash: Optional[List[Dict[str, Any]]] = None
screenshot_url: Optional[str] = None
created_at: str
class RetryJobResponse(BaseModel):
"""Response model for retry job"""
job_id: str
status: str
message: str
applied_fixes: Optional[Dict[str, Any]] = None
class CrashPatternStats(BaseModel):
"""Statistics for a single crash pattern"""
count: int
percentage: float
avg_confidence: float
class CrashStatsResponse(BaseModel):
"""Response model for aggregate crash statistics"""
total_crashes: int
patterns: Dict[str, CrashPatternStats]
most_common: Optional[str] = None
recommendations: List[Dict[str, Any]]
# ==================== API Endpoints ====================
@app.get("/", summary="API Health Check")
@@ -946,6 +993,329 @@ async def pool_stats():
return await asyncio.to_thread(get_pool_stats)
# ==================== Crash Report Endpoints ====================
@app.get("/jobs/{job_id}/crash-report", response_model=CrashReportResponse, summary="Get Crash Report")
async def get_crash_report(job_id: UUID):
"""
Get the crash report for a failed or partial job.
Returns detailed crash analysis including:
- Crash pattern identification (memory_exhaustion, rate_limited, etc.)
- Confidence score for the pattern match
- Suggested fixes and auto-fix parameters
- Metrics history and logs before the crash
"""
if not db:
raise HTTPException(status_code=500, detail="Database not initialized")
# Verify job exists
job = await db.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="Job not found")
# Only failed or partial jobs have crash reports
if job['status'] not in ['failed', 'partial']:
raise HTTPException(
status_code=400,
detail=f"Job status is '{job['status']}' - crash reports only available for failed or partial jobs"
)
# Get crash report from database
crash_report = await db.get_crash_report(str(job_id))
if not crash_report:
# No stored crash report - generate one from job data
# Build crash report from job data for analysis
scrape_logs = job.get('scrape_logs')
if isinstance(scrape_logs, str):
try:
scrape_logs = json.loads(scrape_logs)
except:
scrape_logs = []
# Get metrics_history if available
metrics_history = job.get('metrics_history')
if isinstance(metrics_history, str):
try:
metrics_history = json.loads(metrics_history)
except:
metrics_history = []
crash_data = {
'error_message': job.get('error_message', 'Unknown error'),
'metrics_history': metrics_history or [],
'logs_before_crash': scrape_logs or [],
'state': {
'reviews_extracted': job.get('reviews_count', 0),
'total_reviews': job.get('total_reviews')
}
}
# Analyze the crash
analysis = analyze_crash(crash_data)
# Build response from job data and analysis
return CrashReportResponse(
crash_id=str(job_id), # Use job_id as crash_id when no stored report
job_id=str(job_id),
crash_type=analysis.pattern,
error_message=job.get('error_message'),
analysis=CrashAnalysisModel(
pattern=analysis.pattern,
confidence=analysis.confidence,
description=analysis.description,
suggested_fix=analysis.suggested_fix,
auto_fix_params=analysis.auto_fix_params
),
metrics_history=metrics_history,
logs_before_crash=scrape_logs,
screenshot_url=None,
created_at=job['completed_at'].isoformat() if job.get('completed_at') else job['created_at'].isoformat()
)
# Parse JSONB fields if needed
metrics_history = crash_report.get('metrics_history')
if isinstance(metrics_history, str):
try:
metrics_history = json.loads(metrics_history)
except:
metrics_history = []
logs_before_crash = crash_report.get('logs_before_crash')
if isinstance(logs_before_crash, str):
try:
logs_before_crash = json.loads(logs_before_crash)
except:
logs_before_crash = []
stored_analysis = crash_report.get('analysis')
if isinstance(stored_analysis, str):
try:
stored_analysis = json.loads(stored_analysis)
except:
stored_analysis = None
# If no analysis stored, generate one
if not stored_analysis:
crash_data = {
'error_message': crash_report.get('error_message', ''),
'metrics_history': metrics_history or [],
'logs_before_crash': logs_before_crash or [],
'crash_type': crash_report.get('crash_type'),
'state': crash_report.get('state', {})
}
analysis = analyze_crash(crash_data)
stored_analysis = {
'pattern': analysis.pattern,
'confidence': analysis.confidence,
'description': analysis.description,
'suggested_fix': analysis.suggested_fix,
'auto_fix_params': analysis.auto_fix_params
}
return CrashReportResponse(
crash_id=crash_report['crash_id'],
job_id=crash_report['job_id'],
crash_type=crash_report['crash_type'],
error_message=crash_report.get('error_message'),
analysis=CrashAnalysisModel(**stored_analysis) if stored_analysis else None,
metrics_history=metrics_history,
logs_before_crash=logs_before_crash,
screenshot_url=crash_report.get('screenshot_url'),
created_at=crash_report['created_at'].isoformat()
)
@app.post("/jobs/{job_id}/retry", response_model=RetryJobResponse, summary="Retry Failed Job")
async def retry_job(
job_id: UUID,
apply_fix: bool = Query(False, description="Apply auto-fix parameters based on crash analysis")
):
"""
Retry a failed or partial job, optionally applying auto-fix parameters.
When apply_fix=true:
- Analyzes the crash pattern from the original job
- Applies recommended parameter adjustments (e.g., reduced batch size for memory issues)
- Creates a new job with the adjusted parameters
Returns the new job ID for tracking.
"""
if not db:
raise HTTPException(status_code=500, detail="Database not initialized")
# Get original job
original_job = await db.get_job(job_id)
if not original_job:
raise HTTPException(status_code=404, detail="Job not found")
# Can only retry failed or partial jobs
if original_job['status'] not in ['failed', 'partial']:
raise HTTPException(
status_code=400,
detail=f"Cannot retry job with status '{original_job['status']}' - only failed or partial jobs can be retried"
)
# Parse original metadata
original_metadata = original_job.get('metadata')
if isinstance(original_metadata, str):
try:
original_metadata = json.loads(original_metadata)
except:
original_metadata = {}
original_metadata = original_metadata or {}
applied_fixes = None
if apply_fix:
# Get crash analysis to determine fixes
scrape_logs = original_job.get('scrape_logs')
if isinstance(scrape_logs, str):
try:
scrape_logs = json.loads(scrape_logs)
except:
scrape_logs = []
metrics_history = original_job.get('metrics_history')
if isinstance(metrics_history, str):
try:
metrics_history = json.loads(metrics_history)
except:
metrics_history = []
crash_data = {
'error_message': original_job.get('error_message', 'Unknown error'),
'metrics_history': metrics_history or [],
'logs_before_crash': scrape_logs or [],
'state': {
'reviews_extracted': original_job.get('reviews_count', 0),
'total_reviews': original_job.get('total_reviews')
}
}
analysis = analyze_crash(crash_data)
if analysis.auto_fix_params:
# Get current scraper params from metadata or use defaults
current_params = original_metadata.get('scraper_params', {})
# Apply the auto-fix parameters
fixed_params = apply_auto_fix(analysis.pattern, current_params)
# Store applied fixes in metadata
original_metadata['scraper_params'] = fixed_params
original_metadata['retry_info'] = {
'original_job_id': str(job_id),
'crash_pattern': analysis.pattern,
'applied_fixes': analysis.auto_fix_params
}
applied_fixes = analysis.auto_fix_params
log.info(f"Applying auto-fix for pattern '{analysis.pattern}': {applied_fixes}")
# Create new job with same URL and (possibly modified) metadata
new_job_id = await db.create_job(
url=original_job['url'],
webhook_url=original_job.get('webhook_url'),
webhook_secret=original_job.get('webhook_secret'),
metadata=original_metadata
)
# Start the new scraping job
asyncio.create_task(run_scraping_job(new_job_id))
log.info(f"Created retry job {new_job_id} for original job {job_id}")
return RetryJobResponse(
job_id=str(new_job_id),
status="started",
message=f"Retry job created from original job {job_id}",
applied_fixes=applied_fixes
)
@app.get("/crashes/stats", response_model=CrashStatsResponse, summary="Get Crash Statistics")
async def get_crash_stats(
days: int = Query(7, description="Number of days to look back", ge=1, le=90)
):
"""
Get aggregate crash statistics and pattern analysis.
Analyzes all crash reports from the specified time period to identify:
- Most common crash patterns
- Confidence scores for pattern detection
- Recommended fixes based on recurring patterns
Use this to identify systemic issues and optimize scraper configuration.
"""
if not db:
raise HTTPException(status_code=500, detail="Database not initialized")
# Get basic crash stats from database
basic_stats = await db.get_crash_stats(days=days)
# Get all failed/partial jobs for deeper analysis
failed_jobs = await db.list_jobs(status=JobStatus.FAILED, limit=500)
partial_jobs = await db.list_jobs(status=JobStatus.PARTIAL, limit=500)
all_crash_jobs = failed_jobs + partial_jobs
# Filter by time if needed (list_jobs doesn't have date filter)
cutoff = datetime.now() - timedelta(days=days)
recent_crash_jobs = [
job for job in all_crash_jobs
if job.get('completed_at') and job['completed_at'] > cutoff
]
if not recent_crash_jobs:
return CrashStatsResponse(
total_crashes=0,
patterns={},
most_common=None,
recommendations=[]
)
# Build crash reports for analysis
crash_reports = []
for job in recent_crash_jobs:
scrape_logs = job.get('scrape_logs')
if isinstance(scrape_logs, str):
try:
scrape_logs = json.loads(scrape_logs)
except:
scrape_logs = []
crash_reports.append({
'error_message': job.get('error_message', ''),
'metrics_history': [], # Not stored in job list query
'logs_before_crash': scrape_logs or [],
'state': {
'reviews_extracted': job.get('reviews_count', 0),
'total_reviews': job.get('total_reviews')
}
})
# Use summarize_crash_patterns for deep analysis
summary = summarize_crash_patterns(crash_reports)
# Convert patterns to response model format
patterns_response = {}
for pattern_name, stats in summary.get('patterns', {}).items():
patterns_response[pattern_name] = CrashPatternStats(
count=stats['count'],
percentage=stats['percentage'],
avg_confidence=stats['avg_confidence']
)
return CrashStatsResponse(
total_crashes=summary.get('total_crashes', 0),
patterns=patterns_response,
most_common=summary.get('most_common'),
recommendations=summary.get('recommendations', [])
)
# ==================== Health Check Endpoints ====================
@app.get("/health/live", summary="Liveness Probe")