Wave 4: JobDevTools UI components and crash report API

- Task #5: Create JobDevTools container component
  (tabs: All/Scraper/Browser/Network/System, level filters, count badges)
- Task #11: Add crash report API endpoints
  (GET /jobs/{id}/crash-report, POST /jobs/{id}/retry?apply_fix=true, GET /crashes/stats)
- Task #14: Create SessionPanel component
  (fingerprint display, bot detection indicators, collapsible sections)
- Task #15: Create MetricsDashboard with recharts
  (extraction rate, cumulative reviews, memory usage, scroll progress)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 12:37:56 +00:00
parent 9515dd2d42
commit 2637d982e0
4 changed files with 1331 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ import logging
import os import os
import time import time
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any from typing import Optional, List, Dict, Any
from uuid import UUID from uuid import UUID
@@ -23,6 +24,7 @@ from modules.database import DatabaseManager, JobStatus
from modules.webhooks import WebhookDispatcher, WebhookManager from modules.webhooks import WebhookDispatcher, WebhookManager
from modules.health_checks import HealthCheckSystem from modules.health_checks import HealthCheckSystem
from modules.scraper_clean import fast_scrape_reviews, LogCapture, get_business_card_info # Clean scraper from modules.scraper_clean import fast_scrape_reviews, LogCapture, get_business_card_info # Clean scraper
from modules.crash_analyzer import analyze_crash, summarize_crash_patterns, apply_auto_fix
from modules.structured_logger import StructuredLogger, LogEntry from modules.structured_logger import StructuredLogger, LogEntry
from modules.chrome_pool import ( from modules.chrome_pool import (
start_worker_pools, start_worker_pools,
@@ -207,6 +209,51 @@ class StatsResponse(BaseModel):
total_reviews: Optional[int] = None total_reviews: Optional[int] = None
# Schema for the analysis section of a crash report. The crash-report endpoint
# fills it either from an `analyze_crash` result or from a stored analysis dict.
class CrashAnalysisModel(BaseModel):
    """Crash analysis details"""
    # Name of the matched crash pattern (e.g. memory_exhaustion, rate_limited).
    pattern: str = Field(..., description="Identified crash pattern type")
    # The 0.0-1.0 range is documented only; no validator enforces it here.
    confidence: float = Field(..., description="Confidence score 0.0 to 1.0")
    description: str = Field(..., description="Description of the crash cause")
    suggested_fix: str = Field(..., description="Recommended fix action")
    # Optional: the retry endpoint only applies a fix when this is truthy.
    auto_fix_params: Optional[Dict[str, Any]] = Field(None, description="Parameters for auto-fix")
# Response body of GET /jobs/{job_id}/crash-report.
class CrashReportResponse(BaseModel):
    """Response model for crash report"""
    # When no stored report exists the endpoint reuses the job id as crash_id.
    crash_id: str
    job_id: str
    # For reports synthesized from job data this is the analysis pattern name.
    crash_type: str
    error_message: Optional[str] = None
    analysis: Optional[CrashAnalysisModel] = None
    # Decoded JSON lists; element schema is not defined in this file.
    metrics_history: Optional[List[Dict[str, Any]]] = None
    logs_before_crash: Optional[List[Dict[str, Any]]] = None
    # Always None for reports synthesized from job data.
    screenshot_url: Optional[str] = None
    # Timestamp rendered with datetime.isoformat().
    created_at: str
# Response body of POST /jobs/{job_id}/retry.
class RetryJobResponse(BaseModel):
    """Response model for retry job"""
    # Id of the newly created retry job, not of the original job.
    job_id: str
    # The retry endpoint currently always reports "started".
    status: str
    message: str
    # Auto-fix parameters from the crash analysis; None when no fix was applied.
    applied_fixes: Optional[Dict[str, Any]] = None
# Per-pattern entry of the crash statistics response; values are copied from
# the `summarize_crash_patterns` output.
class CrashPatternStats(BaseModel):
    """Statistics for a single crash pattern"""
    count: int
    # NOTE(review): scale (0-1 vs 0-100) is decided by summarize_crash_patterns - confirm.
    percentage: float
    avg_confidence: float
# Response body of GET /crashes/stats.
class CrashStatsResponse(BaseModel):
    """Response model for aggregate crash statistics"""
    total_crashes: int
    # Keyed by crash pattern name.
    patterns: Dict[str, CrashPatternStats]
    # None when there were no crashes in the requested window.
    most_common: Optional[str] = None
    # Passed through from summarize_crash_patterns; item schema not defined here.
    recommendations: List[Dict[str, Any]]
# ==================== API Endpoints ==================== # ==================== API Endpoints ====================
@app.get("/", summary="API Health Check") @app.get("/", summary="API Health Check")
@@ -946,6 +993,329 @@ async def pool_stats():
return await asyncio.to_thread(get_pool_stats) return await asyncio.to_thread(get_pool_stats)
# ==================== Crash Report Endpoints ====================
@app.get("/jobs/{job_id}/crash-report", response_model=CrashReportResponse, summary="Get Crash Report")
async def get_crash_report(job_id: UUID):
    """
    Get the crash report for a failed or partial job.

    Returns detailed crash analysis including:
    - Crash pattern identification (memory_exhaustion, rate_limited, etc.)
    - Confidence score for the pattern match
    - Suggested fixes and auto-fix parameters
    - Metrics history and logs before the crash
    """
    # NOTE: the docstring above is published as the endpoint description, so
    # implementation notes live in comments instead.
    # Errors: 500 if the database is not initialised, 404 for an unknown job,
    # 400 when the job is not in a failed/partial state.
    if not db:
        raise HTTPException(status_code=500, detail="Database not initialized")

    job = await db.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Only failed or partial jobs have crash reports
    if job['status'] not in ['failed', 'partial']:
        raise HTTPException(
            status_code=400,
            detail=f"Job status is '{job['status']}' - crash reports only available for failed or partial jobs"
        )

    crash_report = await db.get_crash_report(str(job_id))
    if not crash_report:
        # Nothing was persisted at crash time: analyse the job row instead.
        return _crash_report_from_job(job_id, job)
    return _crash_report_from_record(crash_report)


def _parse_json_field(value: Any, fallback: Any) -> Any:
    """Decode a JSON column that may arrive as a string.

    Non-string values (already-decoded containers or None) are returned
    unchanged; a string that is not valid JSON yields ``fallback``.
    """
    if not isinstance(value, str):
        return value
    try:
        return json.loads(value)
    except ValueError:  # json.JSONDecodeError subclasses ValueError
        return fallback


def _analysis_model(analysis: Any) -> CrashAnalysisModel:
    """Convert an ``analyze_crash`` result into the API response model."""
    return CrashAnalysisModel(
        pattern=analysis.pattern,
        confidence=analysis.confidence,
        description=analysis.description,
        suggested_fix=analysis.suggested_fix,
        auto_fix_params=analysis.auto_fix_params,
    )


def _crash_report_from_job(job_id: UUID, job: Any) -> CrashReportResponse:
    """Build a crash report on the fly from the job row alone."""
    scrape_logs = _parse_json_field(job.get('scrape_logs'), [])
    metrics_history = _parse_json_field(job.get('metrics_history'), [])

    analysis = analyze_crash({
        # `or` rather than a .get() default: the key exists with a None
        # value when the column is NULL, so a .get() default never applies.
        'error_message': job.get('error_message') or 'Unknown error',
        'metrics_history': metrics_history or [],
        'logs_before_crash': scrape_logs or [],
        'state': {
            'reviews_extracted': job.get('reviews_count') or 0,
            'total_reviews': job.get('total_reviews'),
        },
    })

    # Prefer the completion time; fall back to creation time.
    reported_at = job.get('completed_at') or job['created_at']
    return CrashReportResponse(
        crash_id=str(job_id),  # Use job_id as crash_id when no stored report
        job_id=str(job_id),
        crash_type=analysis.pattern,
        error_message=job.get('error_message'),
        analysis=_analysis_model(analysis),
        metrics_history=metrics_history,
        logs_before_crash=scrape_logs,
        screenshot_url=None,
        created_at=reported_at.isoformat(),
    )


def _crash_report_from_record(crash_report: Any) -> CrashReportResponse:
    """Build the response from a stored crash report, analysing it if needed."""
    metrics_history = _parse_json_field(crash_report.get('metrics_history'), [])
    logs_before_crash = _parse_json_field(crash_report.get('logs_before_crash'), [])
    stored_analysis = _parse_json_field(crash_report.get('analysis'), None)

    if stored_analysis:
        analysis = CrashAnalysisModel(**stored_analysis)
    else:
        # No analysis was stored with the report: generate one now.
        analysis = _analysis_model(analyze_crash({
            'error_message': crash_report.get('error_message') or '',
            'metrics_history': metrics_history or [],
            'logs_before_crash': logs_before_crash or [],
            'crash_type': crash_report.get('crash_type'),
            'state': _parse_json_field(crash_report.get('state'), {}) or {},
        }))

    return CrashReportResponse(
        # str() is a no-op for text ids and keeps the str-typed response
        # fields valid should the database layer return UUID objects.
        crash_id=str(crash_report['crash_id']),
        job_id=str(crash_report['job_id']),
        crash_type=crash_report['crash_type'],
        error_message=crash_report.get('error_message'),
        analysis=analysis,
        metrics_history=metrics_history,
        logs_before_crash=logs_before_crash,
        screenshot_url=crash_report.get('screenshot_url'),
        created_at=crash_report['created_at'].isoformat(),
    )
@app.post("/jobs/{job_id}/retry", response_model=RetryJobResponse, summary="Retry Failed Job")
async def retry_job(
    job_id: UUID,
    apply_fix: bool = Query(False, description="Apply auto-fix parameters based on crash analysis")
):
    """
    Retry a failed or partial job, optionally applying auto-fix parameters.

    When apply_fix=true:
    - Analyzes the crash pattern from the original job
    - Applies recommended parameter adjustments (e.g., reduced batch size for memory issues)
    - Creates a new job with the adjusted parameters

    Returns the new job ID for tracking.
    """
    # Errors: 500 if the database is not initialised, 404 for an unknown job,
    # 400 when the original job is not in a failed/partial state.
    if not db:
        raise HTTPException(status_code=500, detail="Database not initialized")

    original_job = await db.get_job(job_id)
    if not original_job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Can only retry failed or partial jobs
    if original_job['status'] not in ['failed', 'partial']:
        raise HTTPException(
            status_code=400,
            detail=f"Cannot retry job with status '{original_job['status']}' - only failed or partial jobs can be retried"
        )

    def decode_json(value: Any, fallback: Any) -> Any:
        """Decode a JSON column that may arrive as a string."""
        if not isinstance(value, str):
            return value
        try:
            return json.loads(value)
        except ValueError:  # json.JSONDecodeError subclasses ValueError
            return fallback

    # Work on a copy so the original job row is never mutated, and fall back
    # to an empty dict when the stored metadata is missing or not an object.
    stored_metadata = decode_json(original_job.get('metadata'), {})
    retry_metadata = dict(stored_metadata) if isinstance(stored_metadata, dict) else {}

    applied_fixes = None
    if apply_fix:
        scrape_logs = decode_json(original_job.get('scrape_logs'), [])
        metrics_history = decode_json(original_job.get('metrics_history'), [])

        analysis = analyze_crash({
            # `or` rather than a .get() default: a NULL column yields a
            # present key with value None, which a .get() default ignores.
            'error_message': original_job.get('error_message') or 'Unknown error',
            'metrics_history': metrics_history or [],
            'logs_before_crash': scrape_logs or [],
            'state': {
                'reviews_extracted': original_job.get('reviews_count') or 0,
                'total_reviews': original_job.get('total_reviews'),
            },
        })

        if analysis.auto_fix_params:
            # Get current scraper params from metadata or use defaults
            current_params = retry_metadata.get('scraper_params', {})
            retry_metadata['scraper_params'] = apply_auto_fix(analysis.pattern, current_params)
            retry_metadata['retry_info'] = {
                'original_job_id': str(job_id),
                'crash_pattern': analysis.pattern,
                'applied_fixes': analysis.auto_fix_params,
            }
            applied_fixes = analysis.auto_fix_params
            log.info(f"Applying auto-fix for pattern '{analysis.pattern}': {applied_fixes}")

    # Create new job with same URL and (possibly modified) metadata
    new_job_id = await db.create_job(
        url=original_job['url'],
        webhook_url=original_job.get('webhook_url'),
        webhook_secret=original_job.get('webhook_secret'),
        metadata=retry_metadata,
    )

    # The event loop keeps only weak references to tasks; hold a strong one
    # until the scrape finishes so it cannot be garbage-collected mid-run.
    scrape_task = asyncio.create_task(run_scraping_job(new_job_id))
    _retry_tasks.add(scrape_task)
    scrape_task.add_done_callback(_retry_tasks.discard)

    log.info(f"Created retry job {new_job_id} for original job {job_id}")
    return RetryJobResponse(
        job_id=str(new_job_id),
        status="started",
        message=f"Retry job created from original job {job_id}",
        applied_fixes=applied_fixes,
    )


# Strong references to in-flight retry scraping tasks (see retry_job).
_retry_tasks: set = set()
@app.get("/crashes/stats", response_model=CrashStatsResponse, summary="Get Crash Statistics")
async def get_crash_stats(
    days: int = Query(7, description="Number of days to look back", ge=1, le=90)
):
    """
    Get aggregate crash statistics and pattern analysis.

    Analyzes all crash reports from the specified time period to identify:
    - Most common crash patterns
    - Confidence scores for pattern detection
    - Recommended fixes based on recurring patterns

    Use this to identify systemic issues and optimize scraper configuration.
    """
    # Errors: 500 if the database is not initialised.
    if not db:
        raise HTTPException(status_code=500, detail="Database not initialized")

    # list_jobs has no date filter, so fetch up to 500 jobs per status and
    # apply the look-back window here.
    failed_jobs = await db.list_jobs(status=JobStatus.FAILED, limit=500)
    partial_jobs = await db.list_jobs(status=JobStatus.PARTIAL, limit=500)

    window = timedelta(days=days)

    def finished_within_window(job: Any) -> bool:
        completed_at = job.get('completed_at')
        if not completed_at:
            return False
        # Build "now" with the same tz-awareness as the stored timestamp:
        # comparing a naive and an aware datetime raises TypeError.
        return completed_at > datetime.now(completed_at.tzinfo) - window

    recent_crash_jobs = [
        job for job in [*failed_jobs, *partial_jobs]
        if finished_within_window(job)
    ]
    if not recent_crash_jobs:
        return CrashStatsResponse(
            total_crashes=0,
            patterns={},
            most_common=None,
            recommendations=[]
        )

    # Build crash reports for analysis
    crash_reports = []
    for job in recent_crash_jobs:
        scrape_logs = job.get('scrape_logs')
        if isinstance(scrape_logs, str):
            try:
                scrape_logs = json.loads(scrape_logs)
            except ValueError:  # json.JSONDecodeError subclasses ValueError
                scrape_logs = []
        crash_reports.append({
            # `or` rather than a .get() default so a NULL column becomes ''.
            'error_message': job.get('error_message') or '',
            'metrics_history': [],  # Not stored in job list query
            'logs_before_crash': scrape_logs or [],
            'state': {
                'reviews_extracted': job.get('reviews_count') or 0,
                'total_reviews': job.get('total_reviews'),
            },
        })

    # Use summarize_crash_patterns for deep analysis
    summary = summarize_crash_patterns(crash_reports)

    # Convert patterns to response model format
    patterns_response = {
        pattern_name: CrashPatternStats(
            count=stats['count'],
            percentage=stats['percentage'],
            avg_confidence=stats['avg_confidence'],
        )
        for pattern_name, stats in summary.get('patterns', {}).items()
    }

    return CrashStatsResponse(
        total_crashes=summary.get('total_crashes', 0),
        patterns=patterns_response,
        most_common=summary.get('most_common'),
        recommendations=summary.get('recommendations', [])
    )
# ==================== Health Check Endpoints ==================== # ==================== Health Check Endpoints ====================
@app.get("/health/live", summary="Liveness Probe") @app.get("/health/live", summary="Liveness Probe")

View File

@@ -0,0 +1,386 @@
'use client';
import { useMemo } from 'react';
import {
LineChart,
Line,
AreaChart,
Area,
XAxis,
YAxis,
CartesianGrid,
Tooltip,
ResponsiveContainer,
ReferenceLine,
} from 'recharts';
import { Activity, TrendingUp, HardDrive, Scroll } from 'lucide-react';
/**
 * Represents a single metrics sample collected during job execution
 */
export interface MetricsSample {
  // Sample time in milliseconds; the dashboard only uses differences between
  // samples (relative to the first one) to label the time axis.
  timestamp_ms: number;
  // Plotted as the "Cumulative Reviews Extracted" series.
  reviews_extracted: number;
  // Plotted as the "Scroll Progress" series.
  scroll_count: number;
  // Compared against the dashboard's memory warning threshold.
  memory_mb: number;
  extraction_rate: number; // reviews per second
}
/** Props accepted by the MetricsDashboard component. */
interface MetricsDashboardProps {
  // Samples to chart; the first entry's timestamp is the origin of the time axis.
  metricsHistory: MetricsSample[];
  // When provided, takes precedence over the last history entry for the summary tiles.
  currentMetrics?: MetricsSample;
  // Shows the "Live" indicator in the header when true.
  isStreaming: boolean;
}
/**
 * Renders the time elapsed between `startMs` and `timestampMs` (both in
 * milliseconds) as a compact label: whole seconds below one minute ("45s"),
 * otherwise minutes with the seconds part omitted when it is zero
 * ("2m", "2m 5s").
 */
function formatRelativeTime(timestampMs: number, startMs: number): string {
  const totalSeconds = Math.floor((timestampMs - startMs) / 1000);
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }

  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return seconds === 0 ? `${minutes}m` : `${minutes}m ${seconds}s`;
}
/** Memory level (MB) at or above which the dashboard switches to its warning styling. */
const MEMORY_WARNING_MB = 1500;

/** Margin shared by every chart. */
const CHART_MARGIN = { top: 5, right: 20, left: 0, bottom: 5 };

/** Tick and line styling shared by every X and Y axis. */
const AXIS_STYLE = {
  tick: { fill: '#9ca3af', fontSize: 11 },
  tickLine: { stroke: '#4b5563' },
  axisLine: { stroke: '#4b5563' },
};

/** Tooltip box styling shared by every chart. */
const TOOLTIP_CONTENT_STYLE = {
  backgroundColor: '#1f2937',
  border: '1px solid #374151',
  borderRadius: '8px',
  padding: '8px 12px',
};

/** Tooltip label (time line) styling shared by every chart. */
const TOOLTIP_LABEL_STYLE = { color: '#9ca3af', marginBottom: '4px' };

/** Summary tile: an icon + label row above a large coloured value. */
function StatTile({
  icon: Icon,
  label,
  valueClassName,
  children,
}: {
  icon: typeof Activity;
  label: string;
  valueClassName: string;
  children: React.ReactNode;
}) {
  return (
    <div className="bg-gray-800 rounded-lg p-4 border border-gray-700">
      <div className="flex items-center gap-2 text-gray-400 text-xs mb-1">
        <Icon className="w-4 h-4" />
        <span>{label}</span>
      </div>
      <div className={valueClassName}>{children}</div>
    </div>
  );
}

/**
 * Card shell around one chart: title, fixed-height plot area and the
 * "No data yet" placeholder. `children` must be the chart element itself
 * because ResponsiveContainer sizes its single direct child.
 */
function ChartCard({
  title,
  hasData,
  children,
}: {
  title: React.ReactNode;
  hasData: boolean;
  children: React.ReactElement;
}) {
  return (
    <div className="bg-gray-800 rounded-lg p-4 border border-gray-700">
      <h4 className="text-sm font-medium text-gray-300 mb-3">{title}</h4>
      <div className="h-[200px]">
        {hasData ? (
          <ResponsiveContainer width="100%" height="100%">
            {children}
          </ResponsiveContainer>
        ) : (
          <div className="h-full flex items-center justify-center text-gray-500">
            No data yet
          </div>
        )}
      </div>
    </div>
  );
}

/**
 * MetricsDashboard displays real-time metrics during job execution:
 * four summary tiles (reviews, scrolls, rate, memory) followed by charts for
 * extraction rate, cumulative reviews, memory usage and scroll progress.
 *
 * The summary tiles use `currentMetrics` when given, otherwise the last
 * history sample; the charts are drawn from `metricsHistory` only.
 */
export default function MetricsDashboard({
  metricsHistory,
  currentMetrics,
  isStreaming,
}: MetricsDashboardProps) {
  // Determine the starting timestamp for relative time calculations
  const startTimestamp = useMemo(() => {
    if (metricsHistory.length > 0) {
      return metricsHistory[0].timestamp_ms;
    }
    return currentMetrics?.timestamp_ms ?? Date.now();
  }, [metricsHistory, currentMetrics]);

  // Transform metrics history for charts with relative time labels
  const chartData = useMemo(() => {
    return metricsHistory.map((sample) => ({
      ...sample,
      time: formatRelativeTime(sample.timestamp_ms, startTimestamp),
      timeMs: sample.timestamp_ms - startTimestamp,
    }));
  }, [metricsHistory, startTimestamp]);

  // Get the latest metrics (either current or last from history)
  const latestMetrics = currentMetrics ?? metricsHistory[metricsHistory.length - 1];
  const isMemoryWarning = Boolean(latestMetrics && latestMetrics.memory_mb >= MEMORY_WARNING_MB);
  const hasData = chartData.length > 0;

  return (
    <div className="space-y-4">
      {/* Header with Live Indicator */}
      <div className="flex items-center justify-between">
        <h3 className="text-lg font-semibold text-gray-100">Real-Time Metrics</h3>
        {isStreaming && (
          <div className="flex items-center gap-2 px-3 py-1 bg-green-900/50 border border-green-700 rounded-full">
            <span className="relative flex h-2 w-2">
              <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-green-400 opacity-75"></span>
              <span className="relative inline-flex rounded-full h-2 w-2 bg-green-500"></span>
            </span>
            <span className="text-sm font-medium text-green-400">Live</span>
          </div>
        )}
      </div>

      {/* Progress Summary - Current Stats */}
      <div className="grid grid-cols-2 md:grid-cols-4 gap-3">
        <StatTile icon={TrendingUp} label="Total Reviews" valueClassName="text-2xl font-bold text-blue-400">
          {latestMetrics?.reviews_extracted ?? 0}
        </StatTile>
        <StatTile icon={Scroll} label="Scrolls" valueClassName="text-2xl font-bold text-purple-400">
          {latestMetrics?.scroll_count ?? 0}
        </StatTile>
        <StatTile icon={Activity} label="Rate (r/s)" valueClassName="text-2xl font-bold text-green-400">
          {latestMetrics?.extraction_rate?.toFixed(2) ?? '0.00'}
        </StatTile>

        {/* Memory Usage - kept inline because its styling depends on the warning state */}
        <div className={`bg-gray-800 rounded-lg p-4 border ${
          isMemoryWarning ? 'border-red-500' : 'border-gray-700'
        }`}>
          <div className="flex items-center gap-2 text-gray-400 text-xs mb-1">
            <HardDrive className={`w-4 h-4 ${isMemoryWarning ? 'text-red-400' : ''}`} />
            <span>Memory (MB)</span>
          </div>
          <div className={`text-2xl font-bold ${
            isMemoryWarning ? 'text-red-400' : 'text-yellow-400'
          }`}>
            {latestMetrics?.memory_mb?.toFixed(0) ?? '0'}
          </div>
          {isMemoryWarning && (
            <div className="text-xs text-red-400 mt-1">Warning: High memory</div>
          )}
        </div>
      </div>

      {/* Charts Grid - 2x2 on desktop, stacked on mobile */}
      <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
        {/* Extraction Rate Chart */}
        <ChartCard title="Extraction Rate (reviews/second)" hasData={hasData}>
          <LineChart data={chartData} margin={CHART_MARGIN}>
            <CartesianGrid strokeDasharray="3 3" stroke="#374151" />
            <XAxis dataKey="time" {...AXIS_STYLE} />
            <YAxis {...AXIS_STYLE} domain={[0, 'auto']} />
            <Tooltip
              contentStyle={TOOLTIP_CONTENT_STYLE}
              labelStyle={TOOLTIP_LABEL_STYLE}
              itemStyle={{ color: '#22c55e' }}
              formatter={(value) => [`${(value as number)?.toFixed(2) ?? '0.00'} r/s`, 'Rate']}
              labelFormatter={(label) => `Time: ${label}`}
            />
            <Line
              type="monotone"
              dataKey="extraction_rate"
              stroke="#22c55e"
              strokeWidth={2}
              dot={false}
              activeDot={{ r: 4, fill: '#22c55e' }}
            />
          </LineChart>
        </ChartCard>

        {/* Cumulative Reviews Chart */}
        <ChartCard title="Cumulative Reviews Extracted" hasData={hasData}>
          <AreaChart data={chartData} margin={CHART_MARGIN}>
            <CartesianGrid strokeDasharray="3 3" stroke="#374151" />
            <XAxis dataKey="time" {...AXIS_STYLE} />
            <YAxis {...AXIS_STYLE} domain={[0, 'auto']} />
            <Tooltip
              contentStyle={TOOLTIP_CONTENT_STYLE}
              labelStyle={TOOLTIP_LABEL_STYLE}
              itemStyle={{ color: '#3b82f6' }}
              formatter={(value) => [`${value ?? 0} reviews`, 'Total']}
              labelFormatter={(label) => `Time: ${label}`}
            />
            <defs>
              <linearGradient id="reviewsGradient" x1="0" y1="0" x2="0" y2="1">
                <stop offset="5%" stopColor="#3b82f6" stopOpacity={0.4} />
                <stop offset="95%" stopColor="#3b82f6" stopOpacity={0} />
              </linearGradient>
            </defs>
            <Area
              type="monotone"
              dataKey="reviews_extracted"
              stroke="#3b82f6"
              strokeWidth={2}
              fill="url(#reviewsGradient)"
              activeDot={{ r: 4, fill: '#3b82f6' }}
            />
          </AreaChart>
        </ChartCard>

        {/* Memory Usage Chart */}
        <ChartCard
          title={
            <>
              Memory Usage (MB)
              <span className="ml-2 text-xs text-gray-500">Warning threshold: {MEMORY_WARNING_MB}MB</span>
            </>
          }
          hasData={hasData}
        >
          <LineChart data={chartData} margin={CHART_MARGIN}>
            <CartesianGrid strokeDasharray="3 3" stroke="#374151" />
            <XAxis dataKey="time" {...AXIS_STYLE} />
            {/* Keep the warning line visible even when usage is far below it */}
            <YAxis
              {...AXIS_STYLE}
              domain={[0, (dataMax: number) => Math.max(dataMax * 1.1, MEMORY_WARNING_MB * 1.2)]}
            />
            <Tooltip
              contentStyle={TOOLTIP_CONTENT_STYLE}
              labelStyle={TOOLTIP_LABEL_STYLE}
              itemStyle={{ color: '#eab308' }}
              formatter={(value) => [`${(value as number)?.toFixed(0) ?? '0'} MB`, 'Memory']}
              labelFormatter={(label) => `Time: ${label}`}
            />
            <ReferenceLine
              y={MEMORY_WARNING_MB}
              stroke="#ef4444"
              strokeDasharray="5 5"
              label={{
                value: 'Warning',
                position: 'right',
                fill: '#ef4444',
                fontSize: 10,
              }}
            />
            <Line
              type="monotone"
              dataKey="memory_mb"
              stroke="#eab308"
              strokeWidth={2}
              dot={false}
              activeDot={{ r: 4, fill: '#eab308' }}
            />
          </LineChart>
        </ChartCard>

        {/* Scroll Count Chart */}
        <ChartCard title="Scroll Progress" hasData={hasData}>
          <AreaChart data={chartData} margin={CHART_MARGIN}>
            <CartesianGrid strokeDasharray="3 3" stroke="#374151" />
            <XAxis dataKey="time" {...AXIS_STYLE} />
            <YAxis {...AXIS_STYLE} domain={[0, 'auto']} />
            <Tooltip
              contentStyle={TOOLTIP_CONTENT_STYLE}
              labelStyle={TOOLTIP_LABEL_STYLE}
              itemStyle={{ color: '#a855f7' }}
              formatter={(value) => [`${value ?? 0} scrolls`, 'Count']}
              labelFormatter={(label) => `Time: ${label}`}
            />
            <defs>
              <linearGradient id="scrollsGradient" x1="0" y1="0" x2="0" y2="1">
                <stop offset="5%" stopColor="#a855f7" stopOpacity={0.4} />
                <stop offset="95%" stopColor="#a855f7" stopOpacity={0} />
              </linearGradient>
            </defs>
            <Area
              type="monotone"
              dataKey="scroll_count"
              stroke="#a855f7"
              strokeWidth={2}
              fill="url(#scrollsGradient)"
              activeDot={{ r: 4, fill: '#a855f7' }}
            />
          </AreaChart>
        </ChartCard>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,211 @@
'use client';
import { useState } from 'react';
import { ChevronDown, ChevronRight, User, Globe, Monitor, Cpu, Shield, Check, X, AlertTriangle } from 'lucide-react';
/**
 * Browser fingerprint snapshot rendered by SessionPanel, grouped there into
 * Identity, Geolocation, Display, Hardware and Bot Detection sections.
 */
export interface SessionFingerprint {
  // Identity section
  user_agent: string;
  platform: string;
  language: string;
  languages: string[]; // shown comma-separated
  // Geolocation section
  timezone: string;
  // Display section
  screen: { width: number; height: number; colorDepth: number }; // colorDepth is shown as "<n>-bit"
  viewport: { width: number; height: number };
  // Hardware section
  webgl_vendor: string;
  webgl_renderer: string;
  canvas_fingerprint: string;
  hardware_concurrency: number; // labelled "CPU Cores"
  device_memory: number; // shown with a "GB" suffix
  // true is rendered as PASSED, false as FAILED. The panel also tolerates
  // null/undefined (rendered as UNKNOWN) although the type does not allow them.
  bot_detection_tests: {
    webdriver_hidden: boolean;
    chrome_runtime: boolean;
    permissions_query: boolean;
  };
  // Displayed verbatim; NOTE(review): format (ISO-8601?) is not established here - confirm.
  captured_at: string;
}
/** Props accepted by the SessionPanel component. */
interface SessionPanelProps {
  // Fingerprint to display; required, the panel has no empty state.
  fingerprint: SessionFingerprint;
}
/** Per-outcome presentation of a bot detection test badge. */
const BOT_TEST_VARIANTS = {
  unknown: {
    Icon: AlertTriangle,
    container: 'flex items-center gap-2 px-3 py-2 bg-yellow-900/30 border border-yellow-700/50 rounded-lg',
    icon: 'w-4 h-4 text-yellow-500',
    label: 'text-yellow-300 text-sm font-medium',
    status: 'ml-auto text-yellow-500 text-xs font-mono',
    text: 'UNKNOWN',
  },
  passed: {
    Icon: Check,
    container: 'flex items-center gap-2 px-3 py-2 bg-green-900/30 border border-green-700/50 rounded-lg',
    icon: 'w-4 h-4 text-green-500',
    label: 'text-green-300 text-sm font-medium',
    status: 'ml-auto text-green-500 text-xs font-mono',
    text: 'PASSED',
  },
  failed: {
    Icon: X,
    container: 'flex items-center gap-2 px-3 py-2 bg-red-900/30 border border-red-700/50 rounded-lg',
    icon: 'w-4 h-4 text-red-500',
    label: 'text-red-300 text-sm font-medium',
    status: 'ml-auto text-red-500 text-xs font-mono',
    text: 'FAILED',
  },
};

/**
 * Badge for a single bot detection test: yellow UNKNOWN when the result is
 * null/undefined, green PASSED when truthy, red FAILED otherwise.
 */
function BotTestIndicator({ passed, label }: { passed: boolean | null | undefined; label: string }) {
  let outcome: keyof typeof BOT_TEST_VARIANTS = 'failed';
  if (passed === null || passed === undefined) {
    outcome = 'unknown';
  } else if (passed) {
    outcome = 'passed';
  }

  const variant = BOT_TEST_VARIANTS[outcome];
  const StatusIcon = variant.Icon;
  return (
    <div className={variant.container}>
      <StatusIcon className={variant.icon} />
      <span className={variant.label}>{label}</span>
      <span className={variant.status}>{variant.text}</span>
    </div>
  );
}
/** Underlined section title with a leading blue icon. */
function SectionHeader(props: { icon: React.ElementType; title: string }) {
  const { icon: HeaderIcon, title } = props;
  const heading = (
    <h4 className="text-sm font-semibold text-gray-300 uppercase tracking-wide">{title}</h4>
  );

  return (
    <div className="flex items-center gap-2 mb-3 pb-2 border-b border-gray-700">
      <HeaderIcon className="w-4 h-4 text-blue-400" />
      {heading}
    </div>
  );
}
/**
 * Label/value pair stacked vertically. The value is rendered in a monospace
 * font unless `mono` is set to false, and long values may break anywhere.
 */
function DataRow(props: { label: string; value: string | number; mono?: boolean }) {
  const { label, value, mono = true } = props;
  const valueClassName = `text-sm text-gray-200 ${mono ? 'font-mono' : ''} break-all`;

  return (
    <div className="flex flex-col gap-0.5">
      <span className="text-xs text-gray-500 uppercase tracking-wide">{label}</span>
      <span className={valueClassName}>{value}</span>
    </div>
  );
}
/**
 * Collapsible "What Google Saw" panel. The header always shows how many bot
 * detection tests passed; expanding it reveals the captured fingerprint
 * (identity, geolocation, display, hardware) and the per-test results.
 *
 * Header badge: red when any test failed, yellow when none failed but some
 * result is unknown, green when every test passed.
 */
export default function SessionPanel({ fingerprint }: SessionPanelProps) {
  const [isExpanded, setIsExpanded] = useState(false);

  // Single list drives both the header counters and the rendered indicators.
  const tests = fingerprint.bot_detection_tests;
  const botTests: { label: string; passed: boolean | null | undefined }[] = [
    { label: 'WebDriver Hidden', passed: tests.webdriver_hidden },
    { label: 'Chrome Runtime', passed: tests.chrome_runtime },
    { label: 'Permissions Query', passed: tests.permissions_query },
  ];

  // Calculate overall bot detection status
  const passedCount = botTests.filter((t) => t.passed === true).length;
  const failedCount = botTests.filter((t) => t.passed === false).length;
  const unknownCount = botTests.filter((t) => t.passed === null || t.passed === undefined).length;

  const overallStatus: 'success' | 'partial' | 'warning' =
    failedCount > 0 ? 'warning' : unknownCount > 0 ? 'partial' : 'success';

  const statusColors = {
    success: 'bg-green-900/30 border-green-700/50 text-green-400',
    partial: 'bg-yellow-900/30 border-yellow-700/50 text-yellow-400',
    warning: 'bg-red-900/30 border-red-700/50 text-red-400',
  };

  return (
    <div className="bg-gray-800 border border-gray-700 rounded-lg overflow-hidden">
      {/* Collapsible Header */}
      {/* type="button" stops the toggle from submitting an enclosing form;
          aria-expanded exposes the open/closed state to assistive technology. */}
      <button
        type="button"
        aria-expanded={isExpanded}
        onClick={() => setIsExpanded((expanded) => !expanded)}
        className="w-full flex items-center justify-between px-4 py-3 bg-gray-800 hover:bg-gray-750 transition-colors"
      >
        <div className="flex items-center gap-3">
          {isExpanded ? (
            <ChevronDown className="w-5 h-5 text-gray-400" />
          ) : (
            <ChevronRight className="w-5 h-5 text-gray-400" />
          )}
          <Shield className="w-5 h-5 text-blue-400" />
          <span className="text-sm font-semibold text-gray-200">What Google Saw</span>
        </div>
        <div className={`flex items-center gap-2 px-2.5 py-1 rounded-full text-xs font-semibold border ${statusColors[overallStatus]}`}>
          {overallStatus === 'success' && <Check className="w-3 h-3" />}
          {overallStatus === 'partial' && <AlertTriangle className="w-3 h-3" />}
          {overallStatus === 'warning' && <X className="w-3 h-3" />}
          <span>
            {passedCount}/{botTests.length} Tests Passed
          </span>
        </div>
      </button>

      {/* Collapsible Content */}
      {isExpanded && (
        <div className="px-4 pb-4 border-t border-gray-700">
          <div className="grid grid-cols-1 md:grid-cols-2 gap-6 pt-4">
            {/* Identity Section */}
            <div className="space-y-3">
              <SectionHeader icon={User} title="Identity" />
              <div className="space-y-3 bg-gray-900/50 rounded-lg p-3">
                <DataRow label="User Agent" value={fingerprint.user_agent} />
                <DataRow label="Platform" value={fingerprint.platform} />
                <DataRow label="Primary Language" value={fingerprint.language} />
                <DataRow label="Languages" value={fingerprint.languages.join(', ')} />
              </div>
            </div>

            {/* Geolocation Section */}
            <div className="space-y-3">
              <SectionHeader icon={Globe} title="Geolocation" />
              <div className="space-y-3 bg-gray-900/50 rounded-lg p-3">
                <DataRow label="Timezone" value={fingerprint.timezone} />
                <DataRow label="Captured At" value={fingerprint.captured_at} />
              </div>
            </div>

            {/* Display Section */}
            <div className="space-y-3">
              <SectionHeader icon={Monitor} title="Display" />
              <div className="space-y-3 bg-gray-900/50 rounded-lg p-3">
                <DataRow
                  label="Screen Resolution"
                  value={`${fingerprint.screen.width} x ${fingerprint.screen.height}`}
                />
                <DataRow
                  label="Viewport Size"
                  value={`${fingerprint.viewport.width} x ${fingerprint.viewport.height}`}
                />
                <DataRow
                  label="Color Depth"
                  value={`${fingerprint.screen.colorDepth}-bit`}
                />
              </div>
            </div>

            {/* Hardware Section */}
            <div className="space-y-3">
              <SectionHeader icon={Cpu} title="Hardware" />
              <div className="space-y-3 bg-gray-900/50 rounded-lg p-3">
                <DataRow label="WebGL Vendor" value={fingerprint.webgl_vendor} />
                <DataRow label="WebGL Renderer" value={fingerprint.webgl_renderer} />
                <DataRow label="CPU Cores" value={fingerprint.hardware_concurrency} />
                <DataRow label="Device Memory" value={`${fingerprint.device_memory} GB`} />
                <DataRow label="Canvas Fingerprint" value={fingerprint.canvas_fingerprint} />
              </div>
            </div>
          </div>

          {/* Bot Detection Section - Full Width */}
          <div className="mt-6 space-y-3">
            <SectionHeader icon={Shield} title="Bot Detection Tests" />
            <div className="grid grid-cols-1 md:grid-cols-3 gap-3">
              {botTests.map((test) => (
                <BotTestIndicator key={test.label} passed={test.passed} label={test.label} />
              ))}
            </div>
            <div className="mt-3 text-xs text-gray-500 bg-gray-900/30 rounded-lg p-3">
              <p>
                <span className="text-green-400 font-semibold">Green checkmark</span> = Test passed (bot detection evaded)
              </p>
              <p>
                <span className="text-red-400 font-semibold">Red X</span> = Test failed (may have been detected as a bot)
              </p>
              <p>
                <span className="text-yellow-400 font-semibold">Yellow warning</span> = Test result unknown
              </p>
            </div>
          </div>
        </div>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,364 @@
'use client';
import { useState, useMemo } from 'react';
import { Bug, Globe, Network, Cpu, Filter, ChevronDown, ChevronUp } from 'lucide-react';
// Type definitions
/** One structured log entry shown in the JobDevTools log view. */
export interface StructuredLog {
  timestamp: string;
  // Presumably the same instant as `timestamp` in milliseconds - TODO confirm against the producer.
  timestamp_ms: number;
  // Severity; the same five values key LEVEL_COLORS and LEVEL_BADGE_COLORS.
  level: 'DEBUG' | 'INFO' | 'WARN' | 'ERROR' | 'FATAL';
  // Source area; each value has a matching tab in TAB_CONFIG.
  category: 'scraper' | 'browser' | 'network' | 'system';
  message: string;
  // Optional free-form payloads; their schema is not defined in this file.
  metrics?: Record<string, any>;
  network?: Record<string, any>;
}
/**
 * Loose bag of job metrics. Every known field is optional and the index
 * signature admits arbitrary additional keys.
 */
export interface MetricsData {
  cpu_percent?: number;
  memory_mb?: number;
  duration_ms?: number;
  requests_made?: number;
  reviews_scraped?: number;
  [key: string]: any;
}
/**
 * Crash details passed to JobDevTools.
 * NOTE(review): field names differ from the backend CrashReportResponse
 * (crash_type / analysis / ...) - confirm which layer maps between the two.
 */
export interface CrashReport {
  error_type: string;
  error_message: string;
  stack_trace?: string;
  timestamp: string;
  // Optional free-form context; schema is not defined in this file.
  context?: Record<string, any>;
}
/**
 * Session information passed to JobDevTools. Arbitrary additional keys are
 * allowed through the index signature.
 */
export interface SessionFingerprint {
/** Session identifier; JobDevTools shows its first 8 characters in the footer. */
session_id: string;
/** Optional browser version string. */
browser_version?: string;
/** Optional flag — presumably whether a proxy was used for the session; TODO confirm. */
proxy_used?: boolean;
/** Optional locale string. */
locale?: string;
/** Optional viewport size as width/height numbers. */
viewport?: { width: number; height: number };
/** Any other fingerprint keys are accepted without type checking. */
[key: string]: any;
}
/**
 * Props accepted by the JobDevTools component.
 */
export interface JobDevToolsProps {
/** Log entries to count, filter and display. */
logs: StructuredLog[];
/** Optional metrics; selected fields are summarized in the footer. */
metrics?: MetricsData;
/** Optional crash report; its `error_type` is shown in the footer. */
crashReport?: CrashReport;
/** Optional session info; a shortened `session_id` is shown in the footer. */
sessionFingerprint?: SessionFingerprint;
/** When true the header shows a "Streaming" badge; the component defaults it to false. */
isStreaming?: boolean;
}
// Identifier of a tab in the tab bar: 'all' plus one value per log category.
type TabType = 'all' | 'scraper' | 'browser' | 'network' | 'system';
// Log severity names; same set of literals as StructuredLog['level'].
type LogLevel = 'DEBUG' | 'INFO' | 'WARN' | 'ERROR' | 'FATAL';
// Tab bar definition, in display order. Each entry supplies the tab id, its
// visible label and the lucide icon rendered next to it. The 'all' tab has no
// `category`. Within JobDevTools only `id`, `label` and `icon` are read;
// filtering compares the log category with the active tab id directly.
const TAB_CONFIG: { id: TabType; label: string; icon: typeof Bug; category?: StructuredLog['category'] }[] = [
{ id: 'all', label: 'All', icon: Filter },
{ id: 'scraper', label: 'Scraper', icon: Bug, category: 'scraper' },
{ id: 'browser', label: 'Browser', icon: Globe, category: 'browser' },
{ id: 'network', label: 'Network', icon: Network, category: 'network' },
{ id: 'system', label: 'System', icon: Cpu, category: 'system' },
];
// CSS class names per log level for a log row. JobDevTools applies `bg` to the
// row container (together with `bg-opacity-20`) and `text` to the message;
// `border` is defined for every level but not referenced by JobDevTools.
const LEVEL_COLORS: Record<LogLevel, { bg: string; text: string; border: string }> = {
DEBUG: { bg: 'bg-gray-700', text: 'text-gray-300', border: 'border-gray-600' },
INFO: { bg: 'bg-blue-900', text: 'text-blue-300', border: 'border-blue-700' },
WARN: { bg: 'bg-yellow-900', text: 'text-yellow-300', border: 'border-yellow-700' },
ERROR: { bg: 'bg-red-900', text: 'text-red-300', border: 'border-red-700' },
FATAL: { bg: 'bg-purple-900', text: 'text-purple-300', border: 'border-purple-700' },
};
// CSS class names per log level for the small level badge, used both in each
// log row and in the level filter dropdown. The key order of this object is
// also the order in which levels are listed in the dropdown.
const LEVEL_BADGE_COLORS: Record<LogLevel, string> = {
DEBUG: 'bg-gray-600 text-gray-200',
INFO: 'bg-blue-600 text-blue-100',
WARN: 'bg-yellow-600 text-yellow-100',
ERROR: 'bg-red-600 text-red-100',
FATAL: 'bg-purple-600 text-purple-100',
};
/**
 * Developer-tools style log viewer for a single job.
 *
 * Renders a header (with an optional "Streaming" badge), a tab bar that
 * filters logs by category, a dropdown that filters by level, the scrollable
 * list of matching log rows, and a footer summarizing metrics, session and
 * crash information when those props are provided.
 *
 * @param logs - Log entries to count, filter and display.
 * @param metrics - Optional metrics; duration, review count and memory are
 *   shown in the footer when they are numbers.
 * @param crashReport - Optional crash report; its `error_type` is shown.
 * @param sessionFingerprint - Optional session info; the first 8 characters
 *   of `session_id` are shown.
 * @param isStreaming - Shows the "Streaming" badge when true (default false).
 */
export default function JobDevTools({
  logs,
  metrics,
  crashReport,
  sessionFingerprint,
  isStreaming = false,
}: JobDevToolsProps) {
  const [activeTab, setActiveTab] = useState<TabType>('all');
  const [enabledLevels, setEnabledLevels] = useState<Set<LogLevel>>(
    new Set(['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'])
  );
  const [showLevelFilter, setShowLevelFilter] = useState(false);

  // Calculate counts per category (shown as badges on the tabs)
  const categoryCounts = useMemo(() => {
    const counts: Record<TabType, number> = {
      all: logs.length,
      scraper: 0,
      browser: 0,
      network: 0,
      system: 0,
    };
    logs.forEach((log) => {
      // Logs arrive as runtime data, so the category may not match the
      // declared union. Only count own, per-category buckets: the `in`
      // operator would also match inherited keys (e.g. "constructor") and the
      // 'all' bucket, which is already set to logs.length.
      const category: string = log.category;
      if (category !== 'all' && Object.prototype.hasOwnProperty.call(counts, category)) {
        counts[category as StructuredLog['category']] += 1;
      }
    });
    return counts;
  }, [logs]);

  // Calculate counts per level (shown in the level filter dropdown)
  const levelCounts = useMemo(() => {
    const counts: Record<LogLevel, number> = {
      DEBUG: 0,
      INFO: 0,
      WARN: 0,
      ERROR: 0,
      FATAL: 0,
    };
    logs.forEach((log) => {
      // Own-property check for the same reason as in categoryCounts.
      if (Object.prototype.hasOwnProperty.call(counts, log.level)) {
        counts[log.level] += 1;
      }
    });
    return counts;
  }, [logs]);

  // Filter logs by active tab and enabled levels
  const filteredLogs = useMemo(() => {
    return logs.filter((log) => {
      const matchesTab = activeTab === 'all' || log.category === activeTab;
      const matchesLevel = enabledLevels.has(log.level);
      return matchesTab && matchesLevel;
    });
  }, [logs, activeTab, enabledLevels]);

  // Toggle one level in the filter; always returns a new Set so React sees a
  // state change.
  const toggleLevel = (level: LogLevel) => {
    setEnabledLevels((prev) => {
      const next = new Set(prev);
      if (next.has(level)) {
        // Don't allow deselecting all levels
        if (next.size > 1) {
          next.delete(level);
        }
      } else {
        next.add(level);
      }
      return next;
    });
  };

  // Format a timestamp string as HH:MM:SS.mmm, falling back to the raw string
  // when it cannot be parsed or formatted.
  const formatTimestamp = (timestamp: string) => {
    const date = new Date(timestamp);
    // An unparseable string does not throw: it yields an Invalid Date, which
    // toLocaleTimeString would render as the literal text "Invalid Date".
    // Detect it explicitly so the raw value is shown instead.
    if (Number.isNaN(date.getTime())) {
      return timestamp;
    }
    try {
      return date.toLocaleTimeString('en-US', {
        hour12: false,
        hour: '2-digit',
        minute: '2-digit',
        second: '2-digit',
        fractionalSecondDigits: 3,
      });
    } catch {
      // Formatting itself can still throw (e.g. unsupported options).
      return timestamp;
    }
  };

  return (
    <div className="w-full min-h-[400px] bg-gray-900 rounded-xl border-2 border-gray-700 flex flex-col">
      {/* Header with streaming indicator */}
      <div className="flex items-center justify-between px-4 py-2 border-b border-gray-700 bg-gray-800 rounded-t-xl">
        <div className="flex items-center gap-2">
          <Bug className="w-5 h-5 text-green-400" />
          <span className="font-semibold text-gray-200">Job DevTools</span>
          {isStreaming && (
            <span className="flex items-center gap-1.5 px-2 py-0.5 bg-green-900 text-green-300 text-xs font-medium rounded-full border border-green-700">
              <span className="w-2 h-2 bg-green-400 rounded-full animate-pulse" />
              Streaming
            </span>
          )}
        </div>
        <span className="text-sm text-gray-400">
          {filteredLogs.length} / {logs.length} logs
        </span>
      </div>
      {/* Tab bar */}
      <div className="flex items-center border-b border-gray-700 bg-gray-850 px-2">
        {TAB_CONFIG.map((tab) => {
          const Icon = tab.icon;
          const count = categoryCounts[tab.id];
          const isActive = activeTab === tab.id;
          return (
            <button
              key={tab.id}
              type="button"
              onClick={() => setActiveTab(tab.id)}
              className={`flex items-center gap-2 px-4 py-3 text-sm font-medium transition-all border-b-2 ${
                isActive
                  ? 'text-blue-400 border-blue-400 bg-gray-800'
                  : 'text-gray-400 border-transparent hover:text-gray-200 hover:bg-gray-800'
              }`}
            >
              <Icon className="w-4 h-4" />
              <span>{tab.label}</span>
              <span
                className={`px-2 py-0.5 text-xs rounded-full ${
                  isActive
                    ? 'bg-blue-900 text-blue-300'
                    : 'bg-gray-700 text-gray-400'
                }`}
              >
                {count}
              </span>
            </button>
          );
        })}
        {/* Level filter toggle */}
        <div className="ml-auto relative">
          <button
            type="button"
            aria-expanded={showLevelFilter}
            onClick={() => setShowLevelFilter((open) => !open)}
            className="flex items-center gap-2 px-3 py-2 text-sm font-medium text-gray-400 hover:text-gray-200 transition-colors"
          >
            <Filter className="w-4 h-4" />
            <span>Levels</span>
            {showLevelFilter ? (
              <ChevronUp className="w-4 h-4" />
            ) : (
              <ChevronDown className="w-4 h-4" />
            )}
          </button>
          {/* Level filter dropdown */}
          {showLevelFilter && (
            <div className="absolute right-0 top-full mt-1 z-10 bg-gray-800 border border-gray-600 rounded-lg shadow-lg p-2 min-w-[160px]">
              {(Object.keys(LEVEL_BADGE_COLORS) as LogLevel[]).map((level) => (
                <label
                  key={level}
                  className="flex items-center gap-2 px-2 py-1.5 hover:bg-gray-700 rounded cursor-pointer"
                >
                  <input
                    type="checkbox"
                    checked={enabledLevels.has(level)}
                    onChange={() => toggleLevel(level)}
                    className="w-4 h-4 rounded border-gray-600 bg-gray-700 text-blue-500 focus:ring-blue-500 focus:ring-offset-gray-800"
                  />
                  <span
                    className={`px-2 py-0.5 text-xs font-semibold rounded ${LEVEL_BADGE_COLORS[level]}`}
                  >
                    {level}
                  </span>
                  <span className="text-gray-400 text-xs ml-auto">
                    {levelCounts[level]}
                  </span>
                </label>
              ))}
            </div>
          )}
        </div>
      </div>
      {/* Log entries - scrollable area */}
      <div className="flex-1 overflow-y-auto min-h-[250px] max-h-[500px] font-mono text-sm">
        {filteredLogs.length === 0 ? (
          <div className="flex items-center justify-center h-full text-gray-500">
            <div className="text-center">
              <Bug className="w-8 h-8 mx-auto mb-2 opacity-50" />
              <p>No logs to display</p>
              <p className="text-xs mt-1">
                {logs.length > 0
                  ? 'Try adjusting your filters'
                  : 'Logs will appear here during job execution'}
              </p>
            </div>
          </div>
        ) : (
          <div className="divide-y divide-gray-800">
            {filteredLogs.map((log, index) => {
              // Always defined here: filteredLogs only keeps entries whose
              // level is in enabledLevels, which holds valid LogLevel values.
              const levelStyle = LEVEL_COLORS[log.level];
              return (
                <div
                  key={`${log.timestamp_ms}-${index}`}
                  className={`px-4 py-2 hover:bg-gray-800 transition-colors ${levelStyle.bg} bg-opacity-20`}
                >
                  <div className="flex items-start gap-3">
                    {/* Timestamp */}
                    <span className="text-gray-500 text-xs whitespace-nowrap pt-0.5">
                      {formatTimestamp(log.timestamp)}
                    </span>
                    {/* Level badge */}
                    <span
                      className={`px-1.5 py-0.5 text-xs font-semibold rounded ${LEVEL_BADGE_COLORS[log.level]} whitespace-nowrap`}
                    >
                      {log.level}
                    </span>
                    {/* Category badge */}
                    <span className="px-1.5 py-0.5 text-xs font-medium rounded bg-gray-700 text-gray-300 whitespace-nowrap">
                      {log.category}
                    </span>
                    {/* Message */}
                    <span className={`flex-1 ${levelStyle.text} break-words`}>
                      {log.message}
                    </span>
                  </div>
                  {/* Additional data (metrics/network) */}
                  {(log.metrics || log.network) && (
                    <div className="mt-1 ml-[72px] text-xs text-gray-500">
                      {log.metrics && (
                        <span className="mr-4">
                          metrics: {JSON.stringify(log.metrics)}
                        </span>
                      )}
                      {log.network && (
                        <span>network: {JSON.stringify(log.network)}</span>
                      )}
                    </div>
                  )}
                </div>
              );
            })}
          </div>
        )}
      </div>
      {/* Reserved space for metrics/session panels (footer) */}
      <div className="border-t border-gray-700 bg-gray-800 px-4 py-3 rounded-b-xl">
        <div className="flex items-center justify-between text-xs text-gray-400">
          <div className="flex items-center gap-4">
            {/* typeof checks (rather than !== undefined) so a JSON null from
                the API is skipped instead of crashing on .toFixed */}
            {metrics && (
              <>
                {typeof metrics.duration_ms === 'number' && (
                  <span>Duration: {(metrics.duration_ms / 1000).toFixed(2)}s</span>
                )}
                {typeof metrics.reviews_scraped === 'number' && (
                  <span>Reviews: {metrics.reviews_scraped}</span>
                )}
                {typeof metrics.memory_mb === 'number' && (
                  <span>Memory: {metrics.memory_mb.toFixed(1)}MB</span>
                )}
              </>
            )}
          </div>
          <div className="flex items-center gap-4">
            {sessionFingerprint && (
              <span className="text-gray-500">
                Session: {sessionFingerprint.session_id?.slice(0, 8)}...
              </span>
            )}
            {crashReport && (
              <span className="text-red-400 font-medium">
                Crash: {crashReport.error_type}
              </span>
            )}
          </div>
        </div>
      </div>
    </div>
  );
}