From cd9639f3b133879684fd1dab3c091a4e2faf4d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Sat, 24 Jan 2026 13:11:19 +0000 Subject: [PATCH] Wave 7: Integrate JobDevTools into job detail page (FINAL) - Task #18: Complete integration of all JobDevTools components - Updated job detail page (/jobs/[id]) with full JobDevTools UI - Connected SSE stream for real-time structured logs + metrics - Added crash-report and retry API routes for Next.js - Added format conversion for old/new log formats - Added DevTools links to JobsView modal and actions column - Wired up CrashReport retry with auto-fix parameters - Integrated SessionPanel for fingerprint display - Integrated MetricsDashboard for real-time charts Job DevTools implementation complete: 18/18 tasks Co-Authored-By: Claude Opus 4.5 --- .../api/jobs/[jobId]/crash-report/route.ts | 42 ++ web/app/api/jobs/[jobId]/retry/route.ts | 53 ++ web/app/jobs/[id]/page.tsx | 666 +++++++++++++++--- web/components/JobsView.tsx | 23 + 4 files changed, 699 insertions(+), 85 deletions(-) create mode 100644 web/app/api/jobs/[jobId]/crash-report/route.ts create mode 100644 web/app/api/jobs/[jobId]/retry/route.ts diff --git a/web/app/api/jobs/[jobId]/crash-report/route.ts b/web/app/api/jobs/[jobId]/crash-report/route.ts new file mode 100644 index 0000000..7e1014d --- /dev/null +++ b/web/app/api/jobs/[jobId]/crash-report/route.ts @@ -0,0 +1,42 @@ +import { NextRequest, NextResponse } from 'next/server'; + +const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; + +/** + * GET /api/jobs/[jobId]/crash-report + * + * Fetches the crash report for a failed or partial job. + * Returns detailed crash analysis including pattern identification, + * confidence score, suggested fixes, and auto-fix parameters. + */ +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ jobId: string }> } +) { + try { + const { jobId } = await params; + + const response = await fetch(`${API_BASE_URL}/jobs/${jobId}/crash-report`, { + headers: { + 'Accept': 'application/json', + }, + }); + + const data = await response.json(); + + if (!response.ok) { + return NextResponse.json( + { error: data.detail || 'Failed to get crash report' }, + { status: response.status } + ); + } + + return NextResponse.json(data); + } catch (error) { + console.error('Crash report API error:', error); + return NextResponse.json( + { error: 'Failed to get crash report' }, + { status: 500 } + ); + } +} diff --git a/web/app/api/jobs/[jobId]/retry/route.ts b/web/app/api/jobs/[jobId]/retry/route.ts new file mode 100644 index 0000000..efdc061 --- /dev/null +++ b/web/app/api/jobs/[jobId]/retry/route.ts @@ -0,0 +1,53 @@ +import { NextRequest, NextResponse } from 'next/server'; + +const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; + +/** + * POST /api/jobs/[jobId]/retry + * + * Retries a failed or partial job, optionally applying auto-fix parameters. + * + * Query params: + * - apply_fix: boolean (default: false) - Whether to apply auto-fix parameters + * based on crash analysis (e.g., reduced batch size for memory issues) + * + * Returns the new job ID for tracking the retry attempt. + */ +export async function POST( + request: NextRequest, + { params }: { params: Promise<{ jobId: string }> } +) { + try { + const { jobId } = await params; + const searchParams = request.nextUrl.searchParams; + const applyFix = searchParams.get('apply_fix') === 'true'; + + const response = await fetch( + `${API_BASE_URL}/jobs/${jobId}/retry?apply_fix=${applyFix}`, + { + method: 'POST', + headers: { + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, + } + ); + + const data = await response.json(); + + if (!response.ok) { + return NextResponse.json( + { error: data.detail || 'Failed to retry job' }, + { status: response.status } + ); + } + + return NextResponse.json(data); + } catch (error) { + console.error('Retry job API error:', error); + return NextResponse.json( + { error: 'Failed to retry job' }, + { status: 500 } + ); + } +} diff --git a/web/app/jobs/[id]/page.tsx b/web/app/jobs/[id]/page.tsx index 984f63c..33bbc31 100644 --- a/web/app/jobs/[id]/page.tsx +++ b/web/app/jobs/[id]/page.tsx @@ -1,12 +1,20 @@ 'use client'; -import { useEffect, useState } from 'react'; +import { useEffect, useState, useRef, useCallback } from 'react'; import { useParams, useRouter } from 'next/navigation'; import Link from 'next/link'; import { useJobs } from '@/contexts/JobsContext'; import { JobStatus } from '@/components/ScraperTest'; +import JobDevTools, { + StructuredLog, + MetricsData, + SessionFingerprint as DevToolsSessionFingerprint, +} from '@/components/JobDevTools'; +import CrashReport, { CrashReportData } from '@/components/JobDevTools/CrashReport'; +import SessionPanel, { SessionFingerprint as DetailedSessionFingerprint } from '@/components/JobDevTools/SessionPanel'; +import MetricsDashboard, { MetricsSample } from '@/components/JobDevTools/MetricsDashboard'; -interface LogEntry { +interface OldLogEntry { timestamp: string; level: string; message: string; @@ -17,7 +25,7 @@ interface JobLogs { job_id: string; status: string; error_message: string | null; - logs: LogEntry[]; + logs: OldLogEntry[] | StructuredLog[]; log_count: number; } @@ -39,42 +47,412 @@ function extractBusinessName(job: JobStatus): string { } } +/** + * Check if a log entry is in the old format (has 'source' property) + * or new structured format (has 'category' property) + */ +function isOldLogFormat(log: OldLogEntry | StructuredLog): log is OldLogEntry { + return 'source' in log && !('category' in log); +} + +/** + * Convert old log format to new StructuredLog format + */ +function convertOldToStructured(oldLog: OldLogEntry): StructuredLog { + // Map old source to new category + const categoryMap: Record = { + browser: 'browser', + scraper: 'scraper', + network: 'network', + system: 'system', + }; + + // Map old level to new level + const levelMap: Record = { + DEBUG: 'DEBUG', + INFO: 'INFO', + WARNING: 'WARN', + WARN: 'WARN', + ERROR: 'ERROR', + FATAL: 'FATAL', + }; + + const timestamp = oldLog.timestamp; + const timestampMs = new Date(timestamp).getTime(); + + return { + timestamp, + timestamp_ms: timestampMs || Date.now(), + level: levelMap[oldLog.level?.toUpperCase()] || 'INFO', + category: categoryMap[oldLog.source] || 'system', + message: oldLog.message, + }; +} + +/** + * Convert array of logs to structured format if needed + */ +function normalizeLogsTOStructured(logs: (OldLogEntry | StructuredLog)[]): StructuredLog[] { + return logs.map((log) => { + if (isOldLogFormat(log)) { + return convertOldToStructured(log); + } + return log as StructuredLog; + }); +} + export default function JobDetailPage() { const params = useParams(); const router = useRouter(); const { jobs, refreshJobs } = useJobs(); const [job, setJob] = useState(null); - const [logs, setLogs] = useState(null); + const [structuredLogs, setStructuredLogs] = useState([]); + const [metricsData, setMetricsData] = useState(undefined); + const [metricsHistory, setMetricsHistory] = useState([]); + const [crashReport, setCrashReport] = useState(null); + const [sessionFingerprint, setSessionFingerprint] = useState(undefined); + const [isStreaming, setIsStreaming] = useState(false); const [isLoadingLogs, setIsLoadingLogs] = useState(false); + const [isLoadingCrashReport, setIsLoadingCrashReport] = useState(false); const [isDeleting, setIsDeleting] = useState(false); + const [retryFeedback, setRetryFeedback] = useState<{ type: 'success' | 'error'; message: string } | null>(null); const jobId = params.id as string; + const eventSourceRef = useRef(null); // Find job from context or fetch it useEffect(() => { - const foundJob = jobs.find(j => j.job_id === jobId); + const foundJob = jobs.find((j) => j.job_id === jobId); if (foundJob) { setJob(foundJob); } else { // Fetch job directly if not in context fetch(`/api/jobs/${jobId}`) - .then(res => res.json()) - .then(data => setJob(data)) + .then((res) => res.json()) + .then((data) => setJob(data)) .catch(console.error); } }, [jobId, jobs]); - // Fetch logs + // Fetch initial logs when job is loaded useEffect(() => { if (!jobId) return; + setIsLoadingLogs(true); fetch(`/api/jobs/${jobId}/logs`) - .then(res => res.json()) - .then(data => setLogs(data)) + .then((res) => res.json()) + .then((data: JobLogs) => { + if (data.logs && data.logs.length > 0) { + const normalized = normalizeLogsTOStructured(data.logs); + setStructuredLogs(normalized); + } + }) .catch(console.error) .finally(() => setIsLoadingLogs(false)); }, [jobId]); + // Connect to SSE stream for running jobs + useEffect(() => { + if (!job || job.status !== 'running') { + // Close any existing connection for non-running jobs + if (eventSourceRef.current) { + eventSourceRef.current.close(); + eventSourceRef.current = null; + setIsStreaming(false); + } + return; + } + + // Create SSE connection + const eventSource = new EventSource(`/api/jobs/${jobId}/stream`); + eventSourceRef.current = eventSource; + + eventSource.onopen = () => { + console.log('SSE connected for job:', jobId); + setIsStreaming(true); + }; + + eventSource.onerror = (err) => { + console.error('SSE error:', err); + setIsStreaming(false); + // Try to reconnect after a delay + setTimeout(() => { + if (eventSourceRef.current === eventSource) { + eventSource.close(); + // Will reconnect on next render cycle if job is still running + } + }, 3000); + }; + + // Handle structured log events + eventSource.addEventListener('log', (event) => { + try { + const data = JSON.parse(event.data); + // Handle {"type": "log", "data": {...}} format + const logData = data.data || data; + + const newLog: StructuredLog = { + timestamp: logData.timestamp || new Date().toISOString(), + timestamp_ms: logData.timestamp_ms || Date.now(), + level: logData.level || 'INFO', + category: logData.category || 'system', + message: logData.message || '', + metrics: logData.metrics, + network: logData.network, + }; + + setStructuredLogs((prev) => [...prev, newLog]); + } catch (err) { + console.error('Failed to parse log event:', err); + } + }); + + // Handle metrics events + eventSource.addEventListener('metrics', (event) => { + try { + const data = JSON.parse(event.data); + // Handle {"type": "metrics", "data": {...}} format + const metricsPayload = data.data || data; + + setMetricsData({ + cpu_percent: metricsPayload.cpu_percent, + memory_mb: metricsPayload.memory_mb, + duration_ms: metricsPayload.duration_ms, + requests_made: metricsPayload.requests_made, + reviews_scraped: metricsPayload.reviews_extracted || metricsPayload.reviews_scraped, + }); + + // Add to metrics history for charts + const sample: MetricsSample = { + timestamp_ms: metricsPayload.timestamp_ms || Date.now(), + reviews_extracted: metricsPayload.reviews_extracted || 0, + scroll_count: metricsPayload.scroll_count || 0, + memory_mb: metricsPayload.memory_mb || 0, + extraction_rate: metricsPayload.extraction_rate || 0, + }; + + setMetricsHistory((prev) => [...prev, sample]); + } catch (err) { + console.error('Failed to parse metrics event:', err); + } + }); + + // Handle job progress events (from existing SSE format) + eventSource.addEventListener('job_progress', (event) => { + try { + const data = JSON.parse(event.data); + setJob((prev) => + prev + ? { + ...prev, + reviews_count: data.reviews_count, + total_reviews: data.total_reviews, + scrape_time: data.scrape_time, + } + : prev + ); + } catch (err) { + console.error('Failed to parse job_progress event:', err); + } + }); + + // Handle job completed + eventSource.addEventListener('job_completed', (event) => { + try { + const data = JSON.parse(event.data); + setJob((prev) => + prev + ? { + ...prev, + status: 'completed', + reviews_count: data.reviews_count, + total_reviews: data.total_reviews, + scrape_time: data.scrape_time, + } + : prev + ); + eventSource.close(); + setIsStreaming(false); + refreshJobs(); + } catch (err) { + console.error('Failed to parse job_completed event:', err); + } + }); + + // Handle job failed + eventSource.addEventListener('job_failed', (event) => { + try { + const data = JSON.parse(event.data); + setJob((prev) => + prev + ? { + ...prev, + status: 'failed', + error_message: data.error || data.error_message, + } + : prev + ); + eventSource.close(); + setIsStreaming(false); + refreshJobs(); + // Fetch crash report when job fails + fetchCrashReport(); + } catch (err) { + console.error('Failed to parse job_failed event:', err); + } + }); + + // Handle job partial + eventSource.addEventListener('job_partial', (event) => { + try { + const data = JSON.parse(event.data); + setJob((prev) => + prev + ? { + ...prev, + status: 'partial', + reviews_count: data.reviews_count, + error_message: data.error || data.error_message, + } + : prev + ); + eventSource.close(); + setIsStreaming(false); + refreshJobs(); + // Fetch crash report for partial jobs too + fetchCrashReport(); + } catch (err) { + console.error('Failed to parse job_partial event:', err); + } + }); + + // Handle initial state (all current logs) + eventSource.addEventListener('initial_state', (event) => { + try { + const data = JSON.parse(event.data); + if (data.logs && data.logs.length > 0) { + const normalized = normalizeLogsTOStructured(data.logs); + setStructuredLogs(normalized); + } + } catch (err) { + console.error('Failed to parse initial_state event:', err); + } + }); + + // Handle generic message events + eventSource.onmessage = (event) => { + try { + const data = JSON.parse(event.data); + + // Check for type field to route to correct handler + if (data.type === 'log') { + const logData = data.data || data; + const newLog: StructuredLog = { + timestamp: logData.timestamp || new Date().toISOString(), + timestamp_ms: logData.timestamp_ms || Date.now(), + level: logData.level || 'INFO', + category: logData.category || 'system', + message: logData.message || '', + metrics: logData.metrics, + network: logData.network, + }; + setStructuredLogs((prev) => [...prev, newLog]); + } else if (data.type === 'metrics') { + const metricsPayload = data.data || data; + setMetricsData({ + cpu_percent: metricsPayload.cpu_percent, + memory_mb: metricsPayload.memory_mb, + duration_ms: metricsPayload.duration_ms, + requests_made: metricsPayload.requests_made, + reviews_scraped: metricsPayload.reviews_extracted || metricsPayload.reviews_scraped, + }); + } + } catch { + // Ignore non-JSON messages + } + }; + + return () => { + eventSource.close(); + eventSourceRef.current = null; + setIsStreaming(false); + }; + }, [job?.status, jobId, refreshJobs]); + + // Fetch crash report when job status is failed or partial + const fetchCrashReport = useCallback(async () => { + if (!jobId) return; + + setIsLoadingCrashReport(true); + try { + const response = await fetch(`/api/jobs/${jobId}/crash-report`); + if (response.ok) { + const data = await response.json(); + setCrashReport(data); + } + } catch (err) { + console.error('Failed to fetch crash report:', err); + } finally { + setIsLoadingCrashReport(false); + } + }, [jobId]); + + // Fetch crash report if job is failed or partial on load + useEffect(() => { + if (job && (job.status === 'failed' || job.status === 'partial')) { + fetchCrashReport(); + } + }, [job?.status, fetchCrashReport]); + + // Extract session fingerprint from job metadata + useEffect(() => { + if (!job) return; + + // Try to get session fingerprint from job metadata + fetch(`/api/jobs/${jobId}`) + .then((res) => res.json()) + .then((fullJob) => { + if (fullJob.metadata) { + const metadata = + typeof fullJob.metadata === 'string' + ? JSON.parse(fullJob.metadata) + : fullJob.metadata; + + if (metadata.session_fingerprint) { + setSessionFingerprint(metadata.session_fingerprint); + } else if (metadata.browser_fingerprint) { + // Convert browser fingerprint to session fingerprint format + const bf = metadata.browser_fingerprint; + setSessionFingerprint({ + user_agent: bf.userAgent || '', + platform: bf.platform || '', + language: bf.language || '', + languages: bf.languages || [bf.language || ''], + timezone: bf.timezone || '', + screen: { + width: bf.viewport?.width || 1920, + height: bf.viewport?.height || 1080, + colorDepth: 24, + }, + viewport: bf.viewport || { width: 1920, height: 1080 }, + webgl_vendor: '', + webgl_renderer: '', + canvas_fingerprint: '', + hardware_concurrency: 4, + device_memory: 8, + bot_detection_tests: { + webdriver_hidden: true, + chrome_runtime: true, + permissions_query: true, + }, + captured_at: new Date().toISOString(), + }); + } + } + }) + .catch(console.error); + }, [job, jobId]); + const handleDelete = async () => { if (!confirm('Are you sure you want to delete this job?')) return; setIsDeleting(true); @@ -89,6 +467,24 @@ export default function JobDetailPage() { } }; + const handleRetry = useCallback( + (applyFix: boolean) => { + setRetryFeedback({ + type: 'success', + message: applyFix + ? 'Retrying job with auto-fix applied...' + : 'Retrying job without modifications...', + }); + + // Refresh jobs to pick up the new job + setTimeout(() => { + refreshJobs(); + setRetryFeedback(null); + }, 2000); + }, + [refreshJobs] + ); + if (!job) { return (
@@ -99,12 +495,15 @@ export default function JobDetailPage() { const businessName = extractBusinessName(job); const canViewAnalytics = job.reviews_count && job.reviews_count > 0; + const showCrashReport = (job.status === 'failed' || job.status === 'partial') && crashReport; return (
{/* Breadcrumb */}
- Jobs + + Jobs + / {jobId.slice(0, 8)}...
@@ -118,13 +517,19 @@ export default function JobDetailPage() {

{job.business_address}

)}
- + {job.status === 'running' && (
)} @@ -136,13 +541,17 @@ export default function JobDetailPage() {
{job.reviews_count !== null && (
-
{job.reviews_count.toLocaleString()}
+
+ {job.reviews_count.toLocaleString()} +
Reviews
)} {job.scrape_time !== null && (
-
{formatDuration(job.scrape_time)}
+
+ {formatDuration(job.scrape_time)} +
Duration
)} @@ -175,8 +584,18 @@ export default function JobDetailPage() { href={`/analytics/${jobId}`} className="flex-1 py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-xl font-semibold transition-colors flex items-center justify-center gap-2" > - - + + View Analytics @@ -187,8 +606,18 @@ export default function JobDetailPage() { rel="noopener noreferrer" className="px-6 py-3 bg-gray-100 hover:bg-gray-200 text-gray-700 rounded-xl font-semibold transition-colors flex items-center justify-center gap-2" > - - + + Open in Maps @@ -200,20 +629,38 @@ export default function JobDetailPage() { {isDeleting ? (
) : ( - - + + )} Delete
- {/* Error Message */} - {job.error_message && ( + {/* Error Message (legacy - shown when no crash report) */} + {job.error_message && !showCrashReport && (
- - + +

Error

@@ -222,67 +669,116 @@ export default function JobDetailPage() {
)} + + {/* Retry Feedback */} + {retryFeedback && ( +
+
+ {retryFeedback.type === 'success' ? ( + + + + ) : ( + + + + )} + {retryFeedback.message} +
+
+ )}
- {/* Logs Section */} -
-
-

Logs

- {logs && ( - {logs.log_count} entries - )} + {/* Crash Report Section (for failed/partial jobs) */} + {showCrashReport && ( +
+
+ )} -
- {isLoadingLogs ? ( -
-
-
- ) : logs && logs.logs.length > 0 ? ( -
- {[...logs.logs] - .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()) - .map((log, idx) => ( -
- - {new Date(log.timestamp).toLocaleTimeString()} - - {' '} - - [{log.level}] - - {' '} - - [{log.source}] - - {' '} - {log.message} -
- ))} -
- ) : ( -
-

No logs available

-

Logs are recorded during scraping

-
- )} + {/* Metrics Dashboard (for running jobs) */} + {job.status === 'running' && metricsHistory.length > 0 && ( +
+
+ )} + + {/* Session Panel (if fingerprint available) */} + {sessionFingerprint && ( +
+ +
+ )} + + {/* Job DevTools - Main Log Viewer */} +
+ {isLoadingLogs ? ( +
+
+
+

Loading logs...

+
+
+ ) : ( + + )}
+ + {/* Loading Crash Report Indicator */} + {isLoadingCrashReport && ( +
+
+ Loading crash report... +
+ )}
); } diff --git a/web/components/JobsView.tsx b/web/components/JobsView.tsx index 3729fdb..e814f48 100644 --- a/web/components/JobsView.tsx +++ b/web/components/JobsView.tsx @@ -1,6 +1,7 @@ 'use client'; import React, { useState, useMemo, useEffect, useCallback } from 'react'; +import Link from 'next/link'; import { useReactTable, getCoreRowModel, @@ -822,6 +823,18 @@ export default function JobsView({ jobs, onSelectJob, isLoadingJob, onRefresh }: ); })()} + {/* View DevTools */} + e.stopPropagation()} + > + + + + + {/* View Logs */} {(() => { const isStuck = job.status === 'running' && @@ -1491,6 +1504,16 @@ export default function JobsView({ jobs, onSelectJob, isLoadingJob, onRefresh }: View Reviews )} + stopMonitoring()} + className="py-2.5 px-6 bg-purple-600 text-white rounded-lg font-semibold hover:bg-purple-700 transition-colors flex items-center gap-2" + > + + + + DevTools +