Wave 2: Migrate scraper to StructuredLogger, add crash detection & topic tags
- Task #2: Migrate scraper_clean.py to use StructuredLogger with categories (37 log calls with metrics across browser/scraper/network/system)
- Task #4: Add crash_reports table schema and database methods (save_crash_report, get_crash_report, get_crash_stats)
- Task #9: Implement crash detection wrapper with metrics sampling (get_chrome_memory, get_dom_node_count, classify_crash)
- Task #17: Add topic tags to frontend ReviewAnalytics (topic filter UI, tags on cards, topics in modal)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -154,6 +154,41 @@ class DatabaseManager:
|
||||
CREATE INDEX IF NOT EXISTS idx_webhook_job_id ON webhook_attempts(job_id);
|
||||
""")
|
||||
|
||||
# Add session_fingerprint and metrics_history columns to jobs table
|
||||
await conn.execute("""
|
||||
ALTER TABLE jobs ADD COLUMN IF NOT EXISTS session_fingerprint JSONB;
|
||||
""")
|
||||
await conn.execute("""
|
||||
ALTER TABLE jobs ADD COLUMN IF NOT EXISTS metrics_history JSONB;
|
||||
""")
|
||||
|
||||
# Create crash_reports table
|
||||
await conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS crash_reports (
|
||||
crash_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
job_id UUID REFERENCES jobs(job_id) ON DELETE CASCADE,
|
||||
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
crash_type VARCHAR(50) NOT NULL,
|
||||
error_message TEXT,
|
||||
state JSONB NOT NULL,
|
||||
metrics_history JSONB,
|
||||
logs_before_crash JSONB,
|
||||
analysis JSONB,
|
||||
screenshot_url TEXT,
|
||||
dom_snapshot_id UUID
|
||||
);
|
||||
""")
|
||||
|
||||
await conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_crash_reports_job ON crash_reports(job_id);
|
||||
""")
|
||||
await conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_crash_reports_type ON crash_reports(crash_type);
|
||||
""")
|
||||
await conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_crash_reports_created ON crash_reports(created_at DESC);
|
||||
""")
|
||||
|
||||
log.info("Database schema initialized")
|
||||
|
||||
# ==================== Job Operations ====================
|
||||
@@ -657,3 +692,150 @@ class DatabaseManager:
|
||||
INSERT INTO webhook_attempts (job_id, attempt_number, success, status_code, error_message, response_time_ms)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
""", job_id, attempt_number, success, status_code, error_message, response_time_ms)
|
||||
|
||||
# ==================== Crash Reports ====================
|
||||
|
||||
async def save_crash_report(self, job_id: str, crash_data: dict) -> str:
    """
    Insert one row into crash_reports and return its generated id.

    Args:
        job_id: Job UUID as a string; a non-string value is passed to the
            query unchanged.
        crash_data: Dictionary of report fields:
            - crash_type: Crash classification (required)
            - error_message: Error text (optional)
            - state: State at crash time (required; an empty object is
              stored when the key is absent)
            - metrics_history: Historical metrics (optional)
            - logs_before_crash: Log entries preceding the crash (optional)
            - analysis: Crash analysis data (optional)
            - screenshot_url: URL of a screenshot (optional)
            - dom_snapshot_id: UUID of a DOM snapshot, as a string (optional)

    Returns:
        The crash_id of the new row, converted to a string.
    """
    def _json_or_null(field):
        # Missing, None and empty values are all written as SQL NULL.
        return json.dumps(crash_data.get(field)) if crash_data.get(field) else None

    insert_sql = """
        INSERT INTO crash_reports (
            job_id,
            crash_type,
            error_message,
            state,
            metrics_history,
            logs_before_crash,
            analysis,
            screenshot_url,
            dom_snapshot_id
        )
        VALUES ($1, $2, $3, $4::jsonb, $5::jsonb, $6::jsonb, $7::jsonb, $8, $9)
        RETURNING crash_id
    """

    async with self.pool.acquire() as conn:
        # Accept either a UUID string or a value that is already usable as-is.
        job_uuid = job_id if not isinstance(job_id, str) else UUID(job_id)
        snapshot_ref = crash_data.get('dom_snapshot_id')

        # Positional values, in the same order as the column list above.
        params = (
            job_uuid,
            crash_data.get('crash_type'),
            crash_data.get('error_message'),
            json.dumps(crash_data.get('state', {})),
            _json_or_null('metrics_history'),
            _json_or_null('logs_before_crash'),
            _json_or_null('analysis'),
            crash_data.get('screenshot_url'),
            UUID(snapshot_ref) if snapshot_ref else None,
        )
        crash_id = await conn.fetchval(insert_sql, *params)

        log.info(f"Saved crash report {crash_id} for job {job_id}, type: {crash_data.get('crash_type')}")
        return str(crash_id)
|
||||
|
||||
async def get_crash_report(self, job_id: str) -> Optional[dict]:
    """
    Fetch the most recently created crash report for a job.

    Args:
        job_id: Job UUID as a string; a non-string value is passed to the
            query unchanged.

    Returns:
        The newest matching row as a dictionary, with crash_id, job_id and
        (when set) dom_snapshot_id converted to strings, or None when the
        job has no crash report.
    """
    select_sql = """
        SELECT
            crash_id,
            job_id,
            created_at,
            crash_type,
            error_message,
            state,
            metrics_history,
            logs_before_crash,
            analysis,
            screenshot_url,
            dom_snapshot_id
        FROM crash_reports
        WHERE job_id = $1
        ORDER BY created_at DESC
        LIMIT 1
    """

    async with self.pool.acquire() as conn:
        lookup_id = job_id if not isinstance(job_id, str) else UUID(job_id)
        record = await conn.fetchrow(select_sql, lookup_id)

        if not record:
            return None

        report = dict(record)

        # UUID values are stringified so the result can be JSON-serialized.
        for key in ('crash_id', 'job_id'):
            report[key] = str(report[key])

        snapshot_id = report.get('dom_snapshot_id')
        if snapshot_id:
            report['dom_snapshot_id'] = str(snapshot_id)

        return report
|
||||
|
||||
async def get_crash_stats(self, days: int = 7) -> dict:
    """
    Get crash statistics for the last N days.

    The look-back window is bound as query parameter $1 and turned into an
    interval by the database. A printf-style '%s' inside the SQL text is
    never substituted by a driver that uses $n placeholders, so the
    statement would declare no parameters while one argument is passed.

    Args:
        days: Number of days to look back (default: 7)

    Returns:
        Dictionary with:
            - total: Total number of crashes in the window
            - by_type: Dict mapping crash type to count
            - by_day: List of dicts with 'date' (string) and 'count',
              newest day first
    """
    async with self.pool.acquire() as conn:
        # Total number of crashes in the window
        total = await conn.fetchval("""
            SELECT COUNT(*)
            FROM crash_reports
            WHERE created_at >= NOW() - make_interval(days => $1::int)
        """, days)

        # Crashes per type, most frequent first
        type_rows = await conn.fetch("""
            SELECT crash_type, COUNT(*) as count
            FROM crash_reports
            WHERE created_at >= NOW() - make_interval(days => $1::int)
            GROUP BY crash_type
            ORDER BY count DESC
        """, days)

        by_type = {row['crash_type']: row['count'] for row in type_rows}

        # Crashes per calendar day, newest first
        day_rows = await conn.fetch("""
            SELECT DATE(created_at) as date, COUNT(*) as count
            FROM crash_reports
            WHERE created_at >= NOW() - make_interval(days => $1::int)
            GROUP BY DATE(created_at)
            ORDER BY date DESC
        """, days)

        # Dates are stringified so the result can be JSON-serialized.
        by_day = [{'date': str(row['date']), 'count': row['count']} for row in day_rows]

        return {
            'total': total or 0,
            'by_type': by_type,
            'by_day': by_day
        }
|
||||
|
||||
Reference in New Issue
Block a user