From 59368a5bd551e29fa2191872cfb16b48190153a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Sat, 24 Jan 2026 11:14:02 +0000 Subject: [PATCH] Add Job DevTools implementation task breakdown 18 tasks organized in 5 parallel tracks: - Track A: Backend logging infrastructure (4 tasks) - Track B: Frontend log viewer (5 tasks) - Track C: Crash analysis (4 tasks) - Track D: Session & metrics (3 tasks) - Track E: Review topics (2 tasks) Includes dependency graph and 7-wave execution plan for parallel AI agent workflow. Co-Authored-By: Claude Opus 4.5 --- .artifacts/job-devtools-tasks.md | 336 +++++++++++++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 .artifacts/job-devtools-tasks.md diff --git a/.artifacts/job-devtools-tasks.md b/.artifacts/job-devtools-tasks.md new file mode 100644 index 0000000..e688e22 --- /dev/null +++ b/.artifacts/job-devtools-tasks.md @@ -0,0 +1,336 @@ +# Job DevTools - Implementation Tasks + +## Dependency Graph + +``` +Wave 1 (Parallel start): + #1 StructuredLogger ──┬──▶ #2 Migrate scraper ──▶ #3 SSE stream ──▶ #5 JobDevTools + │ │ + ├──▶ #4 DB schema ──┬──▶ #10 Crash analyzer ▼ + │ │ │ #6 LogViewer + │ │ ▼ │ + │ ├──▶ #11 Crash API ▼ + │ │ │ #7 CopyToolbar + │ │ ▼ │ + │ │ #12 CrashReport ▼ + │ │ #8 LogEntry + │ └──▶ #13 Session capture │ + │ │ │ + └──▶ #9 Crash detection ▼ │ + │ #14 SessionPanel │ + │ │ │ + └───────────────────┼───────────────┘ + │ + #16 Topics inference ──▶ #17 Topic tags ▼ + #15 MetricsDashboard + │ + ▼ + #18 INTEGRATION +``` + +--- + +## Task Details + +### Track A: Backend Logging Infrastructure + +#### Task #1: Create StructuredLogger class in Python backend +**Priority:** P0 (Foundation) +**Blocks:** #2, #3, #4, #9 + +Create `modules/structured_logger.py`: + +```python +from dataclasses import dataclass, field, asdict +from typing import Optional, Dict, Any, List, Literal +from datetime import datetime +import threading +import 
time + +LogLevel = Literal['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'] +LogCategory = Literal['scraper', 'browser', 'network', 'system'] + +@dataclass +class LogEntry: + timestamp: str + timestamp_ms: int + level: LogLevel + category: LogCategory + message: str + metrics: Optional[Dict[str, Any]] = None + network: Optional[Dict[str, Any]] = None + snapshot_id: Optional[str] = None + +class StructuredLogger: + def __init__(self, max_entries: int = 10000): + self._logs: List[LogEntry] = [] + self._lock = threading.Lock() + self._max_entries = max_entries + + def _log(self, level: LogLevel, category: LogCategory, message: str, + metrics: Dict = None, network: Dict = None, snapshot_id: str = None): + now = datetime.utcnow() + entry = LogEntry( + timestamp=now.isoformat() + 'Z', + timestamp_ms=int(time.time() * 1000), + level=level, + category=category, + message=message, + metrics=metrics, + network=network, + snapshot_id=snapshot_id + ) + with self._lock: + self._logs.append(entry) + if len(self._logs) > self._max_entries: + self._logs = self._logs[-self._max_entries:] + + def debug(self, category: LogCategory, message: str, **kwargs): + self._log('DEBUG', category, message, **kwargs) + + def info(self, category: LogCategory, message: str, **kwargs): + self._log('INFO', category, message, **kwargs) + + def warn(self, category: LogCategory, message: str, **kwargs): + self._log('WARN', category, message, **kwargs) + + def error(self, category: LogCategory, message: str, **kwargs): + self._log('ERROR', category, message, **kwargs) + + def fatal(self, category: LogCategory, message: str, **kwargs): + self._log('FATAL', category, message, **kwargs) + + def get_logs(self) -> List[Dict]: + with self._lock: + return [asdict(e) for e in self._logs] + + def get_logs_by_category(self, category: LogCategory) -> List[Dict]: + with self._lock: + return [asdict(e) for e in self._logs if e.category == category] +``` + +--- + +#### Task #2: Migrate scraper_clean.py to use 
StructuredLogger +**Blocked by:** #1 +**Blocks:** #3 + +Update all log calls in `modules/scraper_clean.py`: +- Replace `LogCapture` with `StructuredLogger` +- Add category to each log call +- Add metrics where relevant (scroll_count, reviews_count, memory_mb) + +--- + +#### Task #3: Update SSE stream to emit structured log events +**Blocked by:** #1, #2 +**Blocks:** #5, #15 + +Update `api_server_production.py`: +- Change log event format to include full LogEntry structure +- Add metrics event type emitted every 5 seconds +- Backward compatibility for old clients + +--- + +#### Task #4: Add crash_reports table and schema +**Blocked by:** #1 +**Blocks:** #10, #11, #13 + +Add to `modules/database.py`: +```sql +CREATE TABLE crash_reports ( + crash_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + job_id UUID REFERENCES jobs(job_id) ON DELETE CASCADE, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + crash_type VARCHAR(50) NOT NULL, + error_message TEXT, + state JSONB NOT NULL, + metrics_history JSONB, + logs_before_crash JSONB, + analysis JSONB, + screenshot_url TEXT +); + +ALTER TABLE jobs ADD COLUMN IF NOT EXISTS session_fingerprint JSONB; +ALTER TABLE jobs ADD COLUMN IF NOT EXISTS metrics_history JSONB; +``` + +--- + +### Track B: Frontend Log Viewer + +#### Task #5: Create JobDevTools React container component +**Blocked by:** #3 +**Blocks:** #6, #18 + +Create `web/components/JobDevTools/index.tsx`: +- Tab bar: All, Scraper, Browser, Network, System +- Count badges per tab +- Renders LogViewer, CopyToolbar, SessionPanel, CrashReport + +--- + +#### Task #6: Create LogViewer component with virtualized list +**Blocked by:** #5 +**Blocks:** #7, #18 + +Create `web/components/JobDevTools/LogViewer.tsx`: +- Virtualized list (react-window) +- Level filter, search, auto-scroll toggle +- Timestamp format toggle + +--- + +#### Task #7: Create CopyToolbar and copy utilities +**Blocked by:** #6 +**Blocks:** #8, #18 + +Create: +- `web/components/JobDevTools/CopyToolbar.tsx` +- 
`web/lib/copy-utils.ts` + +--- + +#### Task #8: Create LogEntry row component with click-to-copy +**Blocked by:** #7 +**Blocks:** #18 + +Create `web/components/JobDevTools/LogEntry.tsx`: +- Click to copy, shift+click for range +- Level/category badges with colors +- Expandable metrics view + +--- + +### Track C: Crash Analysis + +#### Task #9: Implement crash detection wrapper in scraper +**Blocked by:** #1 +**Blocks:** #10 + +Add to `modules/scraper_clean.py`: +- Wrap execution in try/except +- Periodic metrics sampling (5s interval) +- Compile CrashReport on failure +- Helpers: get_chrome_memory(), get_dom_node_count(), classify_crash() + +--- + +#### Task #10: Create crash pattern analyzer +**Blocked by:** #4, #9 +**Blocks:** #11 + +Create `modules/crash_analyzer.py`: +- Pattern detection: memory_exhaustion, dom_bloat, rate_limited, consent_loop, scroll_timeout, element_stale +- Confidence scoring +- Suggested fix generation +- Auto-fix parameters + +--- + +#### Task #11: Add crash report API endpoints +**Blocked by:** #4, #10 +**Blocks:** #12 + +Add to `api_server_production.py`: +- GET /jobs/{job_id}/crash-report +- POST /jobs/{job_id}/retry?apply_fix=...
+- GET /crashes/stats + +--- + +#### Task #12: Create CrashReport frontend component +**Blocked by:** #11 +**Blocks:** #18 + +Create `web/components/JobDevTools/CrashReport.tsx`: +- Timeline to crash visualization +- Pattern analysis display +- "Apply Fix & Retry" button +- Collapsible logs before crash + +--- + +### Track D: Session & Metrics + +#### Task #13: Capture and store session fingerprint in backend +**Blocked by:** #4 +**Blocks:** #14 + +Add to `modules/scraper_clean.py`: +- Compile SessionFingerprint at job start +- Run bot detection tests +- Store in the `jobs.session_fingerprint` column added by Task #4 + +--- + +#### Task #14: Create SessionPanel frontend component +**Blocked by:** #13 +**Blocks:** #18 + +Create `web/components/JobDevTools/SessionPanel.tsx`: +- "What Google Saw" display +- Identity, Geolocation, Viewport sections +- Bot detection indicators (green/yellow/red) + +--- + +#### Task #15: Create MetricsDashboard with real-time charts +**Blocked by:** #3 +**Blocks:** #18 + +Create `web/components/JobDevTools/MetricsDashboard.tsx`: +- Extraction rate line chart +- Cumulative reviews area chart +- Memory usage line chart +- API vs DOM pie chart + +--- + +### Track E: Review Topics + +#### Task #16: Implement review topics inference algorithm +**Blocks:** #17 + +Add to `modules/scraper_clean.py`: +- `infer_review_topics(review_text, topics)` function +- Word boundary matching +- Simple stemming variants +- Add 'topics' field to each review + +--- + +#### Task #17: Add topic tags to review cards in frontend +**Blocked by:** #16 + +Update: +- `web/components/ReviewAnalytics.tsx` +- `web/lib/analytics.ts` + +Add topic tags to reviews, topic filter, topic distribution chart. + +--- + +#### Task #18: Integrate JobDevTools into job detail page +**Blocked by:** #5, #6, #7, #8, #12, #14, #15 + +Replace current log display with JobDevTools component. +Handle both old and new log formats. +Connect SSE stream for real-time updates.
+ +--- + +## Execution Waves + +| Wave | Tasks | Parallel Agents | +|------|-------|-----------------| +| 1 | #1, #16 | 2 | +| 2 | #2, #4, #9, #17 | 4 | +| 3 | #3, #10, #13 | 3 | +| 4 | #5, #11, #14, #15 | 4 | +| 5 | #6, #12 | 2 | +| 6 | #7 → #8 | 1 (sequential) | +| 7 | #18 | 1 | + +**Critical Path:** #1 → #2 → #3 → #5 → #6 → #7 → #8 → #18