Phase 0: Project restructure to ReviewIQ platform architecture

New structure:
- scrapers/google_reviews/v1_0_0.py (was modules/scraper_clean.py)
- scrapers/base.py (BaseScraper interface)
- scrapers/registry.py (ScraperRegistry for version routing)
- core/database.py, models.py, config.py, enums.py
- utils/logger.py, crash_analyzer.py, health_checks.py, helpers.py, date_converter.py
- workers/chrome_pool.py
- services/webhook_service.py
- api/ routes structure (empty, ready for Phase 2)
- tests/ structure mirroring source

All imports updated in:
- api_server_production.py (7 import paths updated)
- utils/health_checks.py (scraper import path)

Legacy modules moved to modules/_legacy/:
- data_storage.py, image_handler.py, s3_handler.py (unused)

Syntax verified, frontend build passing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 15:22:08 +00:00
parent bb0291f265
commit 544e028c3f
37 changed files with 5782 additions and 30 deletions
--- a/modules/_legacy/data_storage.py
+++ b/modules/_legacy/data_storage.py
--- a/modules/_legacy/image_handler.py
+++ b/modules/_legacy/image_handler.py
--- a/modules/_legacy/s3_handler.py
+++ b/modules/_legacy/s3_handler.py
--- a/modules/chrome_pool.py
+++ b/modules/chrome_pool.py
@@ -1,388 +0,0 @@
-#!/usr/bin/env python3
-"""
-Chrome Worker Pool Manager
-
-Maintains a pool of idle Chrome instances for faster scraping.
-Pre-warms browsers on startup to eliminate cold-start delays.
-"""
-import logging
-import asyncio
-import time
-from typing import Optional, Dict, Any
-from seleniumbase import Driver
-from queue import Queue, Empty
-import threading
-
-log = logging.getLogger(__name__)
-
-
-class ChromeWorker:
-    """Single Chrome worker instance"""
-
-    def __init__(self, worker_id: str, headless: bool = True):
-        self.worker_id = worker_id
-        self.headless = headless
-        self.driver: Optional[Driver] = None
-        self.created_at = None
-        self.last_used = None
-        self.use_count = 0
-        self.is_busy = False
-
-    def initialize(self):
-        """Initialize Chrome driver with stability flags for unlimited scraping"""
-        try:
-            log.info(f"Worker {self.worker_id}: Initializing Chrome for unlimited review scraping...")
-
-            # SeleniumBase Driver automatically includes UC mode anti-detection
-            # Initialize with longer timeouts for large scraping jobs
-            # Chrome arguments for Docker stability
-            chrome_args = [
-                "--disable-dev-shm-usage",  # Use /tmp instead of /dev/shm (critical for Docker)
-                "--disable-gpu",  # Disable GPU acceleration
-                "--no-sandbox",  # Required for Docker
-                "--disable-software-rasterizer",
-                "--disable-extensions",
-                "--disable-background-networking",
-                "--disable-default-apps",
-                "--disable-sync",
-                "--metrics-recording-only",
-                "--mute-audio",
-                "--no-first-run",
-                "--safebrowsing-disable-auto-update",
-            ]
-
-            self.driver = Driver(
-                uc=True,
-                headless=self.headless,
-                page_load_strategy="normal",
-                chromium_arg=",".join(chrome_args)
-            )
-
-            # Set generous timeouts for large scraping jobs
-            self.driver.set_page_load_timeout(120)  # 2 minutes for slow networks
-            self.driver.set_script_timeout(60)  # 1 minute for complex extraction
-
-            # Set Chrome geolocation to US (Boston, MA) for consistent Google Maps results
-            # This prevents location-based variations in search results
-            try:
-                self.driver.execute_cdp_cmd('Emulation.setGeolocationOverride', {
-                    'latitude': 42.3601,
-                    'longitude': -71.0589,
-                    'accuracy': 100
-                })
-                log.info(f"Worker {self.worker_id}: Geolocation set to US (Boston, MA)")
-            except Exception as e:
-                log.warning(f"Worker {self.worker_id}: Could not set geolocation: {e}")
-
-            self.driver.maximize_window()
-            self.created_at = time.time()
-            self.last_used = time.time()
-            log.info(f"Worker {self.worker_id}: Chrome ready for unlimited scraping")
-            return True
-        except Exception as e:
-            log.error(f"Worker {self.worker_id}: Failed to initialize: {e}")
-            return False
-
-    def reset(self):
-        """Reset worker to clean state"""
-        try:
-            if self.driver:
-                # Clear cookies, cache, local storage
-                self.driver.delete_all_cookies()
-                self.driver.execute_script("window.localStorage.clear();")
-                self.driver.execute_script("window.sessionStorage.clear();")
-                log.debug(f"Worker {self.worker_id}: Reset complete")
-        except Exception as e:
-            log.warning(f"Worker {self.worker_id}: Reset failed: {e}")
-
-    def shutdown(self):
-        """Shutdown worker"""
-        try:
-            if self.driver:
-                self.driver.quit()
-                log.info(f"Worker {self.worker_id}: Shutdown complete")
-        except Exception as e:
-            log.warning(f"Worker {self.worker_id}: Shutdown error: {e}")
-        finally:
-            self.driver = None
-
-    def should_recycle(self, max_age_seconds: int = 3600, max_uses: int = 50):
-        """Check if worker should be recycled"""
-        if not self.driver:
-            return True
-
-        age = time.time() - self.created_at if self.created_at else 0
-        if age > max_age_seconds:
-            log.info(f"Worker {self.worker_id}: Recycling due to age ({age:.0f}s)")
-            return True
-
-        if self.use_count > max_uses:
-            log.info(f"Worker {self.worker_id}: Recycling due to use count ({self.use_count})")
-            return True
-
-        return False
-
-
-class ChromeWorkerPool:
-    """
-    Pool of Chrome worker instances for faster scraping.
-
-    Maintains idle workers ready to execute tasks immediately.
-    Workers are recycled after max age or max uses to prevent memory leaks.
-    """
-
-    def __init__(self, pool_size: int = 2, headless: bool = True):
-        """
-        Initialize worker pool.
-
-        Args:
-            pool_size: Number of idle workers to maintain
-            headless: Run Chrome in headless mode
-        """
-        self.pool_size = pool_size
-        self.headless = headless
-        self.workers: Queue[ChromeWorker] = Queue(maxsize=pool_size)
-        self.active_workers: Dict[str, ChromeWorker] = {}
-        self.worker_counter = 0
-        self.lock = threading.Lock()
-        self.running = False
-        self.maintenance_thread = None
-
-    def start(self):
-        """Start the worker pool"""
-        log.info(f"Starting Chrome worker pool (size={self.pool_size}, headless={self.headless})")
-        self.running = True
-
-        # Pre-warm workers
-        for _ in range(self.pool_size):
-            self._create_worker()
-
-        # Start maintenance thread
-        self.maintenance_thread = threading.Thread(target=self._maintenance_loop, daemon=True)
-        self.maintenance_thread.start()
-
-        log.info(f"Chrome worker pool started with {self.workers.qsize()} ready workers")
-
-    def stop(self):
-        """Stop the worker pool"""
-        log.info("Stopping Chrome worker pool...")
-        self.running = False
-
-        if self.maintenance_thread:
-            self.maintenance_thread.join(timeout=5)
-
-        # Shutdown all workers
-        while not self.workers.empty():
-            try:
-                worker = self.workers.get_nowait()
-                worker.shutdown()
-            except Empty:
-                break
-
-        # Shutdown active workers
-        with self.lock:
-            for worker in self.active_workers.values():
-                worker.shutdown()
-            self.active_workers.clear()
-
-        log.info("Chrome worker pool stopped")
-
-    def _create_worker(self) -> Optional[ChromeWorker]:
-        """Create a new worker and add to pool"""
-        with self.lock:
-            self.worker_counter += 1
-            worker_id = f"worker-{self.worker_counter}"
-
-        worker = ChromeWorker(worker_id, headless=self.headless)
-        if worker.initialize():
-            try:
-                self.workers.put_nowait(worker)
-                return worker
-            except:
-                worker.shutdown()
-                return None
-        return None
-
-    def acquire_worker(self, timeout: float = 30) -> Optional[ChromeWorker]:
-        """
-        Acquire a worker from the pool.
-
-        Args:
-            timeout: Maximum time to wait for a worker
-
-        Returns:
-            ChromeWorker instance or None if timeout
-        """
-        try:
-            worker = self.workers.get(timeout=timeout)
-            worker.is_busy = True
-            worker.last_used = time.time()
-            worker.use_count += 1
-
-            with self.lock:
-                self.active_workers[worker.worker_id] = worker
-
-            log.debug(f"Acquired {worker.worker_id} (uses: {worker.use_count}, pool: {self.workers.qsize()}/{self.pool_size})")
-
-            # No need to create replacement - worker will be returned to pool after use
-            # Maintenance thread ensures pool stays at capacity
-
-            return worker
-        except Empty:
-            log.warning(f"Failed to acquire worker within {timeout}s")
-            return None
-
-    def release_worker(self, worker: ChromeWorker, recycle: bool = False):
-        """
-        Release a worker back to the pool.
-
-        Args:
-            worker: Worker to release
-            recycle: Force worker recycling
-        """
-        with self.lock:
-            if worker.worker_id in self.active_workers:
-                del self.active_workers[worker.worker_id]
-
-        worker.is_busy = False
-
-        # Check if worker should be recycled
-        if recycle or worker.should_recycle():
-            log.info(f"Recycling {worker.worker_id}")
-            worker.shutdown()
-            # Create replacement worker in background
-            threading.Thread(target=self._create_worker, daemon=True).start()
-        else:
-            # Reset and return to pool
-            worker.reset()
-            try:
-                # Non-blocking put - if pool is full, it means we have extra workers
-                # Just keep the worker for next time instead of destroying it
-                current_size = self.workers.qsize()
-                if current_size < self.pool_size:
-                    self.workers.put_nowait(worker)
-                    log.debug(f"Released {worker.worker_id} back to pool ({current_size + 1}/{self.pool_size})")
-                else:
-                    # Pool already at capacity, recycle this extra worker
-                    log.debug(f"Pool at capacity ({current_size}/{self.pool_size}), recycling extra {worker.worker_id}")
-                    worker.shutdown()
-            except Exception as e:
-                # Unexpected error, shutdown worker
-                log.error(f"Failed to release {worker.worker_id}: {e}")
-                worker.shutdown()
-
-    def _maintenance_loop(self):
-        """Background maintenance thread"""
-        while self.running:
-            try:
-                # Ensure pool is at capacity
-                current_size = self.workers.qsize()
-                needed = self.pool_size - current_size
-
-                if needed > 0:
-                    log.debug(f"Pool needs {needed} more workers")
-                    for _ in range(needed):
-                        self._create_worker()
-
-                # Sleep for 10 seconds
-                time.sleep(10)
-
-            except Exception as e:
-                log.error(f"Maintenance loop error: {e}")
-                time.sleep(5)
-
-    def get_stats(self) -> Dict[str, Any]:
-        """Get pool statistics"""
-        with self.lock:
-            active_count = len(self.active_workers)
-
-        return {
-            "pool_size": self.pool_size,
-            "idle_workers": self.workers.qsize(),
-            "active_workers": active_count,
-            "total_workers_created": self.worker_counter,
-            "headless": self.headless
-        }
-
-
-# Global worker pool instances
-validation_pool: Optional[ChromeWorkerPool] = None
-scraping_pool: Optional[ChromeWorkerPool] = None
-
-
-def start_worker_pools(validation_size: int = 1, scraping_size: int = 2, headless: bool = True):
-    """
-    Start global worker pools.
-
-    Args:
-        validation_size: Number of workers for validation checks
-        scraping_size: Number of workers for scraping jobs
-        headless: Run Chrome in headless mode
-    """
-    global validation_pool, scraping_pool
-
-    log.info("Starting global Chrome worker pools...")
-
-    validation_pool = ChromeWorkerPool(pool_size=validation_size, headless=headless)
-    validation_pool.start()
-
-    scraping_pool = ChromeWorkerPool(pool_size=scraping_size, headless=headless)
-    scraping_pool.start()
-
-    log.info("Global Chrome worker pools started")
-
-
-def stop_worker_pools():
-    """Stop global worker pools"""
-    global validation_pool, scraping_pool
-
-    log.info("Stopping global Chrome worker pools...")
-
-    if validation_pool:
-        validation_pool.stop()
-        validation_pool = None
-
-    if scraping_pool:
-        scraping_pool.stop()
-        scraping_pool = None
-
-    log.info("Global Chrome worker pools stopped")
-
-
-def get_validation_worker(timeout: float = 10) -> Optional[ChromeWorker]:
-    """Get a worker for validation check"""
-    if validation_pool:
-        return validation_pool.acquire_worker(timeout=timeout)
-    return None
-
-
-def release_validation_worker(worker: ChromeWorker, recycle: bool = False):
-    """Release a validation worker"""
-    if validation_pool:
-        validation_pool.release_worker(worker, recycle=recycle)
-
-
-def get_scraping_worker(timeout: float = 30) -> Optional[ChromeWorker]:
-    """Get a worker for scraping"""
-    if scraping_pool:
-        return scraping_pool.acquire_worker(timeout=timeout)
-    return None
-
-
-def release_scraping_worker(worker: ChromeWorker, recycle: bool = False):
-    """Release a scraping worker"""
-    if scraping_pool:
-        scraping_pool.release_worker(worker, recycle=recycle)
-
-
-def get_pool_stats() -> Dict[str, Any]:
-    """Get statistics for all pools"""
-    stats = {}
-
-    if validation_pool:
-        stats['validation'] = validation_pool.get_stats()
-
-    if scraping_pool:
-        stats['scraping'] = scraping_pool.get_stats()
-
-    return stats
--- a/modules/config.py
+++ b/modules/config.py
@@ -1,82 +0,0 @@
-"""
-Configuration management for Google Maps Reviews Scraper.
-"""
-
-import logging
-from pathlib import Path
-from typing import Dict, Any
-
-import yaml
-
-# Configure logging - can be overridden by environment variable
-import os
-log_level = getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO)
-logging.basicConfig(level=log_level, format="[%(asctime)s] %(levelname)s: %(message)s")
-log = logging.getLogger("scraper")
-
-# Default configuration path
-DEFAULT_CONFIG_PATH = Path("config.yaml")
-
-# Default configuration - will be overridden by config file
-DEFAULT_CONFIG = {
-    "url": "https://maps.app.goo.gl/6tkNMDjcj3SS6LJe9",
-    "headless": True,
-    "sort_by": "relevance",
-    "stop_on_match": False,
-    "overwrite_existing": False,
-    "use_mongodb": True,
-    "mongodb": {
-        "uri": "mongodb://localhost:27017",
-        "database": "reviews",
-        "collection": "google_reviews"
-    },
-    "backup_to_json": True,
-    "json_path": "google_reviews.json",
-    "seen_ids_path": "google_reviews.ids",
-    "convert_dates": True,
-    "download_images": True,
-    "image_dir": "review_images",
-    "download_threads": 4,
-    "store_local_paths": True,  # Option to control storing local image paths
-    "replace_urls": False,  # Option to control URL replacement
-    "custom_url_base": "https://mycustomurl.com",  # Base URL for replacement
-    "custom_url_profiles": "/profiles/",  # Path for profile images
-    "custom_url_reviews": "/reviews/",  # Path for review images
-    "preserve_original_urls": True,  # Option to preserve original URLs
-    "custom_params": {  # Custom parameters to add to each document
-        "company": "Thaitours",  # Default example
-        "source": "Google Maps"  # Default example
-    }
-}
-
-
-def load_config(config_path: Path = DEFAULT_CONFIG_PATH) -> Dict[str, Any]:
-    """Load configuration from YAML file or use defaults"""
-    config = DEFAULT_CONFIG.copy()
-
-    if config_path.exists():
-        try:
-            with open(config_path, 'r') as f:
-                user_config = yaml.safe_load(f)
-                if user_config:
-                    # Merge configs, with nested dictionary support
-                    def deep_update(d, u):
-                        for k, v in u.items():
-                            if isinstance(v, dict) and k in d and isinstance(d[k], dict):
-                                deep_update(d[k], v)
-                            else:
-                                d[k] = v
-
-                    deep_update(config, user_config)
-                    log.info(f"Loaded configuration from {config_path}")
-        except Exception as e:
-            log.error(f"Error loading config from {config_path}: {e}")
-            log.info("Using default configuration")
-    else:
-        log.info(f"Config file {config_path} not found, using default configuration")
-        # Create a default config file for future use
-        with open(config_path, 'w') as f:
-            yaml.dump(config, f, default_flow_style=False)
-            log.info(f"Created default configuration file at {config_path}")
-
-    return config
--- a/modules/crash_analyzer.py
+++ b/modules/crash_analyzer.py
@@ -1,666 +0,0 @@
-"""
-Crash Pattern Analyzer Module
-
-Provides deep analysis of scraper crashes with pattern detection,
-confidence scoring, and auto-fix parameter suggestions.
-
-Builds on top of the basic classify_crash function in scraper_clean.py
-with more sophisticated pattern matching and multi-signal analysis.
-"""
-
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
-import re
-
-
-@dataclass
-class CrashAnalysis:
-    """
-    Result of crash pattern analysis.
-
-    Attributes:
-        pattern: The identified crash pattern type (e.g., "memory_exhaustion", "dom_bloat")
-        confidence: Confidence score from 0.0 to 1.0 based on multiple signals
-        description: Human-readable description of the crash cause
-        suggested_fix: Recommended action to prevent this crash
-        auto_fix_params: Parameters that can be applied automatically to prevent recurrence
-    """
-    pattern: str  # e.g., "memory_exhaustion", "dom_bloat", "rate_limited"
-    confidence: float  # 0.0 to 1.0
-    description: str
-    suggested_fix: str
-    auto_fix_params: Optional[Dict[str, Any]]
-
-
-# Thresholds for pattern detection
-MEMORY_EXHAUSTION_THRESHOLD_MB = 1500  # 1.5GB in MB
-MEMORY_GROWTH_RATE_THRESHOLD_MB_S = 10  # 10MB/s
-DOM_BLOAT_THRESHOLD = 50000  # 50000 nodes
-SCROLL_TIMEOUT_MIN_SCROLLS = 10  # Minimum scrolls before considering scroll_timeout
-
-
-# Auto-fix parameters for each crash pattern
-AUTO_FIX_PARAMS = {
-    "memory_exhaustion": {
-        "max_reviews": 500,
-        "restart_browser_after": 200
-    },
-    "dom_bloat": {
-        "scroll_cleanup": True,
-        "lazy_load": True
-    },
-    "rate_limited": {
-        "delay_multiplier": 2.0,
-        "use_different_proxy": True
-    },
-    "consent_loop": {
-        "skip_consent_retries": True
-    },
-    "scroll_timeout": {
-        "reduce_target": True,
-        "target_reviews": "current - 10%"
-    },
-    "element_stale": {
-        "retry_with_fresh_elements": True
-    }
-}
-
-
-def _calculate_memory_growth_rate(metrics_history: List[Dict]) -> Optional[float]:
-    """
-    Calculate memory growth rate in MB/s from metrics history.
-
-    Args:
-        metrics_history: List of metric samples with timestamp_ms and memory_mb
-
-    Returns:
-        Growth rate in MB/s, or None if cannot be calculated
-    """
-    if not metrics_history or len(metrics_history) < 2:
-        return None
-
-    # Filter samples that have valid memory readings
-    valid_samples = [
-        m for m in metrics_history
-        if m.get('memory_mb') is not None and m.get('timestamp_ms') is not None
-    ]
-
-    if len(valid_samples) < 2:
-        return None
-
-    # Use first and last valid samples
-    first = valid_samples[0]
-    last = valid_samples[-1]
-
-    time_delta_s = (last['timestamp_ms'] - first['timestamp_ms']) / 1000
-    if time_delta_s <= 0:
-        return None
-
-    memory_delta_mb = last['memory_mb'] - first['memory_mb']
-    return memory_delta_mb / time_delta_s
-
-
-def _get_max_memory(metrics_history: List[Dict]) -> Optional[int]:
-    """Get maximum memory usage from metrics history."""
-    if not metrics_history:
-        return None
-
-    memories = [m.get('memory_mb') for m in metrics_history if m.get('memory_mb') is not None]
-    return max(memories) if memories else None
-
-
-def _get_max_dom_nodes(metrics_history: List[Dict]) -> Optional[int]:
-    """Get maximum DOM node count from metrics history."""
-    if not metrics_history:
-        return None
-
-    nodes = [m.get('dom_nodes') for m in metrics_history if m.get('dom_nodes') is not None]
-    return max(nodes) if nodes else None
-
-
-def _check_memory_exhaustion(
-    error_message: str,
-    metrics_history: List[Dict],
-    logs: List[Dict]
-) -> tuple[float, str]:
-    """
-    Check for memory exhaustion pattern.
-
-    Returns:
-        Tuple of (confidence, description)
-    """
-    confidence = 0.0
-    signals = []
-
-    # Check for high memory usage
-    max_memory = _get_max_memory(metrics_history)
-    if max_memory is not None:
-        if max_memory >= MEMORY_EXHAUSTION_THRESHOLD_MB:
-            confidence += 0.5
-            signals.append(f"Memory reached {max_memory}MB (threshold: {MEMORY_EXHAUSTION_THRESHOLD_MB}MB)")
-        elif max_memory >= MEMORY_EXHAUSTION_THRESHOLD_MB * 0.8:
-            confidence += 0.3
-            signals.append(f"Memory at {max_memory}MB approaching threshold")
-
-    # Check for rapid memory growth
-    growth_rate = _calculate_memory_growth_rate(metrics_history)
-    if growth_rate is not None and growth_rate >= MEMORY_GROWTH_RATE_THRESHOLD_MB_S:
-        confidence += 0.3
-        signals.append(f"Memory growing at {growth_rate:.1f}MB/s (threshold: {MEMORY_GROWTH_RATE_THRESHOLD_MB_S}MB/s)")
-
-    # Check error message for memory-related keywords
-    error_lower = error_message.lower()
-    memory_keywords = ['memory', 'heap', 'out of memory', 'oom', 'aw, snap', 'status_access_violation']
-    for keyword in memory_keywords:
-        if keyword in error_lower:
-            confidence += 0.2
-            signals.append(f"Error contains '{keyword}'")
-            break
-
-    # Check logs for memory warnings
-    for log_entry in logs:
-        msg = log_entry.get('message', '').lower()
-        if 'memory' in msg and ('high' in msg or 'warning' in msg or 'exceeded' in msg):
-            confidence += 0.1
-            signals.append("Memory warning found in logs")
-            break
-
-    description = "; ".join(signals) if signals else "No memory exhaustion signals detected"
-    return min(confidence, 1.0), description
-
-
-def _check_dom_bloat(
-    error_message: str,
-    metrics_history: List[Dict],
-    logs: List[Dict]
-) -> tuple[float, str]:
-    """
-    Check for DOM bloat pattern.
-
-    Returns:
-        Tuple of (confidence, description)
-    """
-    confidence = 0.0
-    signals = []
-
-    # Check for high DOM node count
-    max_nodes = _get_max_dom_nodes(metrics_history)
-    if max_nodes is not None:
-        if max_nodes >= DOM_BLOAT_THRESHOLD:
-            confidence += 0.6
-            signals.append(f"DOM nodes reached {max_nodes} (threshold: {DOM_BLOAT_THRESHOLD})")
-        elif max_nodes >= DOM_BLOAT_THRESHOLD * 0.8:
-            confidence += 0.3
-            signals.append(f"DOM nodes at {max_nodes} approaching threshold")
-
-    # Check error message for DOM-related keywords
-    error_lower = error_message.lower()
-    dom_keywords = ['dom', 'element', 'node', 'render', 'paint', 'layout']
-    for keyword in dom_keywords:
-        if keyword in error_lower:
-            confidence += 0.2
-            signals.append(f"Error contains '{keyword}'")
-            break
-
-    # Check if memory is high too (DOM bloat often causes memory issues)
-    max_memory = _get_max_memory(metrics_history)
-    if max_memory is not None and max_memory >= 800:  # 800MB
-        confidence += 0.1
-        signals.append(f"Memory also elevated ({max_memory}MB)")
-
-    # Check logs for DOM-related messages
-    for log_entry in logs:
-        msg = log_entry.get('message', '').lower()
-        if 'dom' in msg and ('large' in msg or 'cleanup' in msg or 'remove' in msg):
-            confidence += 0.1
-            signals.append("DOM warning found in logs")
-            break
-
-    description = "; ".join(signals) if signals else "No DOM bloat signals detected"
-    return min(confidence, 1.0), description
-
-
-def _check_rate_limited(
-    error_message: str,
-    metrics_history: List[Dict],
-    logs: List[Dict]
-) -> tuple[float, str]:
-    """
-    Check for rate limiting pattern.
-
-    Returns:
-        Tuple of (confidence, description)
-    """
-    confidence = 0.0
-    signals = []
-
-    # Check error message for rate limit indicators
-    error_lower = error_message.lower()
-    if '429' in error_message:
-        confidence += 0.6
-        signals.append("HTTP 429 status code in error")
-
-    rate_keywords = ['rate limit', 'too many requests', 'unusual traffic', 'captcha', 'blocked']
-    for keyword in rate_keywords:
-        if keyword in error_lower:
-            confidence += 0.4
-            signals.append(f"Error contains '{keyword}'")
-            break
-
-    # Check logs for rate limiting signals
-    rate_log_count = 0
-    for log_entry in logs:
-        msg = log_entry.get('message', '').lower()
-        network = log_entry.get('network', {})
-        status = network.get('status')
-
-        if status == 429:
-            rate_log_count += 1
-            confidence += 0.2
-
-        if 'unusual traffic' in msg or 'rate' in msg or 'blocked' in msg:
-            rate_log_count += 1
-            confidence += 0.1
-
-    if rate_log_count > 0:
-        signals.append(f"Found {rate_log_count} rate-limiting indicators in logs")
-
-    description = "; ".join(signals) if signals else "No rate limiting signals detected"
-    return min(confidence, 1.0), description
-
-
-def _check_consent_loop(
-    error_message: str,
-    metrics_history: List[Dict],
-    logs: List[Dict]
-) -> tuple[float, str]:
-    """
-    Check for consent popup loop pattern.
-
-    Returns:
-        Tuple of (confidence, description)
-    """
-    confidence = 0.0
-    signals = []
-
-    # Check error message for consent keywords
-    error_lower = error_message.lower()
-    if 'consent' in error_lower:
-        confidence += 0.3
-        signals.append("Error mentions consent")
-
-    # Count consent-related log entries
-    consent_count = 0
-    consent_messages = []
-    for log_entry in logs:
-        msg = log_entry.get('message', '').lower()
-        if 'consent' in msg:
-            consent_count += 1
-            consent_messages.append(msg[:50])
-
-    # Multiple consent messages indicate a loop
-    if consent_count >= 3:
-        confidence += 0.5
-        signals.append(f"Consent popup appeared {consent_count} times in logs")
-    elif consent_count >= 2:
-        confidence += 0.3
-        signals.append(f"Consent popup appeared {consent_count} times")
-    elif consent_count == 1:
-        confidence += 0.1
-        signals.append("Single consent popup detected")
-
-    # Check for timeout after consent handling
-    if 'timeout' in error_lower and consent_count > 0:
-        confidence += 0.2
-        signals.append("Timeout occurred with consent activity")
-
-    description = "; ".join(signals) if signals else "No consent loop signals detected"
-    return min(confidence, 1.0), description
-
-
-def _check_scroll_timeout(
-    error_message: str,
-    metrics_history: List[Dict],
-    logs: List[Dict],
-    state: Optional[Dict] = None
-) -> tuple[float, str]:
-    """
-    Check for scroll timeout pattern (no new reviews after many scrolls).
-
-    Returns:
-        Tuple of (confidence, description)
-    """
-    confidence = 0.0
-    signals = []
-
-    # Check state for scroll count
-    scroll_count = 0
-    reviews_count = 0
-    if state:
-        scroll_count = state.get('scroll_count', 0)
-        reviews_count = state.get('reviews_extracted', 0)
-
-    # Check error for timeout indicators
-    error_lower = error_message.lower()
-    if 'timeout' in error_lower:
-        confidence += 0.2
-        signals.append("Timeout in error message")
-
-    # Count recovery attempts in logs (indicate stuck scrolling)
-    recovery_count = 0
-    no_new_count = 0
-    for log_entry in logs:
-        msg = log_entry.get('message', '').lower()
-        if 'recovery attempt' in msg:
-            recovery_count += 1
-        if 'no new' in msg or 'stuck' in msg:
-            no_new_count += 1
-
-    if recovery_count >= SCROLL_TIMEOUT_MIN_SCROLLS:
-        confidence += 0.5
-        signals.append(f"Made {recovery_count} recovery attempts")
-    elif recovery_count >= 5:
-        confidence += 0.3
-        signals.append(f"Made {recovery_count} recovery attempts")
-
-    if no_new_count > 0:
-        confidence += 0.2
-        signals.append(f"Found {no_new_count} 'no new reviews' log entries")
-
-    # Check if reviews stopped growing
-    if metrics_history and len(metrics_history) >= 5:
-        # Check if reviews count plateaued
-        recent_counts = [m.get('reviews_count', 0) for m in metrics_history[-5:] if m.get('reviews_count')]
-        if recent_counts and len(set(recent_counts)) == 1:
-            confidence += 0.2
-            signals.append(f"Review count stuck at {recent_counts[0]}")
-
-    description = "; ".join(signals) if signals else "No scroll timeout signals detected"
-    return min(confidence, 1.0), description
-
-
-def _check_element_stale(
-    error_message: str,
-    metrics_history: List[Dict],
-    logs: List[Dict]
-) -> tuple[float, str]:
-    """
-    Check for stale element reference pattern.
-
-    Returns:
-        Tuple of (confidence, description)
-    """
-    confidence = 0.0
-    signals = []
-
-    # Check error message for stale element indicators
-    error_lower = error_message.lower()
-    stale_keywords = [
-        'stale element', 'staleelement', 'stale_element',
-        'element is not attached', 'element reference',
-        'no such element', 'element not found',
-        'element is no longer valid'
-    ]
-
-    for keyword in stale_keywords:
-        if keyword in error_lower:
-            confidence += 0.6
-            signals.append(f"Error contains '{keyword}'")
-            break
-
-    # Check logs for stale element patterns
-    stale_log_count = 0
-    for log_entry in logs:
-        msg = log_entry.get('message', '').lower()
-        for keyword in stale_keywords:
-            if keyword in msg:
-                stale_log_count += 1
-                break
-
-    if stale_log_count > 0:
-        confidence += 0.2
-        signals.append(f"Found {stale_log_count} stale element references in logs")
-
-    # Check if DOM was changing rapidly (indicates dynamic page)
-    if metrics_history and len(metrics_history) >= 3:
-        dom_counts = [m.get('dom_nodes') for m in metrics_history if m.get('dom_nodes')]
-        if len(dom_counts) >= 3:
-            # Calculate variance
-            avg = sum(dom_counts) / len(dom_counts)
-            variance = sum((x - avg) ** 2 for x in dom_counts) / len(dom_counts)
-            std_dev = variance ** 0.5
-            # High variance indicates rapidly changing DOM
-            if std_dev > 1000:
-                confidence += 0.2
-                signals.append(f"High DOM variability (std dev: {std_dev:.0f})")
-
-    description = "; ".join(signals) if signals else "No stale element signals detected"
-    return min(confidence, 1.0), description
-
-
-def analyze_crash(crash_report: Dict) -> CrashAnalysis:
-    """
-    Analyze a crash report to determine the most likely crash pattern.
-
-    Examines error_message, metrics_history, and logs_before_crash to
-    calculate confidence scores for each crash pattern type.
-
-    Args:
-        crash_report: Dictionary containing:
-            - error_message: str - The exception message
-            - metrics_history: List[Dict] - Sampled metrics with timestamp_ms, memory_mb, dom_nodes
-            - logs_before_crash: List[Dict] - Recent log entries before the crash
-            - state: Optional[Dict] - Scraper state (reviews_extracted, scroll_count, etc.)
-            - crash_type: Optional[str] - Basic crash classification from classify_crash()
-
-    Returns:
-        CrashAnalysis with the highest-confidence pattern match
-    """
-    # Extract data from crash report
-    error_message = crash_report.get('error_message', '')
-    metrics_history = crash_report.get('metrics_history', [])
-    logs = crash_report.get('logs_before_crash', [])
-    state = crash_report.get('state', {})
-    basic_type = crash_report.get('crash_type', 'unknown')
-
-    # Run all pattern checks
-    pattern_results = {}
-
-    # Memory exhaustion
-    conf, desc = _check_memory_exhaustion(error_message, metrics_history, logs)
-    pattern_results['memory_exhaustion'] = (conf, desc)
-
-    # DOM bloat
-    conf, desc = _check_dom_bloat(error_message, metrics_history, logs)
-    pattern_results['dom_bloat'] = (conf, desc)
-
-    # Rate limited
-    conf, desc = _check_rate_limited(error_message, metrics_history, logs)
-    pattern_results['rate_limited'] = (conf, desc)
-
-    # Consent loop
-    conf, desc = _check_consent_loop(error_message, metrics_history, logs)
-    pattern_results['consent_loop'] = (conf, desc)
-
-    # Scroll timeout
-    conf, desc = _check_scroll_timeout(error_message, metrics_history, logs, state)
-    pattern_results['scroll_timeout'] = (conf, desc)
-
-    # Element stale
-    conf, desc = _check_element_stale(error_message, metrics_history, logs)
-    pattern_results['element_stale'] = (conf, desc)
-
-    # Find the pattern with highest confidence
-    best_pattern = max(pattern_results.items(), key=lambda x: x[1][0])
-    pattern_name = best_pattern[0]
-    confidence = best_pattern[1][0]
-    description = best_pattern[1][1]
-
-    # If confidence is too low, fall back to basic classification
-    if confidence < 0.2:
-        # Map basic crash types to our patterns
-        basic_to_pattern = {
-            'memory_exhaustion': 'memory_exhaustion',
-            'tab_crash': 'memory_exhaustion',  # Tab crashes often from memory
-            'timeout': 'scroll_timeout',
-            'element_not_found': 'element_stale',
-            'rate_limited': 'rate_limited',
-            'network_failure': 'rate_limited',  # Could be blocking
-        }
-
-        if basic_type in basic_to_pattern:
-            pattern_name = basic_to_pattern[basic_type]
-            confidence = 0.3  # Low confidence fallback
-            description = f"Inferred from basic crash type '{basic_type}'"
-        else:
-            pattern_name = 'unknown'
-            confidence = 0.0
-            description = f"Unable to determine crash pattern (basic type: {basic_type})"
-
-    # Generate suggested fix based on pattern
-    suggested_fixes = {
-        'memory_exhaustion': (
-            "Reduce batch size and restart browser more frequently. "
-            "Consider limiting max_reviews to 500 and restarting browser after every 200 reviews."
-        ),
-        'dom_bloat': (
-            "Enable DOM cleanup during scrolling. "
-            "Hide processed review cards and remove separator elements to keep DOM light."
-        ),
-        'rate_limited': (
-            "Increase delays between requests and consider rotating proxies. "
-            "Double the delay multiplier and switch to a different proxy if available."
-        ),
-        'consent_loop': (
-            "Skip consent handling after initial attempt to avoid infinite loops. "
-            "The consent popup may be appearing due to cookie clearing or navigation issues."
-        ),
-        'scroll_timeout': (
-            "The page may have stopped loading new reviews. "
-            "Try reducing the target review count by 10% and accepting partial results."
-        ),
-        'element_stale': (
-            "Page elements are being removed/replaced during scraping. "
-            "Retry operations with freshly-located elements and add defensive waits."
-        ),
-        'unknown': (
-            "Unable to determine specific crash cause. "
-            "Review logs and consider restarting with fresh browser session."
-        )
-    }
-
-    suggested_fix = suggested_fixes.get(pattern_name, suggested_fixes['unknown'])
-    auto_fix_params = AUTO_FIX_PARAMS.get(pattern_name)
-
-    return CrashAnalysis(
-        pattern=pattern_name,
-        confidence=confidence,
-        description=description,
-        suggested_fix=suggested_fix,
-        auto_fix_params=auto_fix_params
-    )
-
-
-def get_auto_fix_params(pattern: str) -> Optional[Dict[str, Any]]:
-    """
-    Get auto-fix parameters for a specific crash pattern.
-
-    Args:
-        pattern: The crash pattern name
-
-    Returns:
-        Dictionary of auto-fix parameters, or None if pattern not recognized
-    """
-    return AUTO_FIX_PARAMS.get(pattern)
-
-
-def apply_auto_fix(pattern: str, current_params: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Apply auto-fix parameters to current scraper parameters.
-
-    Args:
-        pattern: The crash pattern name
-        current_params: Current scraper parameters to modify
-
-    Returns:
-        Updated parameters dictionary with fixes applied
-    """
-    fix_params = AUTO_FIX_PARAMS.get(pattern, {})
-    updated = current_params.copy()
-
-    for key, value in fix_params.items():
-        if key == 'target_reviews' and value == 'current - 10%':
-            # Special case: reduce target by 10%
-            current_target = updated.get('max_reviews', 1000)
-            updated['max_reviews'] = int(current_target * 0.9)
-        elif key == 'delay_multiplier':
-            # Multiply existing delay
-            current_delay = updated.get('scroll_delay', 1.0)
-            updated['scroll_delay'] = current_delay * value
-        else:
-            updated[key] = value
-
-    return updated
-
-
-def summarize_crash_patterns(crash_reports: List[Dict]) -> Dict[str, Any]:
-    """
-    Analyze multiple crash reports to identify recurring patterns.
-
-    Args:
-        crash_reports: List of crash report dictionaries
-
-    Returns:
-        Summary dictionary with pattern frequencies and recommendations
-    """
-    if not crash_reports:
-        return {
-            'total_crashes': 0,
-            'patterns': {},
-            'most_common': None,
-            'recommendations': []
-        }
-
-    pattern_counts: Dict[str, int] = {}
-    pattern_confidences: Dict[str, List[float]] = {}
-
-    for report in crash_reports:
-        analysis = analyze_crash(report)
-        pattern = analysis.pattern
-
-        pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1
-        if pattern not in pattern_confidences:
-            pattern_confidences[pattern] = []
-        pattern_confidences[pattern].append(analysis.confidence)
-
-    # Calculate average confidence per pattern
-    patterns_summary = {}
-    for pattern, count in pattern_counts.items():
-        avg_confidence = sum(pattern_confidences[pattern]) / len(pattern_confidences[pattern])
-        patterns_summary[pattern] = {
-            'count': count,
-            'percentage': count / len(crash_reports) * 100,
-            'avg_confidence': avg_confidence
-        }
-
-    # Find most common pattern
-    most_common = max(pattern_counts.items(), key=lambda x: x[1])[0] if pattern_counts else None
-
-    # Generate recommendations
-    recommendations = []
-    for pattern, stats in sorted(patterns_summary.items(), key=lambda x: x[1]['count'], reverse=True):
-        if stats['count'] >= 2:  # Only recommend for recurring patterns
-            fix_params = AUTO_FIX_PARAMS.get(pattern)
-            if fix_params:
-                recommendations.append({
-                    'pattern': pattern,
-                    'occurrences': stats['count'],
-                    'auto_fix_params': fix_params
-                })
-
-    return {
-        'total_crashes': len(crash_reports),
-        'patterns': patterns_summary,
-        'most_common': most_common,
-        'recommendations': recommendations
-    }
--- a/modules/database.py
+++ b/modules/database.py
@@ -1,882 +0,0 @@
-#!/usr/bin/env python3
-"""
-PostgreSQL database module for production microservice.
-Stores job metadata and reviews as JSONB.
-"""
-import asyncpg
-import json
-from datetime import datetime
-from typing import Optional, List, Dict, Any
-from uuid import UUID, uuid4
-from enum import Enum
-import logging
-
-log = logging.getLogger(__name__)
-
-
-class JobStatus(str, Enum):
-    """Job status enumeration"""
-    PENDING = "pending"
-    RUNNING = "running"
-    COMPLETED = "completed"
-    FAILED = "failed"
-    CANCELLED = "cancelled"
-    PARTIAL = "partial"  # Job crashed but has partial reviews saved
-
-
-class DatabaseManager:
-    """PostgreSQL database manager with connection pooling"""
-
-    def __init__(self, database_url: str):
-        """
-        Initialize database manager.
-
-        Args:
-            database_url: PostgreSQL connection URL
-                         Format: postgresql://user:password@host:port/database
-        """
-        self.database_url = database_url
-        self.pool: Optional[asyncpg.Pool] = None
-
-    async def connect(self):
-        """Create connection pool"""
-        log.info("Connecting to PostgreSQL database...")
-        self.pool = await asyncpg.create_pool(
-            self.database_url,
-            min_size=5,
-            max_size=20,
-            command_timeout=60
-        )
-        log.info("Database connection pool created")
-
-    async def disconnect(self):
-        """Close connection pool"""
-        if self.pool:
-            await self.pool.close()
-            log.info("Database connection pool closed")
-
-    async def initialize_schema(self):
-        """Create database schema if it doesn't exist"""
-        async with self.pool.acquire() as conn:
-            # Create jobs table
-            await conn.execute("""
-                CREATE TABLE IF NOT EXISTS jobs (
-                    job_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-                    status VARCHAR(20) NOT NULL DEFAULT 'pending',
-                    url TEXT NOT NULL,
-                    webhook_url TEXT,
-                    webhook_secret TEXT,
-
-                    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-                    started_at TIMESTAMP,
-                    completed_at TIMESTAMP,
-                    updated_at TIMESTAMP,
-
-                    reviews_count INTEGER,
-                    total_reviews INTEGER,
-                    reviews_data JSONB,
-                    scrape_time REAL,
-
-                    error_message TEXT,
-                    metadata JSONB,
-                    scrape_logs JSONB,
-
-                    CONSTRAINT valid_status CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'partial'))
-                );
-            """)
-
-            # Add scrape_logs column if it doesn't exist (for existing databases)
-            await conn.execute("""
-                ALTER TABLE jobs ADD COLUMN IF NOT EXISTS scrape_logs JSONB;
-            """)
-
-            # Add updated_at column if it doesn't exist (for incremental progress tracking)
-            await conn.execute("""
-                ALTER TABLE jobs ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP;
-            """)
-
-            # Add review_topics column if it doesn't exist (extracted topic filters with mention counts)
-            await conn.execute("""
-                ALTER TABLE jobs ADD COLUMN IF NOT EXISTS review_topics JSONB;
-            """)
-
-            # Update constraint to include 'partial' status (for existing databases)
-            await conn.execute("""
-                ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_status;
-            """)
-            await conn.execute("""
-                ALTER TABLE jobs ADD CONSTRAINT valid_status CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'partial'));
-            """)
-
-            # Create indexes
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status);
-            """)
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at DESC);
-            """)
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_jobs_webhook ON jobs(webhook_url) WHERE webhook_url IS NOT NULL;
-            """)
-
-            # Create canary results table
-            await conn.execute("""
-                CREATE TABLE IF NOT EXISTS canary_results (
-                    id SERIAL PRIMARY KEY,
-                    timestamp TIMESTAMP NOT NULL DEFAULT NOW(),
-                    success BOOLEAN NOT NULL,
-                    reviews_count INTEGER,
-                    scrape_time REAL,
-                    error_message TEXT,
-                    metadata JSONB
-                );
-            """)
-
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_canary_timestamp ON canary_results(timestamp DESC);
-            """)
-
-            # Create webhook attempts table (for retry tracking)
-            await conn.execute("""
-                CREATE TABLE IF NOT EXISTS webhook_attempts (
-                    id SERIAL PRIMARY KEY,
-                    job_id UUID NOT NULL REFERENCES jobs(job_id) ON DELETE CASCADE,
-                    attempt_number INTEGER NOT NULL,
-                    timestamp TIMESTAMP NOT NULL DEFAULT NOW(),
-                    success BOOLEAN NOT NULL,
-                    status_code INTEGER,
-                    error_message TEXT,
-                    response_time_ms REAL
-                );
-            """)
-
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_webhook_job_id ON webhook_attempts(job_id);
-            """)
-
-            # Add session_fingerprint and metrics_history columns to jobs table
-            await conn.execute("""
-                ALTER TABLE jobs ADD COLUMN IF NOT EXISTS session_fingerprint JSONB;
-            """)
-            await conn.execute("""
-                ALTER TABLE jobs ADD COLUMN IF NOT EXISTS metrics_history JSONB;
-            """)
-
-            # Create crash_reports table
-            await conn.execute("""
-                CREATE TABLE IF NOT EXISTS crash_reports (
-                    crash_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-                    job_id UUID REFERENCES jobs(job_id) ON DELETE CASCADE,
-                    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-                    crash_type VARCHAR(50) NOT NULL,
-                    error_message TEXT,
-                    state JSONB NOT NULL,
-                    metrics_history JSONB,
-                    logs_before_crash JSONB,
-                    analysis JSONB,
-                    screenshot_url TEXT,
-                    dom_snapshot_id UUID
-                );
-            """)
-
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_crash_reports_job ON crash_reports(job_id);
-            """)
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_crash_reports_type ON crash_reports(crash_type);
-            """)
-            await conn.execute("""
-                CREATE INDEX IF NOT EXISTS idx_crash_reports_created ON crash_reports(created_at DESC);
-            """)
-
-            log.info("Database schema initialized")
-
-    # ==================== Job Operations ====================
-
-    async def create_job(
-        self,
-        url: str,
-        webhook_url: Optional[str] = None,
-        webhook_secret: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None
-    ) -> UUID:
-        """
-        Create a new scraping job.
-
-        Args:
-            url: Google Maps URL to scrape
-            webhook_url: Optional webhook URL for notifications
-            webhook_secret: Optional secret for webhook signature
-            metadata: Optional additional metadata
-
-        Returns:
-            UUID of created job
-        """
-        async with self.pool.acquire() as conn:
-            job_id = await conn.fetchval("""
-                INSERT INTO jobs (url, webhook_url, webhook_secret, metadata)
-                VALUES ($1, $2, $3, $4)
-                RETURNING job_id
-            """, url, webhook_url, webhook_secret, json.dumps(metadata) if metadata else None)
-
-            log.info(f"Created job {job_id} for URL: {url[:80]}...")
-            return job_id
-
-    async def get_job(self, job_id: UUID) -> Optional[Dict[str, Any]]:
-        """
-        Get job by ID.
-
-        Args:
-            job_id: Job UUID
-
-        Returns:
-            Job dictionary or None if not found
-        """
-        async with self.pool.acquire() as conn:
-            row = await conn.fetchrow("""
-                SELECT
-                    job_id,
-                    status,
-                    url,
-                    webhook_url,
-                    created_at,
-                    started_at,
-                    completed_at,
-                    updated_at,
-                    reviews_count,
-                    total_reviews,
-                    reviews_data,
-                    scrape_time,
-                    error_message,
-                    metadata,
-                    scrape_logs,
-                    review_topics
-                FROM jobs
-                WHERE job_id = $1
-            """, job_id)
-
-            if not row:
-                return None
-
-            return dict(row)
-
-    async def get_job_reviews(self, job_id: UUID, include_partial: bool = True) -> Optional[List[Dict[str, Any]]]:
-        """
-        Get reviews for a specific job.
-
-        Args:
-            job_id: Job UUID
-            include_partial: If True, also return reviews for running and partial jobs
-
-        Returns:
-            List of reviews or None if not found/no reviews
-        """
-        async with self.pool.acquire() as conn:
-            if include_partial:
-                # Return reviews for completed, running, or partial jobs
-                reviews_data = await conn.fetchval("""
-                    SELECT reviews_data
-                    FROM jobs
-                    WHERE job_id = $1 AND status IN ('completed', 'running', 'partial')
-                """, job_id)
-            else:
-                # Only return reviews for completed jobs
-                reviews_data = await conn.fetchval("""
-                    SELECT reviews_data
-                    FROM jobs
-                    WHERE job_id = $1 AND status = 'completed'
-                """, job_id)
-
-            if not reviews_data:
-                return None
-
-            # asyncpg returns JSONB as string, need to parse it
-            if isinstance(reviews_data, str):
-                return json.loads(reviews_data)
-
-            return reviews_data
-
-    async def update_job_status(
-        self,
-        job_id: UUID,
-        status: JobStatus,
-        **kwargs
-    ):
-        """
-        Update job status and optional fields.
-
-        Args:
-            job_id: Job UUID
-            status: New status
-            **kwargs: Additional fields to update (started_at, completed_at, error_message, etc.)
-        """
-        # Build dynamic UPDATE query
-        set_clauses = ["status = $2"]
-        params = [job_id, status.value]
-        param_idx = 3
-
-        if status == JobStatus.RUNNING and 'started_at' not in kwargs:
-            kwargs['started_at'] = datetime.now()
-        elif status in [JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED] and 'completed_at' not in kwargs:
-            kwargs['completed_at'] = datetime.now()
-
-        for key, value in kwargs.items():
-            # Handle JSONB fields specially
-            if key == 'scrape_logs' and value is not None:
-                set_clauses.append(f"{key} = ${param_idx}::jsonb")
-                params.append(json.dumps(value) if not isinstance(value, str) else value)
-            else:
-                set_clauses.append(f"{key} = ${param_idx}")
-                params.append(value)
-            param_idx += 1
-
-        query = f"""
-            UPDATE jobs
-            SET {', '.join(set_clauses)}
-            WHERE job_id = $1
-        """
-
-        async with self.pool.acquire() as conn:
-            await conn.execute(query, *params)
-
-    async def save_job_result(
-        self,
-        job_id: UUID,
-        reviews: List[Dict[str, Any]],
-        scrape_time: float,
-        total_reviews: Optional[int] = None,
-        scrape_logs: Optional[List[Dict[str, Any]]] = None,
-        review_topics: Optional[List[Dict[str, Any]]] = None
-    ):
-        """
-        Save scraping results to database.
-
-        Args:
-            job_id: Job UUID
-            reviews: List of review dictionaries
-            scrape_time: Time taken to scrape in seconds
-            total_reviews: Total reviews available (from page counter)
-            scrape_logs: List of log entries from the scraper
-            review_topics: List of topic filter dictionaries with topic and count
-        """
-        async with self.pool.acquire() as conn:
-            # If reviews list is empty, check if job already has reviews from incremental saves
-            # This happens when flush_callback was used during scraping
-            if not reviews:
-                existing = await conn.fetchval(
-                    "SELECT reviews_count FROM jobs WHERE job_id = $1", job_id
-                )
-                if existing and existing > 0:
-                    # Job has reviews from incremental saves, don't overwrite reviews_data
-                    await conn.execute("""
-                        UPDATE jobs
-                        SET
-                            status = 'completed',
-                            completed_at = NOW(),
-                            total_reviews = COALESCE($2, total_reviews),
-                            scrape_time = $3,
-                            scrape_logs = $4::jsonb,
-                            review_topics = $5::jsonb
-                        WHERE job_id = $1
-                    """, job_id, total_reviews, scrape_time,
-                        json.dumps(scrape_logs) if scrape_logs else None,
-                        json.dumps(review_topics) if review_topics else None)
-                    log.info(f"Completed job {job_id} with {existing} reviews (from incremental saves)")
-                    return
-
-            await conn.execute("""
-                UPDATE jobs
-                SET
-                    status = 'completed',
-                    completed_at = NOW(),
-                    reviews_count = $2,
-                    total_reviews = $3,
-                    reviews_data = $4::jsonb,
-                    scrape_time = $5,
-                    scrape_logs = $6::jsonb,
-                    review_topics = $7::jsonb
-                WHERE job_id = $1
-            """, job_id, len(reviews), total_reviews, json.dumps(reviews), scrape_time,
-                json.dumps(scrape_logs) if scrape_logs else None,
-                json.dumps(review_topics) if review_topics else None)
-
-            log.info(f"Saved {len(reviews)} reviews for job {job_id}")
-
-    async def save_reviews_incremental(
-        self,
-        job_id: UUID,
-        reviews: List[Dict[str, Any]],
-        total_reviews: Optional[int] = None
-    ):
-        """
-        Save reviews incrementally during scraping.
-        Called on each flush to preserve progress in case of crash.
-
-        Args:
-            job_id: Job UUID
-            reviews: ALL reviews collected so far (not just new ones)
-            total_reviews: Total reviews available (from page counter)
-        """
-        async with self.pool.acquire() as conn:
-            await conn.execute("""
-                UPDATE jobs
-                SET
-                    reviews_count = $2,
-                    total_reviews = COALESCE($3, total_reviews),
-                    reviews_data = $4::jsonb,
-                    updated_at = NOW()
-                WHERE job_id = $1 AND status = 'running'
-            """, job_id, len(reviews), total_reviews, json.dumps(reviews))
-
-            log.debug(f"Incremental save: {len(reviews)} reviews for job {job_id}")
-
-    async def update_session_fingerprint(
-        self,
-        job_id: UUID,
-        session_fingerprint: Dict[str, Any]
-    ):
-        """
-        Update the session fingerprint for a job.
-
-        This should be called early in the scraping process after the browser
-        fingerprint is captured, to record browser characteristics for
-        bot detection analysis.
-
-        Args:
-            job_id: Job UUID
-            session_fingerprint: Dictionary containing browser fingerprint data:
-                - user_agent: Browser user agent string
-                - platform: OS platform
-                - language: Primary language
-                - languages: List of accepted languages
-                - timezone: Timezone string
-                - screen: {width, height, colorDepth}
-                - viewport: {width, height}
-                - webgl_vendor: WebGL vendor string
-                - webgl_renderer: WebGL renderer string
-                - canvas_fingerprint: Canvas fingerprint hash
-                - hardware_concurrency: Number of CPU cores
-                - device_memory: Device memory in GB
-                - bot_detection_tests: {webdriver_hidden, chrome_runtime, permissions_query}
-                - captured_at: ISO timestamp when fingerprint was captured
-        """
-        async with self.pool.acquire() as conn:
-            await conn.execute("""
-                UPDATE jobs
-                SET
-                    session_fingerprint = $2::jsonb,
-                    updated_at = NOW()
-                WHERE job_id = $1
-            """, job_id, json.dumps(session_fingerprint))
-
-            log.debug(f"Updated session fingerprint for job {job_id}")
-
-    async def mark_job_partial(
-        self,
-        job_id: UUID,
-        error_message: str,
-        scrape_logs: Optional[List[Dict[str, Any]]] = None
-    ):
-        """
-        Mark a job as partial (crashed but has some reviews saved).
-
-        Args:
-            job_id: Job UUID
-            error_message: Error that caused the crash
-            scrape_logs: Log entries from the scraper
-        """
-        async with self.pool.acquire() as conn:
-            await conn.execute("""
-                UPDATE jobs
-                SET
-                    status = 'partial',
-                    completed_at = NOW(),
-                    error_message = $2,
-                    scrape_logs = $3::jsonb
-                WHERE job_id = $1
-            """, job_id, error_message, json.dumps(scrape_logs) if scrape_logs else None)
-
-            log.info(f"Marked job {job_id} as partial due to: {error_message}")
-
-    async def list_jobs(
-        self,
-        status: Optional[JobStatus] = None,
-        limit: int = 100,
-        offset: int = 0
-    ) -> List[Dict[str, Any]]:
-        """
-        List jobs with optional filtering.
-
-        Args:
-            status: Optional status filter
-            limit: Maximum number of jobs to return
-            offset: Number of jobs to skip
-
-        Returns:
-            List of job dictionaries
-        """
-        async with self.pool.acquire() as conn:
-            if status:
-                rows = await conn.fetch("""
-                    SELECT
-                        job_id,
-                        status,
-                        url,
-                        created_at,
-                        completed_at,
-                        reviews_count,
-                        total_reviews,
-                        scrape_time,
-                        error_message,
-                        metadata,
-                        review_topics
-                    FROM jobs
-                    WHERE status = $1
-                    ORDER BY created_at DESC
-                    LIMIT $2 OFFSET $3
-                """, status.value, limit, offset)
-            else:
-                rows = await conn.fetch("""
-                    SELECT
-                        job_id,
-                        status,
-                        url,
-                        created_at,
-                        completed_at,
-                        reviews_count,
-                        total_reviews,
-                        scrape_time,
-                        error_message,
-                        metadata,
-                        review_topics
-                    FROM jobs
-                    ORDER BY created_at DESC
-                    LIMIT $1 OFFSET $2
-                """, limit, offset)
-
-            return [dict(row) for row in rows]
-
-    async def get_pending_jobs_with_webhooks(self, limit: int = 100) -> List[Dict[str, Any]]:
-        """
-        Get completed jobs that have webhooks pending delivery.
-
-        Args:
-            limit: Maximum number of jobs to return
-
-        Returns:
-            List of job dictionaries with webhook info
-        """
-        async with self.pool.acquire() as conn:
-            rows = await conn.fetch("""
-                SELECT
-                    job_id,
-                    status,
-                    url,
-                    webhook_url,
-                    webhook_secret,
-                    reviews_count,
-                    scrape_time,
-                    error_message,
-                    completed_at
-                FROM jobs
-                WHERE webhook_url IS NOT NULL
-                  AND status IN ('completed', 'failed')
-                  AND job_id NOT IN (
-                      SELECT job_id
-                      FROM webhook_attempts
-                      WHERE success = true
-                  )
-                ORDER BY completed_at ASC
-                LIMIT $1
-            """, limit)
-
-            return [dict(row) for row in rows]
-
-    async def delete_job(self, job_id: UUID) -> bool:
-        """
-        Delete a job from the database.
-
-        Args:
-            job_id: Job UUID
-
-        Returns:
-            True if deleted, False if not found
-        """
-        async with self.pool.acquire() as conn:
-            result = await conn.execute("""
-                DELETE FROM jobs WHERE job_id = $1
-            """, job_id)
-
-            deleted = result.split()[-1] == "1"
-            if deleted:
-                log.info(f"Deleted job {job_id}")
-            return deleted
-
-    async def cleanup_old_jobs(self, max_age_days: int = 30):
-        """
-        Delete old completed/failed jobs.
-
-        Args:
-            max_age_days: Maximum age in days before deletion
-        """
-        async with self.pool.acquire() as conn:
-            result = await conn.execute("""
-                DELETE FROM jobs
-                WHERE status IN ('completed', 'failed', 'cancelled')
-                  AND completed_at < NOW() - INTERVAL '%s days'
-            """, max_age_days)
-
-            deleted_count = int(result.split()[-1])
-            if deleted_count > 0:
-                log.info(f"Cleaned up {deleted_count} old jobs")
-
-    # ==================== Statistics ====================
-
-    async def get_stats(self) -> Dict[str, Any]:
-        """
-        Get job statistics.
-
-        Returns:
-            Statistics dictionary
-        """
-        async with self.pool.acquire() as conn:
-            stats = await conn.fetchrow("""
-                SELECT
-                    COUNT(*) as total_jobs,
-                    COUNT(*) FILTER (WHERE status = 'pending') as pending,
-                    COUNT(*) FILTER (WHERE status = 'running') as running,
-                    COUNT(*) FILTER (WHERE status = 'completed') as completed,
-                    COUNT(*) FILTER (WHERE status = 'failed') as failed,
-                    COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled,
-                    AVG(scrape_time) FILTER (WHERE status = 'completed') as avg_scrape_time,
-                    SUM(reviews_count) FILTER (WHERE status = 'completed') as total_reviews
-                FROM jobs
-            """)
-
-            return dict(stats)
-
-    # ==================== Canary Operations ====================
-
-    async def save_canary_result(
-        self,
-        success: bool,
-        reviews_count: Optional[int] = None,
-        scrape_time: Optional[float] = None,
-        error_message: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None
-    ):
-        """
-        Save canary test result.
-
-        Args:
-            success: Whether canary test succeeded
-            reviews_count: Number of reviews scraped
-            scrape_time: Time taken in seconds
-            error_message: Error message if failed
-            metadata: Additional metadata
-        """
-        async with self.pool.acquire() as conn:
-            await conn.execute("""
-                INSERT INTO canary_results (success, reviews_count, scrape_time, error_message, metadata)
-                VALUES ($1, $2, $3, $4, $5)
-            """, success, reviews_count, scrape_time, error_message, json.dumps(metadata) if metadata else None)
-
-    async def get_canary_history(self, limit: int = 100) -> List[Dict[str, Any]]:
-        """
-        Get canary test history.
-
-        Args:
-            limit: Maximum number of results to return
-
-        Returns:
-            List of canary result dictionaries
-        """
-        async with self.pool.acquire() as conn:
-            rows = await conn.fetch("""
-                SELECT
-                    timestamp,
-                    success,
-                    reviews_count,
-                    scrape_time,
-                    error_message
-                FROM canary_results
-                ORDER BY timestamp DESC
-                LIMIT $1
-            """, limit)
-
-            return [dict(row) for row in rows]
-
-    # ==================== Webhook Attempts ====================
-
-    async def log_webhook_attempt(
-        self,
-        job_id: UUID,
-        attempt_number: int,
-        success: bool,
-        status_code: Optional[int] = None,
-        error_message: Optional[str] = None,
-        response_time_ms: Optional[float] = None
-    ):
-        """
-        Log a webhook delivery attempt.
-
-        Args:
-            job_id: Job UUID
-            attempt_number: Attempt number (1, 2, 3...)
-            success: Whether delivery succeeded
-            status_code: HTTP status code
-            error_message: Error message if failed
-            response_time_ms: Response time in milliseconds
-        """
-        async with self.pool.acquire() as conn:
-            await conn.execute("""
-                INSERT INTO webhook_attempts (job_id, attempt_number, success, status_code, error_message, response_time_ms)
-                VALUES ($1, $2, $3, $4, $5, $6)
-            """, job_id, attempt_number, success, status_code, error_message, response_time_ms)
-
-    # ==================== Crash Reports ====================
-
-    async def save_crash_report(self, job_id: str, crash_data: dict) -> str:
-        """
-        Save a crash report and return the crash_id.
-
-        Args:
-            job_id: Job UUID as string
-            crash_data: Dictionary containing crash report data:
-                - crash_type: Type of crash (required)
-                - error_message: Error message (optional)
-                - state: Current state at crash time (required)
-                - metrics_history: Historical metrics (optional)
-                - logs_before_crash: Log entries before crash (optional)
-                - analysis: Crash analysis data (optional)
-                - screenshot_url: URL to screenshot (optional)
-                - dom_snapshot_id: UUID of DOM snapshot (optional)
-
-        Returns:
-            UUID of created crash report as string
-        """
-        async with self.pool.acquire() as conn:
-            # Convert job_id string to UUID
-            job_uuid = UUID(job_id) if isinstance(job_id, str) else job_id
-
-            crash_id = await conn.fetchval("""
-                INSERT INTO crash_reports (
-                    job_id,
-                    crash_type,
-                    error_message,
-                    state,
-                    metrics_history,
-                    logs_before_crash,
-                    analysis,
-                    screenshot_url,
-                    dom_snapshot_id
-                )
-                VALUES ($1, $2, $3, $4::jsonb, $5::jsonb, $6::jsonb, $7::jsonb, $8, $9)
-                RETURNING crash_id
-            """,
-                job_uuid,
-                crash_data.get('crash_type'),
-                crash_data.get('error_message'),
-                json.dumps(crash_data.get('state', {})),
-                json.dumps(crash_data.get('metrics_history')) if crash_data.get('metrics_history') else None,
-                json.dumps(crash_data.get('logs_before_crash')) if crash_data.get('logs_before_crash') else None,
-                json.dumps(crash_data.get('analysis')) if crash_data.get('analysis') else None,
-                crash_data.get('screenshot_url'),
-                UUID(crash_data['dom_snapshot_id']) if crash_data.get('dom_snapshot_id') else None
-            )
-
-            log.info(f"Saved crash report {crash_id} for job {job_id}, type: {crash_data.get('crash_type')}")
-            return str(crash_id)
-
-    async def get_crash_report(self, job_id: str) -> Optional[dict]:
-        """
-        Get crash report for a job, if any.
-
-        Args:
-            job_id: Job UUID as string
-
-        Returns:
-            Crash report dictionary or None if not found
-        """
-        async with self.pool.acquire() as conn:
-            job_uuid = UUID(job_id) if isinstance(job_id, str) else job_id
-
-            row = await conn.fetchrow("""
-                SELECT
-                    crash_id,
-                    job_id,
-                    created_at,
-                    crash_type,
-                    error_message,
-                    state,
-                    metrics_history,
-                    logs_before_crash,
-                    analysis,
-                    screenshot_url,
-                    dom_snapshot_id
-                FROM crash_reports
-                WHERE job_id = $1
-                ORDER BY created_at DESC
-                LIMIT 1
-            """, job_uuid)
-
-            if not row:
-                return None
-
-            result = dict(row)
-            # Convert UUIDs to strings for JSON serialization
-            result['crash_id'] = str(result['crash_id'])
-            result['job_id'] = str(result['job_id'])
-            if result.get('dom_snapshot_id'):
-                result['dom_snapshot_id'] = str(result['dom_snapshot_id'])
-
-            return result
-
-    async def get_crash_stats(self, days: int = 7) -> dict:
-        """
-        Get crash statistics for the last N days.
-
-        Args:
-            days: Number of days to look back (default: 7)
-
-        Returns:
-            Dictionary with:
-                - total: Total number of crashes
-                - by_type: Dict mapping crash type to count
-                - by_day: List of dicts with date and count
-        """
-        async with self.pool.acquire() as conn:
-            # Get total count
-            total = await conn.fetchval("""
-                SELECT COUNT(*)
-                FROM crash_reports
-                WHERE created_at >= NOW() - INTERVAL '%s days'
-            """, days)
-
-            # Get counts by type
-            type_rows = await conn.fetch("""
-                SELECT crash_type, COUNT(*) as count
-                FROM crash_reports
-                WHERE created_at >= NOW() - INTERVAL '%s days'
-                GROUP BY crash_type
-                ORDER BY count DESC
-            """, days)
-
-            by_type = {row['crash_type']: row['count'] for row in type_rows}
-
-            # Get counts by day
-            day_rows = await conn.fetch("""
-                SELECT DATE(created_at) as date, COUNT(*) as count
-                FROM crash_reports
-                WHERE created_at >= NOW() - INTERVAL '%s days'
-                GROUP BY DATE(created_at)
-                ORDER BY date DESC
-            """, days)
-
-            by_day = [{'date': str(row['date']), 'count': row['count']} for row in day_rows]
-
-            return {
-                'total': total or 0,
-                'by_type': by_type,
-                'by_day': by_day
-            }
--- a/modules/date_converter.py
+++ b/modules/date_converter.py
@@ -1,391 +0,0 @@
-"""
-Date conversion utilities for Google Maps reviews.
-"""
-
-import logging
-import re
-from datetime import datetime, timedelta
-from typing import Dict, Any, Optional
-
-# Logger
-log = logging.getLogger("scraper")
-
-
-def relative_to_datetime(date_str: str, lang: str = "en") -> Optional[datetime]:
-    """
-    Convert a relative date string to a datetime object.
-
-    Args:
-        date_str: The relative date string (e.g., "2 years ago")
-        lang: Language code ("en" or "he")
-
-    Returns:
-        datetime object or None if conversion fails
-    """
-    if not date_str:
-        return None
-
-    try:
-        # Convert to ISO format first
-        iso_date = parse_relative_date(date_str, lang)
-
-        # If original string was returned, it wasn't in the expected format
-        if iso_date == date_str:
-            return None
-
-        # Parse the ISO format into datetime
-        return datetime.fromisoformat(iso_date)
-    except Exception as e:
-        log.debug(f"Failed to convert relative date '{date_str}': {e}")
-        return None
-
-
-class DateConverter:
-    """Handler for converting string dates to datetime objects in MongoDB"""
-
-    @staticmethod
-    def convert_dates_in_document(doc: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Convert string dates to datetime objects in a document.
-
-        Args:
-            doc: MongoDB document with string dates
-
-        Returns:
-            Document with string dates converted to datetime objects
-        """
-        # Remove the original date string field if it exists
-        if "date" in doc:
-            original_date = doc.pop("date")
-
-            # Try to use the original date to fix review_date if needed
-            if "review_date" not in doc or not doc["review_date"]:
-                lang = next(iter(doc.get("description", {}).keys()), "en")
-                date_obj = relative_to_datetime(original_date, lang)
-                if date_obj:
-                    doc["review_date"] = date_obj
-
-        # Fields that should be converted to dates
-        date_fields = ["created_date", "last_modified_date", "review_date"]
-
-        # Convert date fields to datetime
-        for field in date_fields:
-            if field in doc and isinstance(doc[field], str):
-                try:
-                    # Try to parse as ISO format first
-                    doc[field] = datetime.fromisoformat(doc[field].replace('Z', '+00:00'))
-                except (ValueError, TypeError):
-                    # If that fails, try parsing as relative date
-                    lang = next(iter(doc.get("description", {}).keys()), "en")
-                    date_obj = relative_to_datetime(doc[field], lang)
-                    if date_obj:
-                        doc[field] = date_obj
-
-        # Handle nested date fields in owner_responses
-        if "owner_responses" in doc and isinstance(doc["owner_responses"], dict):
-            for lang, response in doc["owner_responses"].items():
-                if isinstance(response, dict) and "date" in response:
-                    # Remove the date string field from owner responses
-                    del response["date"]
-
-        return doc
-
-    @staticmethod
-    def convert_dates_in_reviews(reviews: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
-        """
-        Convert string dates to datetime objects for all reviews.
-
-        Args:
-            reviews: Dictionary of review documents
-
-        Returns:
-            Reviews with dates converted to datetime objects
-        """
-        log.info("Converting string dates to datetime objects...")
-
-        for review_id, review in reviews.items():
-            reviews[review_id] = DateConverter.convert_dates_in_document(review)
-
-        return reviews
-
-
-def parse_relative_date(date_str: str, lang: str, now: Optional[datetime] = None) -> str:
-    """
-    Converts a relative review_date (in English or Hebrew) such as "a week ago" or "לפני 7 שנים"
-    into an ISO formatted datetime string (UTC).
-
-    For English, supported formats include:
-       - "a day ago", "an hour ago", "3 weeks ago", "4 months ago", "2 years ago", etc.
-    For Hebrew, supported formats include:
-       - "לפני יום", "לפני 2 ימים", "לפני שבוע", "לפני שבועיים", "לפני חודש",
-         "לפני חודשיים", "לפני 10 חודשים", "לפני שנה", "לפני 3 שנים", etc.
-
-    Parameters:
-      - date_str (str): the relative date string.
-      - lang (str): "en" for English or "he" for Hebrew.
-      - now (Optional[datetime]): reference datetime; if None, current local time is used.
-
-    Returns:
-      A string representing the calculated absolute datetime in ISO 8601 format.
-      If parsing fails in all supported languages, returns a random date within the last year.
-    """
-    import random
-
-    if now is None:
-        now = datetime.utcnow()  # use UTC for consistency
-
-    # Try with the provided language first
-    result = try_parse_date(date_str, lang, now)
-    if result != date_str:
-        return result
-
-    # If the provided language failed, try other supported languages
-    supported_langs = ["en", "he", "th"]
-    for alt_lang in supported_langs:
-        if alt_lang != lang.lower():
-            result = try_parse_date(date_str, alt_lang, now)
-            if result != date_str:
-                return result
-
-    # If all parsing attempts failed, generate a random date within the last year
-    # This creates a date between 1 day ago and 365 days ago
-    random_days_ago = random.randint(1, 365)
-    random_date = now - timedelta(days=random_days_ago)
-    return random_date.isoformat()
-
-
-def try_parse_date(date_str: str, lang: str, now: datetime) -> str:
-    """
-    Helper function that attempts to parse a date string in a specific language.
-
-    Returns the ISO formatted date if successful, or the original string if not.
-    """
-    delta = timedelta(0)
-    parsed = False
-
-    if lang.lower() == "en":
-        # Pattern: capture number or "a"/"an", then unit.
-        pattern = re.compile(r'(?P<num>a|an|\d+)\s+(?P<unit>day|week|month|year)s?\s+ago', re.IGNORECASE)
-        m = pattern.search(date_str)
-        if m:
-            num_str = m.group("num").lower()
-            num = 1 if num_str in ("a", "an") else int(num_str)
-            unit = m.group("unit").lower()
-            if unit == "day":
-                delta = timedelta(days=num)
-            elif unit == "week":
-                delta = timedelta(weeks=num)
-            elif unit == "month":
-                delta = timedelta(days=30 * num)  # approximate
-            elif unit == "year":
-                delta = timedelta(days=365 * num)  # approximate
-            parsed = True
-    elif lang.lower() == "he":
-        # Remove the "לפני" prefix if present
-        text = date_str.strip()
-        if text.startswith("לפני"):
-            text = text[len("לפני"):].strip()
-
-        # Handle special cases where the number and unit are combined:
-        special = {
-            "חודשיים": (2, "month"),
-            "שבועיים": (2, "week"),
-            "יומיים": (2, "day"),
-        }
-        if text in special:
-            num, unit = special[text]
-            if unit == "day":
-                delta = timedelta(days=num)
-            elif unit == "week":
-                delta = timedelta(weeks=num)
-            elif unit == "month":
-                delta = timedelta(days=30 * num)  # approximate
-            parsed = True
-        else:
-            # Match optional number (or assume 1) and then a unit.
-            pattern = re.compile(r'(?P<num>\d+|אחד|אחת)?\s*(?P<unit>שנה|שנים|חודש|חודשים|יום|ימים|שבוע|שבועות)',
-                                 re.IGNORECASE)
-            m = pattern.search(text)
-            if m:
-                num_str = m.group("num")
-                if not num_str:
-                    num = 1
-                else:
-                    try:
-                        num = int(num_str)
-                    except ValueError:
-                        num = 1
-                unit_he = m.group("unit")
-                # Map the Hebrew unit (both singular and plural) to English unit names
-                if unit_he in ("יום", "ימים"):
-                    unit = "day"
-                elif unit_he in ("שבוע", "שבועות"):
-                    unit = "week"
-                elif unit_he in ("חודש", "חודשים"):
-                    unit = "month"
-                elif unit_he in ("שנה", "שנים"):
-                    unit = "year"
-                else:
-                    unit = "day"  # fallback
-
-                if unit == "day":
-                    delta = timedelta(days=num)
-                elif unit == "week":
-                    delta = timedelta(weeks=num)
-                elif unit == "month":
-                    delta = timedelta(days=30 * num)  # approximate
-                elif unit == "year":
-                    delta = timedelta(days=365 * num)  # approximate
-                parsed = True
-    elif lang.lower() == "th":
-        # Thai language patterns (simplified)
-        # Check for Thai patterns like "3 วันที่แล้ว" (3 days ago)
-        thai_pattern = re.compile(r'(?P<num>\d+)?\s*(?P<unit>วัน|สัปดาห์|เดือน|ปี)ที่แล้ว', re.IGNORECASE)
-        m = thai_pattern.search(date_str)
-        if m:
-            num_str = m.group("num")
-            num = 1 if not num_str else int(num_str)
-            unit_th = m.group("unit")
-
-            # Map Thai units to English
-            if unit_th == "วัน":
-                unit = "day"
-            elif unit_th == "สัปดาห์":
-                unit = "week"
-            elif unit_th == "เดือน":
-                unit = "month"
-            elif unit_th == "ปี":
-                unit = "year"
-            else:
-                unit = "day"  # fallback
-
-            if unit == "day":
-                delta = timedelta(days=num)
-            elif unit == "week":
-                delta = timedelta(weeks=num)
-            elif unit == "month":
-                delta = timedelta(days=30 * num)  # approximate
-            elif unit == "year":
-                delta = timedelta(days=365 * num)  # approximate
-            parsed = True
-
-    # Return the calculated date if parsing was successful, otherwise return the original string
-    if parsed:
-        result = now - delta
-        return result.isoformat()
-    else:
-        return date_str
-
-
-# def parse_relative_date(date_str: str, lang: str, now: Optional[datetime] = None) -> str:
-#     """
-#     Converts a relative review_date (in English or Hebrew) such as "a week ago" or "לפני 7 שנים"
-#     into an ISO formatted datetime string (UTC).
-#
-#     For English, supported formats include:
-#        - "a day ago", "an hour ago", "3 weeks ago", "4 months ago", "2 years ago", etc.
-#     For Hebrew, supported formats include:
-#        - "לפני יום", "לפני 2 ימים", "לפני שבוע", "לפני שבועיים", "לפני חודש",
-#          "לפני חודשיים", "לפני 10 חודשים", "לפני שנה", "לפני 3 שנים", etc.
-#
-#     Parameters:
-#       - date_str (str): the relative date string.
-#       - lang (str): "en" for English or "he" for Hebrew.
-#       - now (Optional[datetime]): reference datetime; if None, current local time is used.
-#
-#     Returns:
-#       A string representing the calculated absolute datetime in ISO 8601 format,
-#       or the original date_str if parsing fails.
-#     """
-#     if now is None:
-#         now = datetime.utcnow()  # use UTC for consistency
-#
-#     delta = timedelta(0)
-#
-#     if lang.lower() == "en":
-#         # Pattern: capture number or "a"/"an", then unit.
-#         pattern = re.compile(r'(?P<num>a|an|\d+)\s+(?P<unit>day|week|month|year)s?\s+ago', re.IGNORECASE)
-#         m = pattern.search(date_str)
-#         if m:
-#             num_str = m.group("num").lower()
-#             num = 1 if num_str in ("a", "an") else int(num_str)
-#             unit = m.group("unit").lower()
-#             if unit == "day":
-#                 delta = timedelta(days=num)
-#             elif unit == "week":
-#                 delta = timedelta(weeks=num)
-#             elif unit == "month":
-#                 delta = timedelta(days=30 * num)  # approximate
-#             elif unit == "year":
-#                 delta = timedelta(days=365 * num)  # approximate
-#         else:
-#             return date_str  # return original if not matched
-#     elif lang.lower() == "he":
-#         # Remove the "לפני" prefix if present
-#         text = date_str.strip()
-#         if text.startswith("לפני"):
-#             text = text[len("לפני"):].strip()
-#
-#         # Handle special cases where the number and unit are combined:
-#         special = {
-#             "חודשיים": (2, "month"),
-#             "שבועיים": (2, "week"),
-#             "יומיים": (2, "day"),
-#         }
-#         if text in special:
-#             num, unit = special[text]
-#         else:
-#             # Match optional number (or assume 1) and then a unit.
-#             pattern = re.compile(r'(?P<num>\d+|אחד|אחת)?\s*(?P<unit>שנה|שנים|חודש|חודשים|יום|ימים|שבוע|שבועות)',
-#                                  re.IGNORECASE)
-#             m = pattern.search(text)
-#             if m:
-#                 num_str = m.group("num")
-#                 if not num_str:
-#                     num = 1
-#                 else:
-#                     try:
-#                         num = int(num_str)
-#                     except ValueError:
-#                         num = 1
-#                 unit_he = m.group("unit")
-#                 # Map the Hebrew unit (both singular and plural) to English unit names
-#                 if unit_he in ("יום", "ימים"):
-#                     unit = "day"
-#                 elif unit_he in ("שבוע", "שבועות"):
-#                     unit = "week"
-#                 elif unit_he in ("חודש", "חודשים"):
-#                     unit = "month"
-#                 elif unit_he in ("שנה", "שנים"):
-#                     unit = "year"
-#                 else:
-#                     unit = "day"  # fallback
-#             else:
-#                 return date_str  # if nothing matches, return original text
-#
-#         if unit == "day":
-#             delta = timedelta(days=num)
-#         elif unit == "week":
-#             delta = timedelta(weeks=num)
-#         elif unit == "month":
-#             delta = timedelta(days=30 * num)  # approximate
-#         elif unit == "year":
-#             delta = timedelta(days=365 * num)  # approximate
-#
-#     result = now - delta
-#     return result.isoformat()
-
-
-# --- Example usage ---
-if __name__ == "__main__":
-    # Fixed reference time for reproducibility:
-    fixed_now = datetime(2025, 2, 5, 12, 0, 0)
-    examples = [
-        ("a week ago", "he"),
-        ("4 weeks ago", "en"),
-        ("לפני 7 שנים", "he"),
-        ("לפני חודשיים", "he")
-    ]
-    for text, lang in examples:
-        iso_date = parse_relative_date(text, lang, now=fixed_now)
-        print(f"Original: {text} ({lang}) => ISO: {iso_date}")
--- a/modules/health_checks.py
+++ b/modules/health_checks.py
@@ -1,411 +0,0 @@
-#!/usr/bin/env python3
-"""
-Smart health check system with canary testing.
-Verifies that scraping actually works, not just that services are up.
-"""
-import asyncio
-import logging
-from datetime import datetime, timedelta
-from typing import Dict, Any, Optional
-import os
-
-log = logging.getLogger(__name__)
-
-
-class CanaryMonitor:
-    """
-    Background canary test monitor.
-
-    Runs actual scraping tests periodically to verify the scraper works.
-    This catches issues like:
-    - Google Maps page structure changes
-    - Broken CSS selectors
-    - GDPR consent handling issues
-    - Network/proxy problems
-    - Chrome/browser issues
-    """
-
-    def __init__(
-        self,
-        db,
-        interval_hours: int = 4,
-        test_url: Optional[str] = None
-    ):
-        """
-        Initialize canary monitor.
-
-        Args:
-            db: Database manager instance
-            interval_hours: How often to run canary tests
-            test_url: Optional test URL (defaults to Soho Factory in Vilnius)
-        """
-        self.db = db
-        self.interval = timedelta(hours=interval_hours)
-        self.test_url = test_url or os.getenv(
-            'CANARY_TEST_URL',
-            'https://www.google.com/maps/place/Soho+Factory/@54.6738155,25.2595844,17z/'
-        )
-
-        self.running = False
-        self.last_run: Optional[datetime] = None
-        self.last_success: Optional[datetime] = None
-        self.consecutive_failures = 0
-        self.last_result: Optional[Dict[str, Any]] = None
-
-    async def start(self):
-        """Start the background canary monitoring"""
-        self.running = True
-        log.info(f"Canary monitor started (interval: {self.interval.total_seconds()/3600:.1f}h)")
-
-        while self.running:
-            try:
-                await self.run_canary_test()
-            except Exception as e:
-                log.error(f"Canary test failed with exception: {e}")
-                self.consecutive_failures += 1
-
-                # Alert if multiple consecutive failures
-                if self.consecutive_failures >= 3:
-                    await self.send_alert(
-                        f"🚨 CRITICAL: Scraper canary failed {self.consecutive_failures} times in a row! "
-                        f"Last error: {str(e)[:200]}"
-                    )
-
-            # Sleep until next run
-            await asyncio.sleep(self.interval.total_seconds())
-
-    def stop(self):
-        """Stop the background monitoring"""
-        self.running = False
-        log.info("Canary monitor stopped")
-
-    async def run_canary_test(self):
-        """
-        Run a single canary test.
-
-        This performs an actual scrape on a known test URL and validates:
-        - Scraping succeeds
-        - Reviews are extracted
-        - Review count is reasonable
-        - Scrape time is reasonable
-        - Data structure is valid
-        """
-        from modules.scraper_clean import fast_scrape_reviews
-
-        log.info(f"Running canary scrape test on {self.test_url[:60]}...")
-        self.last_run = datetime.now()
-
-        try:
-            # Run actual scrape with timeout
-            result = await asyncio.wait_for(
-                asyncio.to_thread(
-                    fast_scrape_reviews,
-                    url=self.test_url,
-                    headless=True,
-                    max_scrolls=10  # Limited for canary
-                ),
-                timeout=60  # Fail if takes > 60s
-            )
-
-            # Validate result
-            checks = {
-                "scrape_succeeded": result['success'],
-                "got_reviews": result['count'] > 0,
-                "reasonable_count": 10 <= result['count'] <= 500,
-                "reasonable_time": result['time'] < 30,
-                "data_structure_valid": self._validate_review_structure(result.get('reviews', []))
-            }
-
-            all_passed = all(checks.values())
-
-            if all_passed:
-                # Success!
-                log.info(
-                    f"✅ Canary test PASSED: {result['count']} reviews in {result['time']:.1f}s"
-                )
-                self.consecutive_failures = 0
-                self.last_success = datetime.now()
-                self.last_result = {
-                    "status": "pass",
-                    "reviews_count": result['count'],
-                    "scrape_time": result['time'],
-                    "checks": checks
-                }
-
-                # Save to database
-                await self.db.save_canary_result(
-                    success=True,
-                    reviews_count=result['count'],
-                    scrape_time=result['time'],
-                    metadata={"checks": checks}
-                )
-
-            else:
-                # Validation failed
-                failed_checks = [k for k, v in checks.items() if not v]
-                log.error(
-                    f"❌ Canary test FAILED: validation failed on {failed_checks}"
-                )
-                self.consecutive_failures += 1
-                self.last_result = {
-                    "status": "fail",
-                    "reviews_count": result['count'],
-                    "scrape_time": result['time'],
-                    "checks": checks,
-                    "failed_checks": failed_checks
-                }
-
-                # Save to database
-                await self.db.save_canary_result(
-                    success=False,
-                    reviews_count=result['count'],
-                    scrape_time=result['time'],
-                    error_message=f"Validation failed: {failed_checks}",
-                    metadata={"checks": checks}
-                )
-
-                # Alert on failure
-                if self.consecutive_failures >= 3:
-                    await self.send_alert(
-                        f"🚨 CRITICAL: Canary validation failed {self.consecutive_failures} times! "
-                        f"Failed checks: {failed_checks}"
-                    )
-
-        except asyncio.TimeoutError:
-            log.error("❌ Canary test TIMEOUT (>60s)")
-            self.consecutive_failures += 1
-            self.last_result = {
-                "status": "timeout",
-                "error": "Scrape took longer than 60 seconds"
-            }
-
-            await self.db.save_canary_result(
-                success=False,
-                error_message="Timeout after 60 seconds"
-            )
-
-            if self.consecutive_failures >= 3:
-                await self.send_alert(
-                    f"🚨 CRITICAL: Canary timeout {self.consecutive_failures} times!"
-                )
-
-        except Exception as e:
-            log.error(f"❌ Canary test ERROR: {e}")
-            self.consecutive_failures += 1
-            self.last_result = {
-                "status": "error",
-                "error": str(e)
-            }
-
-            await self.db.save_canary_result(
-                success=False,
-                error_message=str(e)
-            )
-
-            raise  # Re-raise to trigger alert in main loop
-
-    def _validate_review_structure(self, reviews) -> bool:
-        """
-        Validate that reviews have expected structure.
-
-        Args:
-            reviews: List of review dictionaries
-
-        Returns:
-            True if structure is valid
-        """
-        if not reviews or len(reviews) == 0:
-            return False
-
-        # Check first review has required fields
-        first_review = reviews[0]
-        required_fields = ['author', 'rating', 'date_text']
-
-        return all(field in first_review for field in required_fields)
-
-    async def send_alert(self, message: str):
-        """
-        Send alert via configured channels.
-
-        Args:
-            message: Alert message to send
-        """
-        log.critical(message)
-
-        # TODO: Integrate with alerting systems
-        # Examples:
-
-        # Slack
-        slack_webhook = os.getenv('SLACK_WEBHOOK_URL')
-        if slack_webhook:
-            try:
-                import httpx
-                async with httpx.AsyncClient() as client:
-                    await client.post(
-                        slack_webhook,
-                        json={"text": message},
-                        timeout=5.0
-                    )
-                log.info("Alert sent to Slack")
-            except Exception as e:
-                log.error(f"Failed to send Slack alert: {e}")
-
-        # Email (example with SMTP)
-        # smtp_config = os.getenv('SMTP_CONFIG')
-        # if smtp_config:
-        #     await send_email(
-        #         to=os.getenv('ALERT_EMAIL'),
-        #         subject="Scraper Canary Alert",
-        #         body=message
-        #     )
-
-        # PagerDuty
-        # pagerduty_key = os.getenv('PAGERDUTY_KEY')
-        # if pagerduty_key:
-        #     await trigger_pagerduty(message)
-
-    def get_status(self) -> Dict[str, Any]:
-        """
-        Get current canary status.
-
-        Returns:
-            Status dictionary
-        """
-        if not self.last_success:
-            return {
-                "status": "unknown",
-                "message": "No canary tests run yet",
-                "last_run": self.last_run.isoformat() if self.last_run else None
-            }
-
-        age = datetime.now() - self.last_success
-        max_age = timedelta(hours=6)  # Alert if no success in 6 hours
-
-        if age > max_age:
-            return {
-                "status": "stale",
-                "last_success": self.last_success.isoformat(),
-                "age_hours": age.total_seconds() / 3600,
-                "consecutive_failures": self.consecutive_failures,
-                "message": f"Last successful canary was {age.total_seconds()/3600:.1f} hours ago"
-            }
-
-        return {
-            "status": "healthy",
-            "last_success": self.last_success.isoformat(),
-            "last_run": self.last_run.isoformat() if self.last_run else None,
-            "age_minutes": age.total_seconds() / 60,
-            "consecutive_failures": self.consecutive_failures,
-            "last_result": self.last_result
-        }
-
-
-class HealthCheckSystem:
-    """
-    Complete health check system for production.
-
-    Provides multiple levels of health checks:
-    - Liveness: Is the server alive?
-    - Readiness: Can it handle traffic?
-    - Canary: Does scraping actually work?
-    """
-
-    def __init__(self, db):
-        """
-        Initialize health check system.
-
-        Args:
-            db: Database manager instance
-        """
-        self.db = db
-        self.canary = CanaryMonitor(db, interval_hours=4)
-
-    async def start(self):
-        """Start background health monitoring"""
-        asyncio.create_task(self.canary.start())
-
-    def stop(self):
-        """Stop background health monitoring"""
-        self.canary.stop()
-
-    async def check_liveness(self) -> Dict[str, Any]:
-        """
-        Liveness check: Is the server alive?
-
-        This is a simple check that always succeeds if the server is running.
-        Used by Kubernetes liveness probe - restart container if fails.
-
-        Returns:
-            Liveness status
-        """
-        return {
-            "status": "alive",
-            "timestamp": datetime.utcnow().isoformat()
-        }
-
-    async def check_readiness(self) -> Dict[str, Any]:
-        """
-        Readiness check: Can the server handle traffic?
-
-        Checks if dependencies are available.
-        Used by Kubernetes readiness probe - remove from load balancer if fails.
-
-        Returns:
-            Readiness status
-        """
-        checks = {}
-
-        # Check database
-        try:
-            await self.db.pool.fetchval("SELECT 1")
-            checks["database"] = {"healthy": True}
-        except Exception as e:
-            checks["database"] = {"healthy": False, "error": str(e)}
-
-        # Overall readiness
-        all_healthy = all(c.get("healthy", False) for c in checks.values())
-
-        return {
-            "status": "ready" if all_healthy else "not_ready",
-            "checks": checks,
-            "timestamp": datetime.utcnow().isoformat()
-        }
-
-    async def check_canary(self) -> Dict[str, Any]:
-        """
-        Canary check: Does scraping actually work?
-
-        Returns the latest canary test result.
-        Used by external monitoring (PagerDuty, DataDog) for alerts.
-
-        Returns:
-            Canary status
-        """
-        return self.canary.get_status()
-
-    async def get_detailed_health(self) -> Dict[str, Any]:
-        """
-        Get detailed health status of all components.
-
-        Returns:
-            Complete health status
-        """
-        liveness = await self.check_liveness()
-        readiness = await self.check_readiness()
-        canary = await self.check_canary()
-
-        overall_healthy = (
-            liveness["status"] == "alive" and
-            readiness["status"] == "ready" and
-            canary["status"] in ["healthy", "unknown"]  # Unknown is OK (first run)
-        )
-
-        return {
-            "status": "healthy" if overall_healthy else "degraded",
-            "components": {
-                "liveness": liveness,
-                "readiness": readiness,
-                "canary": canary
-            },
-            "timestamp": datetime.utcnow().isoformat()
-        }
--- a/modules/models.py
+++ b/modules/models.py
@@ -1,93 +0,0 @@
-"""
-Data models for Google Maps Reviews Scraper.
-"""
-import re
-from dataclasses import dataclass, field
-
-from selenium.webdriver.remote.webelement import WebElement
-
-from modules.utils import (try_find, first_text, first_attr, safe_int, detect_lang, parse_date_to_iso)
-
-
-@dataclass
-class RawReview:
-    """
-    Data class representing a raw review extracted from Google Maps.
-    """
-    id: str = ""
-    author: str = ""
-    rating: float = 0.0
-    date: str = ""
-    lang: str = "und"
-    text: str = ""
-    likes: int = 0
-    photos: list[str] = field(default_factory=list)
-    profile: str = ""
-    avatar: str = ""  # URL to profile picture
-    owner_date: str = ""
-    owner_text: str = ""
-    review_date: str = ""  # ISO format date
-    
-    # Translation fields
-    translations: dict = field(default_factory=dict)  # Store translations by language code
-
-    # CSS Selectors for review elements
-    MORE_BTN = "button.kyuRq"
-    LIKE_BTN = 'button[jsaction*="toggleThumbsUp" i]'
-    PHOTO_BTN = "button.Tya61d"
-    OWNER_RESP = "div.CDe7pd"
-
-    @classmethod
-    def from_card(cls, card: WebElement) -> "RawReview":
-        """Factory method to create a RawReview from a WebElement"""
-        # expand "More" - non-blocking approach
-        for b in try_find(card, cls.MORE_BTN, all=True):
-            try:
-                b.click()
-            except Exception:
-                pass
-
-        # Try to get data-review-id from the card itself, or from a child element
-        rid = card.get_attribute("data-review-id") or ""
-        if not rid:
-            # Try to find it in a child element
-            review_id_elem = try_find(card, "[data-review-id]")
-            if review_id_elem:
-                rid = review_id_elem[0].get_attribute("data-review-id") or ""
-        author = first_text(card, 'div[class*="d4r55"]')
-        profile = first_attr(card, 'button[data-review-id]', "data-href")
-        avatar = first_attr(card, 'button[data-review-id] img', "src")
-
-        label = first_attr(card, 'span[role="img"]', "aria-label")
-        num = re.search(r"[\d\.]+", label.replace(",", ".")) if label else None
-        rating = float(num.group()) if num else 0.0
-
-        date = first_text(card, 'span[class*="rsqaWe"]')
-        # Parse the date string to ISO format
-        review_date = parse_date_to_iso(date)
-
-        text = ""
-        for sel in ('span[jsname="bN97Pc"]',
-                    'span[jsname="fbQN7e"]',
-                    'div.MyEned span.wiI7pd'):
-            text = first_text(card, sel)
-            if text: break
-        lang = detect_lang(text)
-
-        likes = 0
-        if (btn := try_find(card, cls.LIKE_BTN)):
-            likes = safe_int(btn[0].text or btn[0].get_attribute("aria-label"))
-
-        photos: list[str] = []
-        for btn in try_find(card, cls.PHOTO_BTN, all=True):
-            if (m := re.search(r'url\("([^"]+)"', btn.get_attribute("style") or "")):
-                photos.append(m.group(1))
-
-        owner_date = owner_text = ""
-        if (box := try_find(card, cls.OWNER_RESP)):
-            box = box[0]
-            owner_date = first_text(box, "span.DZSIDd")
-            owner_text = first_text(box, "div.wiI7pd")
-
-        return cls(rid, author, rating, date, lang, text, likes,
-                   photos, profile, avatar, owner_date, owner_text, review_date)
--- a/modules/scraper_clean.py
+++ b/modules/scraper_clean.py
--- a/modules/structured_logger.py
+++ b/modules/structured_logger.py
@@ -1,250 +0,0 @@
-"""
-Structured Logger Module
-
-Provides a thread-safe, structured logging system with JSON-serializable output.
-Designed to replace the LogCapture class with enhanced categorization and metrics support.
-"""
-
-from dataclasses import dataclass, field, asdict
-from datetime import datetime, timezone
-from typing import Dict, List, Literal, Optional
-import threading
-import time
-
-
-LogLevel = Literal['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL']
-LogCategory = Literal['scraper', 'browser', 'network', 'system']
-
-
-@dataclass
-class LogEntry:
-    """Structured log entry with timestamp, level, category, and optional metrics."""
-    timestamp: str  # ISO 8601 with Z suffix
-    timestamp_ms: int  # Unix milliseconds
-    level: LogLevel
-    category: LogCategory
-    message: str
-    metrics: Optional[Dict] = None  # memory_mb, reviews_count, scroll_position, dom_nodes, etc.
-    network: Optional[Dict] = None  # url, method, status, size_bytes, duration_ms
-    snapshot_id: Optional[str] = None
-
-    def to_dict(self) -> Dict:
-        """Convert to JSON-serializable dictionary, excluding None values."""
-        result = {
-            'timestamp': self.timestamp,
-            'timestamp_ms': self.timestamp_ms,
-            'level': self.level,
-            'category': self.category,
-            'message': self.message,
-        }
-        if self.metrics is not None:
-            result['metrics'] = self.metrics
-        if self.network is not None:
-            result['network'] = self.network
-        if self.snapshot_id is not None:
-            result['snapshot_id'] = self.snapshot_id
-        return result
-
-
-class StructuredLogger:
-    """
-    Thread-safe structured logger with categorized log entries and automatic pruning.
-
-    Example usage:
-        logger = StructuredLogger()
-        logger.info('browser', 'Navigating to URL', metrics={'memory_mb': 245})
-        logger.warn('network', 'Rate limit detected', network={'status': 429, 'url': '...'})
-        logger.error('system', 'Chrome crashed', metrics={'memory_mb': 489, 'dom_nodes': 12000})
-    """
-
-    def __init__(self, max_entries: int = 10000):
-        """
-        Initialize the structured logger.
-
-        Args:
-            max_entries: Maximum number of log entries to retain (default 10000).
-                        Oldest entries are pruned when limit is exceeded.
-        """
-        self._entries: List[LogEntry] = []
-        self._lock = threading.Lock()
-        self._max_entries = max_entries
-
-    def _create_entry(
-        self,
-        level: LogLevel,
-        category: LogCategory,
-        message: str,
-        metrics: Optional[Dict] = None,
-        network: Optional[Dict] = None,
-        snapshot_id: Optional[str] = None,
-    ) -> LogEntry:
-        """Create a new log entry with current timestamp."""
-        now = datetime.now(timezone.utc)
-        timestamp = now.strftime('%Y-%m-%dT%H:%M:%S.') + f'{now.microsecond // 1000:03d}Z'
-        timestamp_ms = int(now.timestamp() * 1000)
-
-        return LogEntry(
-            timestamp=timestamp,
-            timestamp_ms=timestamp_ms,
-            level=level,
-            category=category,
-            message=message,
-            metrics=metrics,
-            network=network,
-            snapshot_id=snapshot_id,
-        )
-
-    def _add_entry(self, entry: LogEntry) -> None:
-        """Add an entry to the log with thread-safety and automatic pruning."""
-        with self._lock:
-            self._entries.append(entry)
-            # Prune oldest entries if limit exceeded
-            if len(self._entries) > self._max_entries:
-                # Remove oldest 10% to avoid frequent pruning
-                prune_count = max(1, self._max_entries // 10)
-                self._entries = self._entries[prune_count:]
-
-    def debug(
-        self,
-        category: LogCategory,
-        message: str,
-        *,
-        metrics: Optional[Dict] = None,
-        network: Optional[Dict] = None,
-        snapshot_id: Optional[str] = None,
-    ) -> None:
-        """Log a DEBUG level message."""
-        entry = self._create_entry('DEBUG', category, message, metrics, network, snapshot_id)
-        self._add_entry(entry)
-
-    def info(
-        self,
-        category: LogCategory,
-        message: str,
-        *,
-        metrics: Optional[Dict] = None,
-        network: Optional[Dict] = None,
-        snapshot_id: Optional[str] = None,
-    ) -> None:
-        """Log an INFO level message."""
-        entry = self._create_entry('INFO', category, message, metrics, network, snapshot_id)
-        self._add_entry(entry)
-
-    def warn(
-        self,
-        category: LogCategory,
-        message: str,
-        *,
-        metrics: Optional[Dict] = None,
-        network: Optional[Dict] = None,
-        snapshot_id: Optional[str] = None,
-    ) -> None:
-        """Log a WARN level message."""
-        entry = self._create_entry('WARN', category, message, metrics, network, snapshot_id)
-        self._add_entry(entry)
-
-    def error(
-        self,
-        category: LogCategory,
-        message: str,
-        *,
-        metrics: Optional[Dict] = None,
-        network: Optional[Dict] = None,
-        snapshot_id: Optional[str] = None,
-    ) -> None:
-        """Log an ERROR level message."""
-        entry = self._create_entry('ERROR', category, message, metrics, network, snapshot_id)
-        self._add_entry(entry)
-
-    def fatal(
-        self,
-        category: LogCategory,
-        message: str,
-        *,
-        metrics: Optional[Dict] = None,
-        network: Optional[Dict] = None,
-        snapshot_id: Optional[str] = None,
-    ) -> None:
-        """Log a FATAL level message."""
-        entry = self._create_entry('FATAL', category, message, metrics, network, snapshot_id)
-        self._add_entry(entry)
-
-    def log(self, message: str, level: str = 'INFO') -> None:
-        """
-        Backward-compatible log method for legacy code.
-
-        Maps to 'system' category by default.
-
-        Args:
-            message: The log message
-            level: Log level as string (DEBUG, INFO, WARN, ERROR, FATAL)
-        """
-        level_upper = level.upper()
-        if level_upper not in ('DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'):
-            level_upper = 'INFO'
-
-        entry = self._create_entry(level_upper, 'system', message)
-        self._add_entry(entry)
-
-    def get_logs(self) -> List[Dict]:
-        """
-        Get all log entries as JSON-serializable dictionaries.
-
-        Returns:
-            List of log entry dictionaries.
-        """
-        with self._lock:
-            return [entry.to_dict() for entry in self._entries]
-
-    def get_logs_by_category(self, category: LogCategory) -> List[Dict]:
-        """
-        Get log entries filtered by category.
-
-        Args:
-            category: The category to filter by ('scraper', 'browser', 'network', 'system')
-
-        Returns:
-            List of log entry dictionaries matching the category.
-        """
-        with self._lock:
-            return [entry.to_dict() for entry in self._entries if entry.category == category]
-
-    def get_logs_by_level(self, level: LogLevel) -> List[Dict]:
-        """
-        Get log entries filtered by level.
-
-        Args:
-            level: The level to filter by ('DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
-
-        Returns:
-            List of log entry dictionaries matching the level.
-        """
-        with self._lock:
-            return [entry.to_dict() for entry in self._entries if entry.level == level]
-
-    def get_logs_since(self, timestamp_ms: int) -> List[Dict]:
-        """
-        Get log entries since a specific timestamp.
-
-        Args:
-            timestamp_ms: Unix timestamp in milliseconds
-
-        Returns:
-            List of log entry dictionaries with timestamp >= timestamp_ms.
-        """
-        with self._lock:
-            return [entry.to_dict() for entry in self._entries if entry.timestamp_ms >= timestamp_ms]
-
-    def clear(self) -> None:
-        """Clear all log entries."""
-        with self._lock:
-            self._entries.clear()
-
-    def count(self) -> int:
-        """Get the current number of log entries."""
-        with self._lock:
-            return len(self._entries)
-
-    def __len__(self) -> int:
-        """Get the current number of log entries."""
-        return self.count()
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -1,307 +0,0 @@
-"""
-Utility functions for Google Maps Reviews Scraper.
-"""
-import datetime
-import logging
-import re
-import time
-from datetime import timezone
-from functools import lru_cache
-from typing import List
-
-from selenium.common.exceptions import (NoSuchElementException,
-                                        StaleElementReferenceException,
-                                        TimeoutException)
-from selenium.webdriver import Chrome
-from selenium.webdriver.common.by import By
-from selenium.webdriver.remote.webelement import WebElement
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import WebDriverWait
-
-# Logger
-log = logging.getLogger("scraper")
-
-# Constants for language detection
-HEB_CHARS = re.compile(r"[\u0590-\u05FF]")
-THAI_CHARS = re.compile(r"[\u0E00-\u0E7F]")
-
-
-@lru_cache(maxsize=1024)
-def detect_lang(txt: str) -> str:
-    """Detect language based on character sets"""
-    if HEB_CHARS.search(txt):  return "he"
-    if THAI_CHARS.search(txt): return "th"
-    return "en"
-
-
-@lru_cache(maxsize=128)
-def safe_int(s: str | None) -> int:
-    """Safely convert string to integer, returning 0 if not possible"""
-    m = re.search(r"\d+", s or "")
-    return int(m.group()) if m else 0
-
-
-def try_find(el: WebElement, css: str, *, all=False) -> List[WebElement]:
-    """Safely find elements by CSS selector without raising exceptions"""
-    try:
-        if all:
-            return el.find_elements(By.CSS_SELECTOR, css)
-        obj = el.find_element(By.CSS_SELECTOR, css)
-        return [obj] if obj else []
-    except (NoSuchElementException, StaleElementReferenceException):
-        return []
-
-
-def first_text(el: WebElement, css: str) -> str:
-    """Get text from the first matching element that has non-empty text"""
-    for e in try_find(el, css, all=True):
-        try:
-            if (t := e.text.strip()):
-                return t
-        except StaleElementReferenceException:
-            continue
-    return ""
-
-
-def parse_date_to_iso(date_str: str) -> str:
-    """
-    Parse date strings like "2 weeks ago", "January 2023", etc. into ISO format.
-    Returns a best-effort ISO string, or empty string if parsing fails.
-    """
-    if not date_str:
-        return ""
-
-    try:
-        now = datetime.now(timezone.utc)
-
-        # Handle relative dates
-        if "ago" in date_str.lower():
-            # For simplicity, map to approximate dates
-            if "minute" in date_str.lower():
-                minutes = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-                dt = now.replace(microsecond=0) - timezone.timedelta(minutes=minutes)
-            elif "hour" in date_str.lower():
-                hours = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-                dt = now.replace(microsecond=0) - timezone.timedelta(hours=hours)
-            elif "day" in date_str.lower():
-                days = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-                dt = now.replace(microsecond=0) - timezone.timedelta(days=days)
-            elif "week" in date_str.lower():
-                weeks = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-                dt = now.replace(microsecond=0) - timezone.timedelta(weeks=weeks)
-            elif "month" in date_str.lower():
-                months = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-                # Approximate months as 30 days
-                dt = now.replace(microsecond=0) - timezone.timedelta(days=30 * months)
-            elif "year" in date_str.lower():
-                years = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-                # Approximate years as 365 days
-                dt = now.replace(microsecond=0) - timezone.timedelta(days=365 * years)
-            else:
-                # Default to current time if can't parse
-                dt = now.replace(microsecond=0)
-        else:
-            # Handle absolute dates (month year format)
-            # This is a simplification - would need more robust parsing for production
-            dt = now.replace(microsecond=0)
-
-        return dt.isoformat()
-    except Exception:
-        # If parsing fails, return empty string
-        return ""
-
-
-def first_attr(el: WebElement, css: str, attr: str) -> str:
-    """Get attribute value from the first matching element that has a non-empty value"""
-    for e in try_find(el, css, all=True):
-        try:
-            if (v := (e.get_attribute(attr) or "").strip()):
-                return v
-        except StaleElementReferenceException:
-            continue
-    return ""
-
-
-def click_if(driver: Chrome, css: str, delay: float = .25, timeout: float = 5.0) -> bool:
-    """
-    Click the first usable element matching ``css``, if there is one.
-
-    Strategy:
-      1. Look the selector up immediately; when nothing matches, give up at once.
-      2. Click the first match that is both displayed and enabled.
-      3. If none of the matches could be clicked, wait up to ``timeout`` seconds
-         for a match to become clickable and click that.
-
-    Args:
-        driver: WebDriver instance
-        css: CSS selector for the element to click
-        delay: Time to sleep after a successful click (seconds)
-        timeout: Maximum time to wait in the fallback step (seconds)
-
-    Returns:
-        True if an element was clicked, False otherwise (including on any error,
-        which is logged at debug level).
-    """
-    try:
-        locator = (By.CSS_SELECTOR, css)
-
-        candidates = driver.find_elements(*locator)
-        if not candidates:
-            return False
-
-        # Fast path: click the first match that is currently visible and enabled.
-        for candidate in candidates:
-            try:
-                if not (candidate.is_displayed() and candidate.is_enabled()):
-                    continue
-                candidate.click()
-                time.sleep(delay)
-                return True
-            except Exception:
-                # This match could not be clicked; move on to the next one.
-                continue
-
-        # Slow path: none of the direct matches worked, so wait for clickability.
-        try:
-            clickable = WebDriverWait(driver, timeout).until(
-                EC.element_to_be_clickable(locator)
-            )
-            clickable.click()
-            time.sleep(delay)
-        except TimeoutException:
-            return False
-        return True
-
-    except Exception as exc:
-        log.debug(f"Error in click_if: {str(exc)}")
-        return False
-
-
-def get_current_iso_date() -> str:
-    """Return the current time as a timezone-aware UTC ISO-8601 string."""
-    import datetime as _dt
-
-    utc_now = _dt.datetime.now(tz=_dt.timezone.utc)
-    return utc_now.isoformat()
-
-# """
-# Utility functions for Google Maps Reviews Scraper.
-# """
-#
-# import re
-# import time
-# import logging
-# from datetime import datetime, timezone
-# from functools import lru_cache
-# from typing import List, Optional
-#
-# from selenium.common.exceptions import (NoSuchElementException,
-#                                        StaleElementReferenceException,
-#                                        TimeoutException)
-# from selenium.webdriver import Chrome
-# from selenium.webdriver.common.by import By
-# from selenium.webdriver.remote.webelement import WebElement
-# from selenium.webdriver.support import expected_conditions as EC
-# from selenium.webdriver.support.ui import WebDriverWait
-#
-# # Constants for language detection
-# HEB_CHARS = re.compile(r"[\u0590-\u05FF]")
-# THAI_CHARS = re.compile(r"[\u0E00-\u0E7F]")
-#
-# # Logger
-# log = logging.getLogger("scraper")
-#
-#
-# @lru_cache(maxsize=1024)
-# def detect_lang(txt: str) -> str:
-#     """Detect language based on character sets"""
-#     if HEB_CHARS.search(txt):  return "he"
-#     if THAI_CHARS.search(txt): return "th"
-#     return "en"
-#
-#
-# @lru_cache(maxsize=128)
-# def safe_int(s: str | None) -> int:
-#     """Safely convert string to integer, returning 0 if not possible"""
-#     m = re.search(r"\d+", s or "")
-#     return int(m.group()) if m else 0
-#
-#
-# def try_find(el: WebElement, css: str, *, all=False) -> List[WebElement]:
-#     """Safely find elements by CSS selector without raising exceptions"""
-#     try:
-#         if all:
-#             return el.find_elements(By.CSS_SELECTOR, css)
-#         obj = el.find_element(By.CSS_SELECTOR, css)
-#         return [obj] if obj else []
-#     except (NoSuchElementException, StaleElementReferenceException):
-#         return []
-#
-#
-# def first_text(el: WebElement, css: str) -> str:
-#     """Get text from the first matching element that has non-empty text"""
-#     for e in try_find(el, css, all=True):
-#         if (t := e.text.strip()):
-#             return t
-#     return ""
-#
-#
-# def first_attr(el: WebElement, css: str, attr: str) -> str:
-#     """Get attribute value from the first matching element that has a non-empty value"""
-#     for e in try_find(el, css, all=True):
-#         if (v := (e.get_attribute(attr) or "").strip()):
-#             return v
-#     return ""
-#
-#
-# def click_if(driver: Chrome, css: str, delay: float = .25, timeout: float = 5.0) -> bool:
-#     """Click element if it exists and is clickable, with timeout"""
-#     try:
-#         WebDriverWait(driver, timeout).until(
-#             EC.element_to_be_clickable((By.CSS_SELECTOR, css))
-#         ).click()
-#         time.sleep(delay)
-#         return True
-#     except TimeoutException:
-#         return False
-#
-#
-# def parse_date_to_iso(date_str: str) -> str:
-#     """
-#     Parse date strings like "2 weeks ago", "January 2023", etc. into ISO format.
-#     Returns a best-effort ISO string, or empty string if parsing fails.
-#     """
-#     if not date_str:
-#         return ""
-#
-#     try:
-#         now = datetime.now(timezone.utc)
-#
-#         # Handle relative dates
-#         if "ago" in date_str.lower():
-#             # For simplicity, map to approximate dates
-#             if "minute" in date_str.lower():
-#                 minutes = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-#                 dt = now.replace(microsecond=0) - timezone.timedelta(minutes=minutes)
-#             elif "hour" in date_str.lower():
-#                 hours = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-#                 dt = now.replace(microsecond=0) - timezone.timedelta(hours=hours)
-#             elif "day" in date_str.lower():
-#                 days = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=days)
-#             elif "week" in date_str.lower():
-#                 weeks = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-#                 dt = now.replace(microsecond=0) - timezone.timedelta(weeks=weeks)
-#             elif "month" in date_str.lower():
-#                 months = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-#                 # Approximate months as 30 days
-#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=30 * months)
-#             elif "year" in date_str.lower():
-#                 years = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
-#                 # Approximate years as 365 days
-#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=365 * years)
-#             else:
-#                 # Default to current time if can't parse
-#                 dt = now.replace(microsecond=0)
-#         else:
-#             # Handle absolute dates (month year format)
-#             # This is a simplification - would need more robust parsing for production
-#             dt = now.replace(microsecond=0)
-#
-#         return dt.isoformat()
-#     except Exception:
-#         # If parsing fails, return empty string
-#         return ""
-#
-#
-# def get_current_iso_date() -> str:
-#     """Return current UTC time in ISO format."""
-#     return datetime.now(timezone.utc).isoformat()
--- a/modules/webhooks.py
+++ b/modules/webhooks.py
@@ -1,373 +0,0 @@
-#!/usr/bin/env python3
-"""
-Webhook delivery system with retry logic and security.
-"""
-import asyncio
-import hmac
-import hashlib
-import json
-import logging
-from typing import Dict, Any, Optional
-from datetime import datetime
-import httpx
-from uuid import UUID
-
-log = logging.getLogger(__name__)
-
-
-class WebhookDeliveryError(Exception):
-    """Error type for a webhook delivery that failed after all retries."""
-
-
-class WebhookManager:
-    """
-    Manages webhook delivery with retry logic and security.
-
-    Features:
-    - Exponential backoff retry (3 attempts by default)
-    - HMAC-SHA256 signature for security
-    - Timeout handling
-    - Async delivery
-    - Logging of all attempts (best-effort: a failure to record an attempt
-      never affects delivery or retries)
-    """
-
-    def __init__(
-        self,
-        max_retries: int = 3,
-        timeout: float = 10.0,
-        initial_retry_delay: float = 2.0
-    ):
-        """
-        Initialize webhook manager.
-
-        Args:
-            max_retries: Maximum number of delivery attempts
-            timeout: Request timeout in seconds
-            initial_retry_delay: Delay before the first retry in seconds; it is
-                doubled for every subsequent retry (exponential backoff)
-        """
-        self.max_retries = max_retries
-        self.timeout = timeout
-        self.initial_retry_delay = initial_retry_delay
-
-    def generate_signature(self, payload: str, secret: str) -> str:
-        """
-        Generate HMAC-SHA256 signature for webhook payload.
-
-        Args:
-            payload: JSON string payload
-            secret: Webhook secret
-
-        Returns:
-            Hex-encoded signature
-        """
-        return hmac.new(
-            secret.encode('utf-8'),
-            payload.encode('utf-8'),
-            hashlib.sha256
-        ).hexdigest()
-
-    async def _log_attempt(self, db, job_id, **fields) -> None:
-        """
-        Record one delivery attempt through ``db.log_webhook_attempt``.
-
-        Does nothing when ``db`` or ``job_id`` is missing. Any exception raised
-        by the database call is logged and swallowed: attempt logging is
-        bookkeeping, and letting it raise would either re-send a webhook that
-        was already delivered or abort the remaining retries.
-        """
-        if not (db and job_id):
-            return
-        try:
-            await db.log_webhook_attempt(job_id=job_id, **fields)
-        except Exception as e:
-            log.warning(
-                f"Failed to record webhook attempt for job {job_id}: "
-                f"{type(e).__name__}: {str(e)}"
-            )
-
-    async def send_webhook(
-        self,
-        webhook_url: str,
-        payload: Dict[str, Any],
-        secret: Optional[str] = None,
-        job_id: Optional[UUID] = None,
-        db=None
-    ) -> bool:
-        """
-        Send webhook with retry logic.
-
-        The payload is serialized once (non-JSON values are stringified) and
-        POSTed up to ``max_retries`` times. Status codes 200, 201, 202 and 204
-        count as success; anything else, a timeout, or any other exception
-        counts as a failed attempt and triggers a retry after a delay of
-        ``initial_retry_delay * 2 ** (attempt - 1)`` seconds.
-
-        Args:
-            webhook_url: URL to send webhook to
-            payload: Webhook payload dictionary
-            secret: Optional webhook secret; when given, the request carries
-                X-Webhook-Signature and X-Webhook-Timestamp headers
-            job_id: Optional job ID for logging attempts
-            db: Optional database manager for logging (attempts are recorded
-                only when both ``db`` and ``job_id`` are provided)
-
-        Returns:
-            True if delivery succeeded, False otherwise
-        """
-        payload_json = json.dumps(payload, default=str)
-
-        for attempt in range(1, self.max_retries + 1):
-            try:
-                start_time = datetime.now()
-
-                # Prepare headers
-                headers = {
-                    "Content-Type": "application/json",
-                    "User-Agent": "GoogleReviewsScraper-Webhook/1.0"
-                }
-
-                # Add signature if secret provided
-                if secret:
-                    signature = self.generate_signature(payload_json, secret)
-                    headers["X-Webhook-Signature"] = f"sha256={signature}"
-                    headers["X-Webhook-Timestamp"] = str(int(datetime.now().timestamp()))
-
-                # Send webhook
-                async with httpx.AsyncClient() as client:
-                    response = await client.post(
-                        webhook_url,
-                        content=payload_json,
-                        headers=headers,
-                        timeout=self.timeout
-                    )
-
-                response_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-
-                # Check response
-                if response.status_code in [200, 201, 202, 204]:
-                    # Success
-                    log.info(
-                        f"Webhook delivered successfully to {webhook_url} "
-                        f"(attempt {attempt}, {response_time_ms:.0f}ms, status {response.status_code})"
-                    )
-
-                    # Log successful attempt. _log_attempt never raises, so a
-                    # database hiccup here cannot fall into the generic handler
-                    # below and cause a duplicate delivery.
-                    await self._log_attempt(
-                        db,
-                        job_id,
-                        attempt_number=attempt,
-                        success=True,
-                        status_code=response.status_code,
-                        response_time_ms=response_time_ms
-                    )
-
-                    return True
-                else:
-                    # Non-2xx response
-                    error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
-                    log.warning(
-                        f"Webhook delivery failed to {webhook_url} "
-                        f"(attempt {attempt}/{self.max_retries}): {error_msg}"
-                    )
-
-                    # Log failed attempt
-                    await self._log_attempt(
-                        db,
-                        job_id,
-                        attempt_number=attempt,
-                        success=False,
-                        status_code=response.status_code,
-                        error_message=error_msg,
-                        response_time_ms=response_time_ms
-                    )
-
-            except httpx.TimeoutException:
-                error_msg = f"Timeout after {self.timeout}s"
-                log.warning(
-                    f"Webhook delivery timeout to {webhook_url} "
-                    f"(attempt {attempt}/{self.max_retries}): {error_msg}"
-                )
-
-                # Log timeout attempt
-                await self._log_attempt(
-                    db,
-                    job_id,
-                    attempt_number=attempt,
-                    success=False,
-                    error_message=error_msg
-                )
-
-            except Exception as e:
-                error_msg = f"{type(e).__name__}: {str(e)}"
-                log.error(
-                    f"Webhook delivery error to {webhook_url} "
-                    f"(attempt {attempt}/{self.max_retries}): {error_msg}"
-                )
-
-                # Log error attempt
-                await self._log_attempt(
-                    db,
-                    job_id,
-                    attempt_number=attempt,
-                    success=False,
-                    error_message=error_msg
-                )
-
-            # Retry with exponential backoff (no sleep after the final attempt)
-            if attempt < self.max_retries:
-                retry_delay = self.initial_retry_delay * (2 ** (attempt - 1))
-                log.info(f"Retrying in {retry_delay:.1f}s...")
-                await asyncio.sleep(retry_delay)
-
-        # All retries failed
-        log.error(
-            f"Webhook delivery failed to {webhook_url} after {self.max_retries} attempts"
-        )
-        return False
-
-    async def send_job_completed_webhook(
-        self,
-        webhook_url: str,
-        job_id: UUID,
-        status: str,
-        reviews_count: Optional[int] = None,
-        scrape_time: Optional[float] = None,
-        error_message: Optional[str] = None,
-        reviews_url: Optional[str] = None,
-        secret: Optional[str] = None,
-        db=None
-    ) -> bool:
-        """
-        Send job completion webhook.
-
-        Builds a payload with ``event`` ("job.<status>"), ``job_id``, ``status``
-        and a UTC ``timestamp``, adds the result fields for a completed job or
-        the error message for a failed one, and hands it to ``send_webhook``.
-
-        Args:
-            webhook_url: URL to send webhook to
-            job_id: Job UUID
-            status: Job status ('completed' or 'failed')
-            reviews_count: Number of reviews scraped
-            scrape_time: Time taken in seconds
-            error_message: Error message if failed
-            reviews_url: URL to retrieve reviews
-            secret: Webhook secret
-            db: Database manager for logging
-
-        Returns:
-            True if delivery succeeded
-        """
-        payload = {
-            "event": f"job.{status}",
-            "job_id": str(job_id),
-            "status": status,
-            "timestamp": datetime.utcnow().isoformat() + "Z"
-        }
-
-        if status == "completed":
-            payload.update({
-                "reviews_count": reviews_count,
-                "scrape_time": scrape_time,
-                "reviews_url": reviews_url
-            })
-        elif status == "failed":
-            payload["error_message"] = error_message
-
-        return await self.send_webhook(
-            webhook_url=webhook_url,
-            payload=payload,
-            secret=secret,
-            job_id=job_id,
-            db=db
-        )
-
-
-class WebhookDispatcher:
-    """
-    Background webhook dispatcher that processes pending webhooks.
-
-    ``start()`` loops until ``stop()`` is called, delivering webhooks for the
-    jobs the database reports as pending and sleeping ``interval_seconds``
-    between passes.
-    """
-
-    def __init__(self, db, interval_seconds: int = 30):
-        """
-        Initialize webhook dispatcher.
-
-        Args:
-            db: Database manager instance
-            interval_seconds: How often to check for pending webhooks
-        """
-        self.db = db
-        self.interval = interval_seconds
-        self.webhook_manager = WebhookManager()
-        self.running = False
-
-    async def start(self):
-        """Start the background webhook dispatcher; runs until ``stop()`` is called."""
-        self.running = True
-        log.info("Webhook dispatcher started")
-
-        while self.running:
-            try:
-                await self.process_pending_webhooks()
-            except Exception as e:
-                # Keep the loop alive: one failed pass must not stop future passes.
-                log.error(f"Error in webhook dispatcher: {e}")
-
-            await asyncio.sleep(self.interval)
-
-    def stop(self):
-        """Stop the background webhook dispatcher (takes effect after the current pass)."""
-        self.running = False
-        log.info("Webhook dispatcher stopped")
-
-    async def process_pending_webhooks(self):
-        """
-        Process all pending webhooks.
-
-        Fetches up to 100 jobs with pending webhooks and delivers them one by
-        one. Each job must provide 'job_id', 'webhook_url' and 'status';
-        'webhook_secret', 'reviews_count', 'scrape_time' and 'error_message'
-        are optional. A failure on one job is logged and does not prevent the
-        remaining jobs from being processed.
-        """
-        import os
-
-        # Get jobs with pending webhooks
-        jobs = await self.db.get_pending_jobs_with_webhooks(limit=100)
-
-        if not jobs:
-            return
-
-        log.info(f"Processing {len(jobs)} pending webhooks...")
-
-        # API base URL from environment; the same for every job in this pass.
-        api_base_url = os.getenv('API_BASE_URL', 'http://localhost:8000')
-
-        for job in jobs:
-            # Captured up front so the error handler never has to index `job`
-            # again (which would raise if 'job_id' itself was the problem).
-            job_id = None
-            try:
-                job_id = job['job_id']
-                webhook_url = job['webhook_url']
-                webhook_secret = job.get('webhook_secret')
-                status = job['status']
-
-                reviews_url = f"{api_base_url}/jobs/{job_id}/reviews"
-
-                # Send webhook
-                await self.webhook_manager.send_job_completed_webhook(
-                    webhook_url=webhook_url,
-                    job_id=job_id,
-                    status=status,
-                    reviews_count=job.get('reviews_count'),
-                    scrape_time=job.get('scrape_time'),
-                    error_message=job.get('error_message'),
-                    reviews_url=reviews_url if status == 'completed' else None,
-                    secret=webhook_secret,
-                    db=self.db
-                )
-
-            except Exception as e:
-                log.error(f"Error processing webhook for job {job_id}: {e}")
-
-        log.info(f"Processed {len(jobs)} webhooks")
-
-
-# Webhook verification helper for client implementations
-def verify_webhook_signature(payload: str, signature: str, secret: str) -> bool:
-    """
-    Verify webhook signature (for client-side verification).
-
-    Recomputes the HMAC-SHA256 of ``payload`` with ``secret`` and compares it,
-    in constant time, with the hex digest carried in ``signature``. Malformed
-    signatures (missing, wrong prefix, non-ASCII characters) are rejected
-    rather than raising.
-
-    Args:
-        payload: Raw JSON payload string
-        signature: Signature from X-Webhook-Signature header (format: "sha256=...")
-        secret: Webhook secret
-
-    Returns:
-        True if signature is valid
-
-    Example:
-        @app.post("/webhook")
-        async def handle_webhook(request: Request):
-            payload = await request.body()
-            signature = request.headers.get("X-Webhook-Signature")
-
-            if not verify_webhook_signature(payload.decode(), signature, WEBHOOK_SECRET):
-                raise HTTPException(status_code=401, detail="Invalid signature")
-
-            # Process webhook...
-    """
-    prefix = "sha256="
-    if not signature or not signature.startswith(prefix):
-        return False
-
-    expected_signature = signature[len(prefix):]
-    computed_signature = hmac.new(
-        secret.encode('utf-8'),
-        payload.encode('utf-8'),
-        hashlib.sha256
-    ).hexdigest()
-
-    # Compare as bytes: hmac.compare_digest raises TypeError on str arguments
-    # containing non-ASCII characters, and the header value is caller-supplied.
-    return hmac.compare_digest(
-        expected_signature.encode('utf-8'),
-        computed_signature.encode('utf-8')
-    )