Phase 0: Project restructure to ReviewIQ platform architecture

New structure:
- scrapers/google_reviews/v1_0_0.py (was modules/scraper_clean.py)
- scrapers/base.py (BaseScraper interface)
- scrapers/registry.py (ScraperRegistry for version routing)
- core/database.py, models.py, config.py, enums.py
- utils/logger.py, crash_analyzer.py, health_checks.py, helpers.py, date_converter.py
- workers/chrome_pool.py
- services/webhook_service.py
- api/ routes structure (empty, ready for Phase 2)
- tests/ structure mirroring source

All imports updated in:
- api_server_production.py (7 import paths updated)
- utils/health_checks.py (scraper import path)

Legacy modules moved to modules/_legacy/:
- data_storage.py, image_handler.py, s3_handler.py (unused)

Syntax verified, frontend build passing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 15:22:08 +00:00
parent bb0291f265
commit 544e028c3f
37 changed files with 5782 additions and 30 deletions
--- a/utils/__init__.py
+++ b/utils/__init__.py
--- a/utils/crash_analyzer.py
+++ b/utils/crash_analyzer.py
@@ -0,0 +1,666 @@
+"""
+Crash Pattern Analyzer Module
+
+Provides deep analysis of scraper crashes with pattern detection,
+confidence scoring, and auto-fix parameter suggestions.
+
+Builds on top of the basic classify_crash function in the scraper module
+(scrapers/google_reviews/v1_0_0.py, formerly modules/scraper_clean.py)
+with more sophisticated pattern matching and multi-signal analysis.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+import re
+
+
+@dataclass
+class CrashAnalysis:
+    """
+    Outcome of analysing a single crash report.
+
+    Attributes:
+        pattern: Name of the detected crash pattern (e.g. "memory_exhaustion",
+            "dom_bloat", "rate_limited"), or "unknown" when nothing matched.
+        confidence: Score from 0.0 to 1.0 accumulated from the matching signals.
+        description: Human-readable summary of the signals that fired.
+        suggested_fix: Recommended action to prevent this crash.
+        auto_fix_params: Parameters that can be applied automatically to prevent
+            recurrence, or None when the pattern has no automatic fix.
+    """
+    pattern: str
+    confidence: float
+    description: str
+    suggested_fix: str
+    # Defaults to None so callers building a result for a pattern without an
+    # automatic fix do not have to pass the argument explicitly.
+    auto_fix_params: Optional[Dict[str, Any]] = None
+
+
+# Thresholds for pattern detection
+# Peak memory at or above this value is the strongest memory-exhaustion signal;
+# _check_memory_exhaustion also adds a weaker signal from 80% of it upwards.
+MEMORY_EXHAUSTION_THRESHOLD_MB = 1500  # 1.5GB in MB
+# Growth between the first and last valid metric samples at or above this rate
+# counts as an additional memory-exhaustion signal.
+MEMORY_GROWTH_RATE_THRESHOLD_MB_S = 10  # 10MB/s
+# Peak DOM node count at or above this value is the strongest DOM-bloat signal;
+# _check_dom_bloat also adds a weaker signal from 80% of it upwards.
+DOM_BLOAT_THRESHOLD = 50000  # 50000 nodes
+# NOTE: despite the name, _check_scroll_timeout compares this against the number
+# of "recovery attempt" log entries, not against a scroll counter.
+SCROLL_TIMEOUT_MIN_SCROLLS = 10  # Minimum scrolls before considering scroll_timeout
+
+
+# Auto-fix parameters for each crash pattern
+# Keyed by pattern name as produced by analyze_crash. apply_auto_fix copies most
+# entries verbatim into the scraper parameters; "delay_multiplier" and
+# "target_reviews" are interpreted specially there (see comments below).
+AUTO_FIX_PARAMS = {
+    "memory_exhaustion": {
+        "max_reviews": 500,
+        "restart_browser_after": 200
+    },
+    "dom_bloat": {
+        "scroll_cleanup": True,
+        "lazy_load": True
+    },
+    "rate_limited": {
+        # Not stored as-is: apply_auto_fix multiplies 'scroll_delay' by this factor.
+        "delay_multiplier": 2.0,
+        "use_different_proxy": True
+    },
+    "consent_loop": {
+        "skip_consent_retries": True
+    },
+    "scroll_timeout": {
+        "reduce_target": True,
+        # Sentinel string, not a number: apply_auto_fix recognises this exact
+        # value and reduces 'max_reviews' by 10%.
+        "target_reviews": "current - 10%"
+    },
+    "element_stale": {
+        "retry_with_fresh_elements": True
+    }
+}
+
+
+def _calculate_memory_growth_rate(metrics_history: List[Dict]) -> Optional[float]:
+    """
+    Estimate how fast memory grew over the sampled period, in MB/s.
+
+    Only samples carrying both 'memory_mb' and 'timestamp_ms' are considered;
+    the rate is the memory difference between the first and last such sample
+    divided by the elapsed time between them.
+
+    Args:
+        metrics_history: List of metric samples with timestamp_ms and memory_mb
+
+    Returns:
+        Growth rate in MB/s (negative when memory shrank), or None when fewer
+        than two usable samples exist or no time elapsed between them.
+    """
+    if not metrics_history or len(metrics_history) < 2:
+        return None
+
+    usable = [
+        sample for sample in metrics_history
+        if sample.get('memory_mb') is not None and sample.get('timestamp_ms') is not None
+    ]
+    if len(usable) < 2:
+        return None
+
+    # Samples are taken in list order; no sorting by timestamp is performed.
+    oldest, newest = usable[0], usable[-1]
+
+    elapsed_s = (newest['timestamp_ms'] - oldest['timestamp_ms']) / 1000
+    if elapsed_s <= 0:
+        return None
+
+    return (newest['memory_mb'] - oldest['memory_mb']) / elapsed_s
+
+
+def _get_max_memory(metrics_history: List[Dict]) -> Optional[int]:
+    """Return the largest non-None 'memory_mb' reading, or None if there is none."""
+    if not metrics_history:
+        return None
+
+    readings = (sample.get('memory_mb') for sample in metrics_history)
+    return max((value for value in readings if value is not None), default=None)
+
+
+def _get_max_dom_nodes(metrics_history: List[Dict]) -> Optional[int]:
+    """Return the largest non-None 'dom_nodes' reading, or None if there is none."""
+    if not metrics_history:
+        return None
+
+    node_counts = []
+    for sample in metrics_history:
+        count = sample.get('dom_nodes')
+        if count is not None:
+            node_counts.append(count)
+
+    if not node_counts:
+        return None
+    return max(node_counts)
+
+
+def _check_memory_exhaustion(
+    error_message: str,
+    metrics_history: List[Dict],
+    logs: List[Dict]
+) -> tuple[float, str]:
+    """
+    Score how strongly a crash looks like memory exhaustion.
+
+    Signals, in the order they are evaluated:
+        - peak memory at/above the threshold (+0.5) or at/above 80% of it (+0.3)
+        - memory growth rate at/above the growth threshold (+0.3)
+        - first memory-related keyword found in the error message (+0.2)
+        - at least one log message mentioning memory together with
+          "high", "warning" or "exceeded" (+0.1)
+
+    Returns:
+        Tuple of (confidence capped at 1.0, "; "-joined description of signals)
+    """
+    score = 0.0
+    evidence = []
+
+    # Peak memory versus the absolute threshold.
+    peak_mb = _get_max_memory(metrics_history)
+    if peak_mb is not None:
+        if peak_mb >= MEMORY_EXHAUSTION_THRESHOLD_MB:
+            score += 0.5
+            evidence.append(f"Memory reached {peak_mb}MB (threshold: {MEMORY_EXHAUSTION_THRESHOLD_MB}MB)")
+        elif peak_mb >= MEMORY_EXHAUSTION_THRESHOLD_MB * 0.8:
+            score += 0.3
+            evidence.append(f"Memory at {peak_mb}MB approaching threshold")
+
+    # Speed of growth across the sampled window.
+    rate = _calculate_memory_growth_rate(metrics_history)
+    if rate is not None and rate >= MEMORY_GROWTH_RATE_THRESHOLD_MB_S:
+        score += 0.3
+        evidence.append(f"Memory growing at {rate:.1f}MB/s (threshold: {MEMORY_GROWTH_RATE_THRESHOLD_MB_S}MB/s)")
+
+    # Only the first matching keyword (in list order) is reported.
+    lowered_error = error_message.lower()
+    candidates = ('memory', 'heap', 'out of memory', 'oom', 'aw, snap', 'status_access_violation')
+    hit = next((word for word in candidates if word in lowered_error), None)
+    if hit is not None:
+        score += 0.2
+        evidence.append(f"Error contains '{hit}'")
+
+    def _is_memory_warning(entry: Dict) -> bool:
+        text = entry.get('message', '').lower()
+        return 'memory' in text and ('high' in text or 'warning' in text or 'exceeded' in text)
+
+    # One matching log line is enough; further lines add nothing.
+    if any(_is_memory_warning(entry) for entry in logs):
+        score += 0.1
+        evidence.append("Memory warning found in logs")
+
+    if evidence:
+        summary = "; ".join(evidence)
+    else:
+        summary = "No memory exhaustion signals detected"
+    return min(score, 1.0), summary
+
+
+def _check_dom_bloat(
+    error_message: str,
+    metrics_history: List[Dict],
+    logs: List[Dict]
+) -> tuple[float, str]:
+    """
+    Score how strongly a crash looks like DOM bloat.
+
+    Signals, in the order they are evaluated:
+        - peak DOM node count at/above the threshold (+0.6) or at/above 80% of it (+0.3)
+        - first DOM-related keyword found in the error message (+0.2)
+        - peak memory of at least 800MB (+0.1)
+        - at least one log message mentioning "dom" together with
+          "large", "cleanup" or "remove" (+0.1)
+
+    Returns:
+        Tuple of (confidence capped at 1.0, "; "-joined description of signals)
+    """
+    score = 0.0
+    evidence = []
+
+    # Peak node count versus the absolute threshold.
+    peak_nodes = _get_max_dom_nodes(metrics_history)
+    if peak_nodes is not None:
+        if peak_nodes >= DOM_BLOAT_THRESHOLD:
+            score += 0.6
+            evidence.append(f"DOM nodes reached {peak_nodes} (threshold: {DOM_BLOAT_THRESHOLD})")
+        elif peak_nodes >= DOM_BLOAT_THRESHOLD * 0.8:
+            score += 0.3
+            evidence.append(f"DOM nodes at {peak_nodes} approaching threshold")
+
+    # Only the first matching keyword (in list order) is reported.
+    lowered_error = error_message.lower()
+    candidates = ('dom', 'element', 'node', 'render', 'paint', 'layout')
+    hit = next((word for word in candidates if word in lowered_error), None)
+    if hit is not None:
+        score += 0.2
+        evidence.append(f"Error contains '{hit}'")
+
+    # A heavy DOM usually drags memory up with it, so elevated memory corroborates.
+    peak_mb = _get_max_memory(metrics_history)
+    if peak_mb is not None and peak_mb >= 800:  # 800MB
+        score += 0.1
+        evidence.append(f"Memory also elevated ({peak_mb}MB)")
+
+    def _is_dom_warning(entry: Dict) -> bool:
+        text = entry.get('message', '').lower()
+        return 'dom' in text and ('large' in text or 'cleanup' in text or 'remove' in text)
+
+    # One matching log line is enough; further lines add nothing.
+    if any(_is_dom_warning(entry) for entry in logs):
+        score += 0.1
+        evidence.append("DOM warning found in logs")
+
+    if evidence:
+        summary = "; ".join(evidence)
+    else:
+        summary = "No DOM bloat signals detected"
+    return min(score, 1.0), summary
+
+
+def _check_rate_limited(
+    error_message: str,
+    metrics_history: List[Dict],
+    logs: List[Dict]
+) -> tuple[float, str]:
+    """
+    Check for rate limiting pattern.
+
+    Signals:
+        - a standalone "429" in the error message (+0.6)
+        - first rate-limit keyword found in the error message (+0.4)
+        - every log entry whose network status is 429 (+0.2 each)
+        - every log entry whose message mentions "unusual traffic", "rate"
+          or "blocked" (+0.1 each)
+
+    Args:
+        error_message: The exception message; None is treated as empty.
+        metrics_history: Unused here; kept so all checkers share one signature.
+        logs: Log entries before the crash; None is treated as empty. Entries
+            may carry a 'message' string and a 'network' dict with 'status'.
+
+    Returns:
+        Tuple of (confidence capped at 1.0, description)
+    """
+    confidence = 0.0
+    signals = []
+
+    # Tolerate a missing message instead of failing on None.lower().
+    error_text = error_message or ''
+    error_lower = error_text.lower()
+
+    # Require "429" not to be embedded in a longer number, so that values such
+    # as "14290ms" or port/ID numbers are not mistaken for an HTTP status.
+    if re.search(r'(?<!\d)429(?!\d)', error_text):
+        confidence += 0.6
+        signals.append("HTTP 429 status code in error")
+
+    rate_keywords = ['rate limit', 'too many requests', 'unusual traffic', 'captcha', 'blocked']
+    for keyword in rate_keywords:
+        if keyword in error_lower:
+            confidence += 0.4
+            signals.append(f"Error contains '{keyword}'")
+            break
+
+    # Check logs for rate limiting signals
+    rate_log_count = 0
+    for log_entry in logs or []:
+        # Keys may be present with a None value, which .get(key, default) does
+        # not cover, hence the explicit `or` fallbacks.
+        msg = (log_entry.get('message') or '').lower()
+        network = log_entry.get('network') or {}
+        status = network.get('status') if isinstance(network, dict) else None
+
+        # Accept the status whether it was logged as a number or as text.
+        if status == 429 or status == '429':
+            rate_log_count += 1
+            confidence += 0.2
+
+        # NOTE(review): the bare 'rate' substring also matches unrelated text
+        # such as "growth rate" or "generate"; kept as-is to preserve scoring.
+        if 'unusual traffic' in msg or 'rate' in msg or 'blocked' in msg:
+            rate_log_count += 1
+            confidence += 0.1
+
+    if rate_log_count > 0:
+        signals.append(f"Found {rate_log_count} rate-limiting indicators in logs")
+
+    description = "; ".join(signals) if signals else "No rate limiting signals detected"
+    return min(confidence, 1.0), description
+
+
+def _check_consent_loop(
+    error_message: str,
+    metrics_history: List[Dict],
+    logs: List[Dict]
+) -> tuple[float, str]:
+    """
+    Check for consent popup loop pattern.
+
+    Signals:
+        - the error message mentions "consent" (+0.3)
+        - number of log messages mentioning "consent":
+          3 or more (+0.5), exactly 2 (+0.3), exactly 1 (+0.1)
+        - the error mentions "timeout" and at least one consent log exists (+0.2)
+
+    Args:
+        error_message: The exception message; None is treated as empty.
+        metrics_history: Unused here; kept so all checkers share one signature.
+        logs: Log entries before the crash; None is treated as empty.
+
+    Returns:
+        Tuple of (confidence capped at 1.0, description)
+    """
+    confidence = 0.0
+    signals = []
+
+    # Tolerate a missing message instead of failing on None.lower().
+    error_lower = (error_message or '').lower()
+    if 'consent' in error_lower:
+        confidence += 0.3
+        signals.append("Error mentions consent")
+
+    # Count consent-related log entries. A 'message' key holding None is
+    # treated as an empty message.
+    consent_count = sum(
+        1 for log_entry in logs or []
+        if 'consent' in (log_entry.get('message') or '').lower()
+    )
+
+    # Multiple consent messages indicate a loop
+    if consent_count >= 3:
+        confidence += 0.5
+        signals.append(f"Consent popup appeared {consent_count} times in logs")
+    elif consent_count >= 2:
+        confidence += 0.3
+        signals.append(f"Consent popup appeared {consent_count} times")
+    elif consent_count == 1:
+        confidence += 0.1
+        signals.append("Single consent popup detected")
+
+    # Check for timeout after consent handling
+    if 'timeout' in error_lower and consent_count > 0:
+        confidence += 0.2
+        signals.append("Timeout occurred with consent activity")
+
+    description = "; ".join(signals) if signals else "No consent loop signals detected"
+    return min(confidence, 1.0), description
+
+
+def _check_scroll_timeout(
+    error_message: str,
+    metrics_history: List[Dict],
+    logs: List[Dict],
+    state: Optional[Dict] = None
+) -> tuple[float, str]:
+    """
+    Check for scroll timeout pattern (no new reviews after many scrolls).
+
+    Signals:
+        - the error message mentions "timeout" (+0.2)
+        - "recovery attempt" log entries: at least SCROLL_TIMEOUT_MIN_SCROLLS
+          (+0.5), otherwise at least 5 (+0.3)
+        - any log message containing "no new" or "stuck" (+0.2)
+        - the non-zero 'reviews_count' values among the last five metric
+          samples are all identical (+0.2)
+
+    Args:
+        error_message: The exception message; None is treated as empty.
+        metrics_history: Metric samples; 'reviews_count' is read from the last five.
+        logs: Log entries before the crash; None is treated as empty.
+        state: Scraper state. Accepted for interface compatibility only; it
+            does not currently contribute to the score.
+
+    Returns:
+        Tuple of (confidence capped at 1.0, description)
+    """
+    confidence = 0.0
+    signals = []
+
+    # Tolerate a missing message instead of failing on None.lower().
+    error_lower = (error_message or '').lower()
+    if 'timeout' in error_lower:
+        confidence += 0.2
+        signals.append("Timeout in error message")
+
+    # Count recovery attempts in logs (indicate stuck scrolling)
+    recovery_count = 0
+    no_new_count = 0
+    for log_entry in logs or []:
+        # A 'message' key holding None is treated as an empty message.
+        msg = (log_entry.get('message') or '').lower()
+        if 'recovery attempt' in msg:
+            recovery_count += 1
+        if 'no new' in msg or 'stuck' in msg:
+            no_new_count += 1
+
+    if recovery_count >= SCROLL_TIMEOUT_MIN_SCROLLS:
+        confidence += 0.5
+        signals.append(f"Made {recovery_count} recovery attempts")
+    elif recovery_count >= 5:
+        confidence += 0.3
+        signals.append(f"Made {recovery_count} recovery attempts")
+
+    if no_new_count > 0:
+        confidence += 0.2
+        signals.append(f"Found {no_new_count} 'no new reviews' log entries")
+
+    # Check if reviews stopped growing
+    if metrics_history and len(metrics_history) >= 5:
+        # Samples with a missing or zero count are skipped, so a plateau is
+        # only reported for a positive review count.
+        recent_counts = [m.get('reviews_count', 0) for m in metrics_history[-5:] if m.get('reviews_count')]
+        if recent_counts and len(set(recent_counts)) == 1:
+            confidence += 0.2
+            signals.append(f"Review count stuck at {recent_counts[0]}")
+
+    description = "; ".join(signals) if signals else "No scroll timeout signals detected"
+    return min(confidence, 1.0), description
+
+
+def _check_element_stale(
+    error_message: str,
+    metrics_history: List[Dict],
+    logs: List[Dict]
+) -> tuple[float, str]:
+    """
+    Score how strongly a crash looks like a stale element reference.
+
+    Signals, in the order they are evaluated:
+        - first stale-element phrase found in the error message (+0.6)
+        - at least one log message containing any of those phrases (+0.2)
+        - population standard deviation of the non-zero 'dom_nodes' readings
+          above 1000, given at least three such readings (+0.2)
+
+    Returns:
+        Tuple of (confidence capped at 1.0, "; "-joined description of signals)
+    """
+    score = 0.0
+    evidence = []
+
+    phrases = [
+        'stale element', 'staleelement', 'stale_element',
+        'element is not attached', 'element reference',
+        'no such element', 'element not found',
+        'element is no longer valid'
+    ]
+
+    # Only the first matching phrase (in list order) is reported.
+    lowered_error = error_message.lower()
+    hit = next((phrase for phrase in phrases if phrase in lowered_error), None)
+    if hit is not None:
+        score += 0.6
+        evidence.append(f"Error contains '{hit}'")
+
+    def _mentions_stale(entry: Dict) -> bool:
+        text = entry.get('message', '').lower()
+        return any(phrase in text for phrase in phrases)
+
+    # Each log entry counts at most once, however many phrases it contains.
+    matching_logs = sum(1 for entry in logs if _mentions_stale(entry))
+    if matching_logs > 0:
+        score += 0.2
+        evidence.append(f"Found {matching_logs} stale element references in logs")
+
+    # A DOM whose size swings widely suggests elements are being replaced.
+    if metrics_history and len(metrics_history) >= 3:
+        node_counts = [s.get('dom_nodes') for s in metrics_history if s.get('dom_nodes')]
+        if len(node_counts) >= 3:
+            mean = sum(node_counts) / len(node_counts)
+            variance = sum((n - mean) ** 2 for n in node_counts) / len(node_counts)
+            spread = variance ** 0.5
+            if spread > 1000:
+                score += 0.2
+                evidence.append(f"High DOM variability (std dev: {spread:.0f})")
+
+    if evidence:
+        summary = "; ".join(evidence)
+    else:
+        summary = "No stale element signals detected"
+    return min(score, 1.0), summary
+
+
+def analyze_crash(crash_report: Dict) -> CrashAnalysis:
+    """
+    Analyze a crash report to determine the most likely crash pattern.
+
+    Runs every pattern checker against the report and keeps the pattern with
+    the highest confidence (on a tie, the checker listed first wins). When the
+    best confidence is below 0.2, falls back to a mapping from the basic
+    crash_type, and to the 'unknown' pattern if that type is not mapped.
+
+    Args:
+        crash_report: Dictionary containing:
+            - error_message: str - The exception message
+            - metrics_history: List[Dict] - Sampled metrics with timestamp_ms, memory_mb, dom_nodes
+            - logs_before_crash: List[Dict] - Recent log entries before the crash
+            - state: Optional[Dict] - Scraper state (reviews_extracted, scroll_count, etc.)
+            - crash_type: Optional[str] - Basic crash classification from classify_crash()
+          Any key may be missing or None; both are treated as "no data".
+
+    Returns:
+        CrashAnalysis with the highest-confidence pattern match
+    """
+    # dict.get(key, default) only applies the default when the key is absent.
+    # Reports serialized with explicit nulls would otherwise reach the
+    # checkers as None and fail on .lower() / iteration.
+    raw_error = crash_report.get('error_message')
+    error_message = str(raw_error) if raw_error else ''
+    metrics_history = crash_report.get('metrics_history') or []
+    logs = crash_report.get('logs_before_crash') or []
+    state = crash_report.get('state') or {}
+    basic_type = crash_report.get('crash_type') or 'unknown'
+
+    # Run all pattern checks. Insertion order matters: max() below returns the
+    # first entry among equal confidences.
+    pattern_results = {
+        'memory_exhaustion': _check_memory_exhaustion(error_message, metrics_history, logs),
+        'dom_bloat': _check_dom_bloat(error_message, metrics_history, logs),
+        'rate_limited': _check_rate_limited(error_message, metrics_history, logs),
+        'consent_loop': _check_consent_loop(error_message, metrics_history, logs),
+        'scroll_timeout': _check_scroll_timeout(error_message, metrics_history, logs, state),
+        'element_stale': _check_element_stale(error_message, metrics_history, logs),
+    }
+
+    # Find the pattern with highest confidence
+    pattern_name, (confidence, description) = max(
+        pattern_results.items(), key=lambda item: item[1][0]
+    )
+
+    # If confidence is too low, fall back to basic classification
+    if confidence < 0.2:
+        # Map basic crash types to our patterns
+        basic_to_pattern = {
+            'memory_exhaustion': 'memory_exhaustion',
+            'tab_crash': 'memory_exhaustion',  # Tab crashes often from memory
+            'timeout': 'scroll_timeout',
+            'element_not_found': 'element_stale',
+            'rate_limited': 'rate_limited',
+            'network_failure': 'rate_limited',  # Could be blocking
+        }
+
+        if basic_type in basic_to_pattern:
+            pattern_name = basic_to_pattern[basic_type]
+            confidence = 0.3  # Low confidence fallback
+            description = f"Inferred from basic crash type '{basic_type}'"
+        else:
+            pattern_name = 'unknown'
+            confidence = 0.0
+            description = f"Unable to determine crash pattern (basic type: {basic_type})"
+
+    # Generate suggested fix based on pattern
+    suggested_fixes = {
+        'memory_exhaustion': (
+            "Reduce batch size and restart browser more frequently. "
+            "Consider limiting max_reviews to 500 and restarting browser after every 200 reviews."
+        ),
+        'dom_bloat': (
+            "Enable DOM cleanup during scrolling. "
+            "Hide processed review cards and remove separator elements to keep DOM light."
+        ),
+        'rate_limited': (
+            "Increase delays between requests and consider rotating proxies. "
+            "Double the delay multiplier and switch to a different proxy if available."
+        ),
+        'consent_loop': (
+            "Skip consent handling after initial attempt to avoid infinite loops. "
+            "The consent popup may be appearing due to cookie clearing or navigation issues."
+        ),
+        'scroll_timeout': (
+            "The page may have stopped loading new reviews. "
+            "Try reducing the target review count by 10% and accepting partial results."
+        ),
+        'element_stale': (
+            "Page elements are being removed/replaced during scraping. "
+            "Retry operations with freshly-located elements and add defensive waits."
+        ),
+        'unknown': (
+            "Unable to determine specific crash cause. "
+            "Review logs and consider restarting with fresh browser session."
+        )
+    }
+
+    suggested_fix = suggested_fixes.get(pattern_name, suggested_fixes['unknown'])
+
+    # Hand out a copy so callers that tweak the parameters of one analysis
+    # cannot alter the module-level AUTO_FIX_PARAMS table. 'unknown' has no
+    # entry and yields None.
+    fix_template = AUTO_FIX_PARAMS.get(pattern_name)
+    auto_fix_params = dict(fix_template) if fix_template is not None else None
+
+    return CrashAnalysis(
+        pattern=pattern_name,
+        confidence=confidence,
+        description=description,
+        suggested_fix=suggested_fix,
+        auto_fix_params=auto_fix_params
+    )
+
+
+def get_auto_fix_params(pattern: str) -> Optional[Dict[str, Any]]:
+    """
+    Get auto-fix parameters for a specific crash pattern.
+
+    Args:
+        pattern: The crash pattern name
+
+    Returns:
+        A fresh dictionary of auto-fix parameters, or None if pattern not
+        recognized. The result is a copy, so the caller may modify it freely.
+    """
+    fix_template = AUTO_FIX_PARAMS.get(pattern)
+    if fix_template is None:
+        return None
+    # Shallow copy is sufficient: every value in the table is a scalar.
+    # Returning the table entry itself would let a caller's edits leak into
+    # every later analysis.
+    return dict(fix_template)
+
+
+def apply_auto_fix(pattern: str, current_params: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Apply auto-fix parameters to current scraper parameters.
+
+    Most fix entries are copied verbatim. Three are interpreted:
+        - 'target_reviews' == 'current - 10%': 'max_reviews' is reduced to 90%
+          of its current value (1000 is assumed when unset).
+        - 'delay_multiplier': 'scroll_delay' is multiplied by the factor
+          (1.0 is assumed when unset).
+        - 'max_reviews': treated as a cap, so a positive limit that is already
+          lower than the fix value is kept.
+
+    Args:
+        pattern: The crash pattern name; unknown patterns change nothing
+        current_params: Current scraper parameters; not modified
+
+    Returns:
+        Updated copy of the parameters dictionary with fixes applied
+    """
+    fix_params = AUTO_FIX_PARAMS.get(pattern, {})
+    updated = current_params.copy()
+
+    for key, value in fix_params.items():
+        if key == 'target_reviews' and value == 'current - 10%':
+            # Special case: reduce target by 10%. A key present with None is
+            # treated like a missing key rather than failing on None * 0.9.
+            current_target = updated.get('max_reviews')
+            if current_target is None:
+                current_target = 1000
+            updated['max_reviews'] = int(current_target * 0.9)
+        elif key == 'delay_multiplier':
+            # Multiply existing delay; None falls back to the same 1.0 default.
+            current_delay = updated.get('scroll_delay')
+            if current_delay is None:
+                current_delay = 1.0
+            updated['scroll_delay'] = current_delay * value
+        elif key == 'max_reviews':
+            # The fix is meant to limit load. Overwriting unconditionally
+            # would raise the limit for a run already configured below the
+            # fix value, making a repeat crash more likely.
+            current_limit = updated.get('max_reviews')
+            if current_limit and current_limit < value:
+                updated['max_reviews'] = current_limit
+            else:
+                updated['max_reviews'] = value
+        else:
+            updated[key] = value
+
+    return updated
+
+
+def summarize_crash_patterns(crash_reports: List[Dict]) -> Dict[str, Any]:
+    """
+    Aggregate the analysis of several crash reports.
+
+    Each report is run through analyze_crash; results are grouped by pattern.
+
+    Args:
+        crash_reports: List of crash report dictionaries
+
+    Returns:
+        Dictionary with:
+            - total_crashes: number of reports
+            - patterns: per-pattern count, percentage of all reports and
+              average confidence
+            - most_common: pattern seen most often (first seen wins a tie),
+              or None when there are no reports
+            - recommendations: for patterns seen at least twice that have
+              auto-fix parameters, most frequent first
+    """
+    if not crash_reports:
+        return {
+            'total_crashes': 0,
+            'patterns': {},
+            'most_common': None,
+            'recommendations': []
+        }
+
+    total = len(crash_reports)
+
+    # Group the confidence of every analysis under its pattern; the number of
+    # confidences per pattern doubles as the occurrence count.
+    confidences_by_pattern: Dict[str, List[float]] = {}
+    for report in crash_reports:
+        result = analyze_crash(report)
+        confidences_by_pattern.setdefault(result.pattern, []).append(result.confidence)
+
+    patterns_summary = {}
+    for name, scores in confidences_by_pattern.items():
+        occurrences = len(scores)
+        patterns_summary[name] = {
+            'count': occurrences,
+            'percentage': occurrences / total * 100,
+            'avg_confidence': sum(scores) / len(scores)
+        }
+
+    # max() keeps the first maximal entry, i.e. the earliest-seen pattern on a tie.
+    if confidences_by_pattern:
+        most_common = max(confidences_by_pattern.items(), key=lambda item: len(item[1]))[0]
+    else:
+        most_common = None
+
+    ranked = sorted(patterns_summary.items(), key=lambda item: item[1]['count'], reverse=True)
+    recommendations = []
+    for name, stats in ranked:
+        if stats['count'] < 2:
+            # One-off patterns are not worth a standing recommendation.
+            continue
+        fix_params = AUTO_FIX_PARAMS.get(name)
+        if not fix_params:
+            continue
+        recommendations.append({
+            'pattern': name,
+            'occurrences': stats['count'],
+            'auto_fix_params': fix_params
+        })
+
+    return {
+        'total_crashes': total,
+        'patterns': patterns_summary,
+        'most_common': most_common,
+        'recommendations': recommendations
+    }
--- a/utils/date_converter.py
+++ b/utils/date_converter.py
@@ -0,0 +1,391 @@
+"""
+Date conversion utilities for Google Maps reviews.
+"""
+
+import logging
+import re
+from datetime import datetime, timedelta
+from typing import Dict, Any, Optional
+
+# Logger
+log = logging.getLogger("scraper")
+
+
+def relative_to_datetime(date_str: str, lang: str = "en") -> Optional[datetime]:
+    """
+    Convert a relative date string to a datetime object.
+
+    The requested language is tried first, then the other supported languages.
+    Unlike parse_relative_date, this function never invents a date: when no
+    language can parse the string the result is None.
+
+    Args:
+        date_str: The relative date string (e.g., "2 years ago")
+        lang: Language code ("en", "he" or "th")
+
+    Returns:
+        datetime object (naive, relative to the current UTC time) or None if
+        conversion fails
+    """
+    if not date_str:
+        return None
+
+    try:
+        # parse_relative_date falls back to a random date when parsing fails
+        # and so never returns the input unchanged; going through it would
+        # turn every unparseable string into a fabricated datetime. Calling
+        # try_parse_date directly keeps "failed" distinguishable, because it
+        # returns the original string when nothing matched.
+        now = datetime.utcnow()
+        requested = lang.lower()
+        candidates = [lang] + [alt for alt in ("en", "he", "th") if alt != requested]
+
+        for candidate in candidates:
+            iso_date = try_parse_date(date_str, candidate, now)
+            if iso_date != date_str:
+                return datetime.fromisoformat(iso_date)
+
+        return None
+    except Exception as e:
+        # Best-effort conversion: any malformed input is reported as "no date".
+        log.debug(f"Failed to convert relative date '{date_str}': {e}")
+        return None
+
+
+class DateConverter:
+    """Handler for converting string dates to datetime objects in MongoDB"""
+
+    @staticmethod
+    def _document_language(doc: Dict[str, Any]) -> str:
+        """Return the first language key of doc['description'], or "en"."""
+        description = doc.get("description")
+        # 'description' may be absent, None or not a mapping; all of those
+        # fall back to English instead of raising on .keys().
+        if isinstance(description, dict):
+            return next(iter(description), "en")
+        return "en"
+
+    @staticmethod
+    def convert_dates_in_document(doc: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Convert string dates to datetime objects in a document.
+
+        The document is modified in place and also returned. The raw "date"
+        field is removed; if the document has no usable "review_date" it is
+        derived from that raw value. "date" entries inside "owner_responses"
+        are removed as well.
+
+        Args:
+            doc: MongoDB document with string dates
+
+        Returns:
+            Document with string dates converted to datetime objects
+        """
+        # Remove the original date string field if it exists
+        if "date" in doc:
+            original_date = doc.pop("date")
+
+            # Try to use the original date to fix review_date if needed
+            if not doc.get("review_date"):
+                date_obj = relative_to_datetime(
+                    original_date, DateConverter._document_language(doc)
+                )
+                if date_obj:
+                    doc["review_date"] = date_obj
+
+        # Convert date fields to datetime
+        for field in ("created_date", "last_modified_date", "review_date"):
+            value = doc.get(field)
+            if not isinstance(value, str):
+                continue
+            try:
+                # Try to parse as ISO format first; a trailing 'Z' is rewritten
+                # to an explicit UTC offset before parsing.
+                doc[field] = datetime.fromisoformat(value.replace('Z', '+00:00'))
+            except (ValueError, TypeError):
+                # If that fails, try parsing as relative date; the string is
+                # left untouched when that fails too.
+                date_obj = relative_to_datetime(
+                    value, DateConverter._document_language(doc)
+                )
+                if date_obj:
+                    doc[field] = date_obj
+
+        # Handle nested date fields in owner_responses
+        owner_responses = doc.get("owner_responses")
+        if isinstance(owner_responses, dict):
+            for response in owner_responses.values():
+                if isinstance(response, dict):
+                    # Remove the date string field from owner responses
+                    response.pop("date", None)
+
+        return doc
+
+    @staticmethod
+    def convert_dates_in_reviews(reviews: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+        """
+        Convert string dates to datetime objects for all reviews.
+
+        Args:
+            reviews: Dictionary of review documents, keyed by review id
+
+        Returns:
+            The same dictionary, with each document's dates converted
+        """
+        log.info("Converting string dates to datetime objects...")
+
+        for review_id, review in reviews.items():
+            reviews[review_id] = DateConverter.convert_dates_in_document(review)
+
+        return reviews
+
+
+def parse_relative_date(date_str: str, lang: str, now: Optional[datetime] = None) -> str:
+    """
+    Turn a relative review date such as "a week ago" or "לפני 7 שנים" into an
+    absolute ISO 8601 datetime string.
+
+    The requested language is tried first; if it does not match, the remaining
+    supported languages ("en", "he", "th") are tried in that order.
+
+    Examples of accepted English input:
+       - "a day ago", "3 weeks ago", "4 months ago", "2 years ago", etc.
+    Examples of accepted Hebrew input:
+       - "לפני יום", "לפני 2 ימים", "לפני שבוע", "לפני שבועיים", "לפני חודש",
+         "לפני חודשיים", "לפני 10 חודשים", "לפני שנה", "לפני 3 שנים", etc.
+
+    Parameters:
+      - date_str (str): the relative date string.
+      - lang (str): language code to try first, e.g. "en" or "he".
+      - now (Optional[datetime]): reference datetime; if None, the current
+        UTC time (datetime.utcnow()) is used.
+
+    Returns:
+      The calculated absolute datetime in ISO 8601 format.
+      NOTE: when no language can parse the input, a RANDOM date between 1 and
+      365 days before the reference time is returned, so the result never
+      equals date_str and a failure cannot be detected by the caller.
+    """
+    import random
+
+    reference = datetime.utcnow() if now is None else now  # use UTC for consistency
+
+    # Requested language first.
+    attempt = try_parse_date(date_str, lang, reference)
+    if attempt != date_str:
+        return attempt
+
+    # Then every other supported language, skipping the one already tried.
+    requested = lang.lower()
+    for candidate in ("en", "he", "th"):
+        if candidate == requested:
+            continue
+        attempt = try_parse_date(date_str, candidate, reference)
+        if attempt != date_str:
+            return attempt
+
+    # Nothing matched: fabricate a date within the last year.
+    days_back = random.randint(1, 365)
+    return (reference - timedelta(days=days_back)).isoformat()
+
+
+# Approximate length in days of each supported unit (a month is treated as
+# 30 days and a year as 365 days).
+_UNIT_DAYS = {"day": 1, "week": 7, "month": 30, "year": 365}
+
+# English: a number or "a"/"an", then the unit, then "ago".
+_EN_RELATIVE = re.compile(r'(?P<num>a|an|\d+)\s+(?P<unit>day|week|month|year)s?\s+ago', re.IGNORECASE)
+
+# Hebrew: optional number (digits or the word for "one"), then a unit in
+# singular or plural form.
+_HE_RELATIVE = re.compile(r'(?P<num>\d+|אחד|אחת)?\s*(?P<unit>שנה|שנים|חודש|חודשים|יום|ימים|שבוע|שבועות)',
+                          re.IGNORECASE)
+_HE_UNITS = {
+    "יום": "day", "ימים": "day",
+    "שבוע": "week", "שבועות": "week",
+    "חודש": "month", "חודשים": "month",
+    "שנה": "year", "שנים": "year",
+}
+# Hebrew dual forms, where the number two is fused into the unit word.
+_HE_DUAL_FORMS = {
+    "חודשיים": (2, "month"),
+    "שבועיים": (2, "week"),
+    "יומיים": (2, "day"),
+    "שנתיים": (2, "year"),
+}
+
+# Thai: optional number, then the unit immediately followed by "ที่แล้ว".
+_TH_RELATIVE = re.compile(r'(?P<num>\d+)?\s*(?P<unit>วัน|สัปดาห์|เดือน|ปี)ที่แล้ว', re.IGNORECASE)
+_TH_UNITS = {"วัน": "day", "สัปดาห์": "week", "เดือน": "month", "ปี": "year"}
+
+
+def _relative_delta(num, unit):
+    """Return the (approximate) timedelta for ``num`` units of ``unit``."""
+    return timedelta(days=_UNIT_DAYS[unit] * num)
+
+
+def _parse_en_delta(date_str):
+    """Return the timedelta described by an English relative date, or None."""
+    m = _EN_RELATIVE.search(date_str)
+    if not m:
+        return None
+    num_str = m.group("num").lower()
+    num = 1 if num_str in ("a", "an") else int(num_str)
+    return _relative_delta(num, m.group("unit").lower())
+
+
+def _parse_he_delta(date_str):
+    """Return the timedelta described by a Hebrew relative date, or None."""
+    # Remove the "לפני" prefix if present
+    text = date_str.strip()
+    if text.startswith("לפני"):
+        text = text[len("לפני"):].strip()
+
+    # Dual forms must be checked before the generic pattern: "שבועיים" and
+    # "חודשיים" contain the singular unit, so the generic pattern would
+    # otherwise read them as one week / one month when extra text surrounds
+    # them.
+    for word, (num, unit) in _HE_DUAL_FORMS.items():
+        if word in text:
+            return _relative_delta(num, unit)
+
+    m = _HE_RELATIVE.search(text)
+    if not m:
+        return None
+    num_str = m.group("num")
+    try:
+        num = int(num_str) if num_str else 1
+    except ValueError:
+        # The number was spelled out ("אחד"/"אחת"), which means one.
+        num = 1
+    return _relative_delta(num, _HE_UNITS[m.group("unit")])
+
+
+def _parse_th_delta(date_str):
+    """Return the timedelta described by a Thai relative date, or None."""
+    m = _TH_RELATIVE.search(date_str)
+    if not m:
+        return None
+    num_str = m.group("num")
+    num = int(num_str) if num_str else 1
+    return _relative_delta(num, _TH_UNITS[m.group("unit")])
+
+
+# Language code -> parser returning a timedelta, or None when nothing matched.
+_DELTA_PARSERS = {
+    "en": _parse_en_delta,
+    "he": _parse_he_delta,
+    "th": _parse_th_delta,
+}
+
+
+def try_parse_date(date_str: str, lang: str, now: datetime) -> str:
+    """
+    Helper function that attempts to parse a date string in a specific language.
+
+    Supported languages are "en", "he" and "th" (case-insensitive). Only day,
+    week, month and year units are recognised; months and years are
+    approximated as 30 and 365 days.
+
+    Args:
+        date_str: Relative date text, e.g. "3 weeks ago" or "לפני חודשיים".
+        lang: Language code selecting which patterns to try.
+        now: Reference datetime the relative offset is subtracted from.
+
+    Returns the ISO formatted date if successful, or the original string if not
+    (including when ``lang`` is not a supported language).
+    """
+    parser = _DELTA_PARSERS.get(lang.lower())
+    delta = parser(date_str) if parser is not None else None
+
+    # Return the calculated date if parsing was successful, otherwise return the original string
+    if delta is None:
+        return date_str
+    return (now - delta).isoformat()
+
+
+# def parse_relative_date(date_str: str, lang: str, now: Optional[datetime] = None) -> str:
+#     """
+#     Converts a relative review_date (in English or Hebrew) such as "a week ago" or "לפני 7 שנים"
+#     into an ISO formatted datetime string (UTC).
+#
+#     For English, supported formats include:
+#        - "a day ago", "an hour ago", "3 weeks ago", "4 months ago", "2 years ago", etc.
+#     For Hebrew, supported formats include:
+#        - "לפני יום", "לפני 2 ימים", "לפני שבוע", "לפני שבועיים", "לפני חודש",
+#          "לפני חודשיים", "לפני 10 חודשים", "לפני שנה", "לפני 3 שנים", etc.
+#
+#     Parameters:
+#       - date_str (str): the relative date string.
+#       - lang (str): "en" for English or "he" for Hebrew.
+#       - now (Optional[datetime]): reference datetime; if None, current local time is used.
+#
+#     Returns:
+#       A string representing the calculated absolute datetime in ISO 8601 format,
+#       or the original date_str if parsing fails.
+#     """
+#     if now is None:
+#         now = datetime.utcnow()  # use UTC for consistency
+#
+#     delta = timedelta(0)
+#
+#     if lang.lower() == "en":
+#         # Pattern: capture number or "a"/"an", then unit.
+#         pattern = re.compile(r'(?P<num>a|an|\d+)\s+(?P<unit>day|week|month|year)s?\s+ago', re.IGNORECASE)
+#         m = pattern.search(date_str)
+#         if m:
+#             num_str = m.group("num").lower()
+#             num = 1 if num_str in ("a", "an") else int(num_str)
+#             unit = m.group("unit").lower()
+#             if unit == "day":
+#                 delta = timedelta(days=num)
+#             elif unit == "week":
+#                 delta = timedelta(weeks=num)
+#             elif unit == "month":
+#                 delta = timedelta(days=30 * num)  # approximate
+#             elif unit == "year":
+#                 delta = timedelta(days=365 * num)  # approximate
+#         else:
+#             return date_str  # return original if not matched
+#     elif lang.lower() == "he":
+#         # Remove the "לפני" prefix if present
+#         text = date_str.strip()
+#         if text.startswith("לפני"):
+#             text = text[len("לפני"):].strip()
+#
+#         # Handle special cases where the number and unit are combined:
+#         special = {
+#             "חודשיים": (2, "month"),
+#             "שבועיים": (2, "week"),
+#             "יומיים": (2, "day"),
+#         }
+#         if text in special:
+#             num, unit = special[text]
+#         else:
+#             # Match optional number (or assume 1) and then a unit.
+#             pattern = re.compile(r'(?P<num>\d+|אחד|אחת)?\s*(?P<unit>שנה|שנים|חודש|חודשים|יום|ימים|שבוע|שבועות)',
+#                                  re.IGNORECASE)
+#             m = pattern.search(text)
+#             if m:
+#                 num_str = m.group("num")
+#                 if not num_str:
+#                     num = 1
+#                 else:
+#                     try:
+#                         num = int(num_str)
+#                     except ValueError:
+#                         num = 1
+#                 unit_he = m.group("unit")
+#                 # Map the Hebrew unit (both singular and plural) to English unit names
+#                 if unit_he in ("יום", "ימים"):
+#                     unit = "day"
+#                 elif unit_he in ("שבוע", "שבועות"):
+#                     unit = "week"
+#                 elif unit_he in ("חודש", "חודשים"):
+#                     unit = "month"
+#                 elif unit_he in ("שנה", "שנים"):
+#                     unit = "year"
+#                 else:
+#                     unit = "day"  # fallback
+#             else:
+#                 return date_str  # if nothing matches, return original text
+#
+#         if unit == "day":
+#             delta = timedelta(days=num)
+#         elif unit == "week":
+#             delta = timedelta(weeks=num)
+#         elif unit == "month":
+#             delta = timedelta(days=30 * num)  # approximate
+#         elif unit == "year":
+#             delta = timedelta(days=365 * num)  # approximate
+#
+#     result = now - delta
+#     return result.isoformat()
+
+
+# --- Example usage ---
+if __name__ == "__main__":  # demo: runs only when this file is executed as a script
+    # Fixed reference time for reproducibility:
+    fixed_now = datetime(2025, 2, 5, 12, 0, 0)
+    examples = [  # (relative date text, language code) pairs
+        ("a week ago", "he"),  # NOTE(review): English text tagged "he" - presumably exercises a fallback path; confirm intent
+        ("4 weeks ago", "en"),
+        ("לפני 7 שנים", "he"),
+        ("לפني חודשיים", "he")
+    ]
+    for text, lang in examples:
+        iso_date = parse_relative_date(text, lang, now=fixed_now)  # every sample is resolved against the same fixed reference time
+        print(f"Original: {text} ({lang}) => ISO: {iso_date}")
--- a/utils/health_checks.py
+++ b/utils/health_checks.py
@@ -0,0 +1,411 @@
+#!/usr/bin/env python3
+"""
+Smart health check system with canary testing.
+Verifies that scraping actually works, not just that services are up.
+"""
+import asyncio
+import logging
+from datetime import datetime, timedelta
+from typing import Dict, Any, Optional
+import os
+
+log = logging.getLogger(__name__)  # module-level logger, named after this module
+
+
+class CanaryMonitor:
+    """
+    Background canary test monitor.
+
+    Runs actual scraping tests periodically to verify the scraper works.
+    This catches issues like:
+    - Google Maps page structure changes
+    - Broken CSS selectors
+    - GDPR consent handling issues
+    - Network/proxy problems
+    - Chrome/browser issues
+
+    Failure accounting: every failed run (validation failure, timeout or
+    exception) increments ``consecutive_failures`` exactly once and a passing
+    run resets it to zero. An alert is sent once the counter reaches
+    ``ALERT_THRESHOLD``.
+    """
+
+    # Consecutive failed runs after which an alert is sent.
+    ALERT_THRESHOLD = 3
+    # Upper bound, in seconds, on a single canary scrape.
+    SCRAPE_TIMEOUT_SECONDS = 60
+    # Keys the first scraped review must contain to count as well-formed.
+    REQUIRED_REVIEW_FIELDS = ('author', 'rating', 'date_text')
+
+    def __init__(
+        self,
+        db,
+        interval_hours: int = 4,
+        test_url: Optional[str] = None
+    ):
+        """
+        Initialize canary monitor.
+
+        Args:
+            db: Database manager instance; must provide an awaitable
+                ``save_canary_result(...)``
+            interval_hours: How often to run canary tests
+            test_url: Optional test URL. Falls back to the CANARY_TEST_URL
+                environment variable, then to a built-in default
+                (Soho Factory in Vilnius)
+        """
+        self.db = db
+        self.interval = timedelta(hours=interval_hours)
+        self.test_url = test_url or os.getenv(
+            'CANARY_TEST_URL',
+            'https://www.google.com/maps/place/Soho+Factory/@54.6738155,25.2595844,17z/'
+        )
+
+        self.running = False
+        self.last_run: Optional[datetime] = None
+        self.last_success: Optional[datetime] = None
+        self.consecutive_failures = 0
+        self.last_result: Optional[Dict[str, Any]] = None
+
+    async def start(self):
+        """
+        Start the background canary monitoring.
+
+        Loops until ``stop()`` clears ``self.running``; the flag is only
+        re-checked after each sleep, so a stop request takes effect when the
+        loop next wakes up (or immediately if the surrounding task is
+        cancelled).
+        """
+        self.running = True
+        log.info(f"Canary monitor started (interval: {self.interval.total_seconds()/3600:.1f}h)")
+
+        while self.running:
+            try:
+                await self.run_canary_test()
+            except Exception as e:
+                # run_canary_test() has already counted and recorded this
+                # failure before re-raising. Counting it again here would trip
+                # the alert after two errors instead of three.
+                log.error(f"Canary test failed with exception: {e}")
+
+                # Alert if multiple consecutive failures
+                if self.consecutive_failures >= self.ALERT_THRESHOLD:
+                    await self.send_alert(
+                        f"CRITICAL: Scraper canary failed {self.consecutive_failures} times in a row! "
+                        f"Last error: {str(e)[:200]}"
+                    )
+
+            # Sleep until next run
+            await asyncio.sleep(self.interval.total_seconds())
+
+    def stop(self):
+        """Stop the background monitoring"""
+        self.running = False
+        log.info("Canary monitor stopped")
+
+    async def _persist_result(self, **fields):
+        """
+        Save a canary result to the database.
+
+        Database errors are logged rather than raised so that a storage
+        problem is neither mistaken for a scraper failure nor able to mask
+        the real outcome of the run.
+        """
+        try:
+            await self.db.save_canary_result(**fields)
+        except Exception as e:
+            log.error(f"Failed to save canary result: {e}")
+
+    async def run_canary_test(self):
+        """
+        Run a single canary test.
+
+        This performs an actual scrape on a known test URL and validates:
+        - Scraping succeeds
+        - Reviews are extracted
+        - Review count is reasonable
+        - Scrape time is reasonable
+        - Data structure is valid
+
+        The outcome is stored in ``self.last_result`` and persisted through
+        the database manager. Validation failures and timeouts are handled
+        (and alerted on) here.
+
+        Raises:
+            Exception: any other error raised by the scrape or by evaluating
+                its result is recorded and then re-raised, so the caller can
+                alert on it.
+        """
+        log.info(f"Running canary scrape test on {self.test_url[:60]}...")
+        self.last_run = datetime.now()
+
+        try:
+            # Imported lazily, inside the try block, so that a broken scraper
+            # module is recorded as a canary failure like any other error.
+            from scrapers.google_reviews.v1_0_0 import fast_scrape_reviews
+
+            # Run actual scrape with timeout.
+            # NOTE: wait_for only stops waiting; the worker thread started by
+            # to_thread is not interrupted and may keep running after a timeout.
+            result = await asyncio.wait_for(
+                asyncio.to_thread(
+                    fast_scrape_reviews,
+                    url=self.test_url,
+                    headless=True,
+                    max_scrolls=10  # Limited for canary
+                ),
+                timeout=self.SCRAPE_TIMEOUT_SECONDS
+            )
+
+            # Validate result
+            checks = {
+                "scrape_succeeded": result['success'],
+                "got_reviews": result['count'] > 0,
+                "reasonable_count": 10 <= result['count'] <= 500,
+                "reasonable_time": result['time'] < 30,
+                "data_structure_valid": self._validate_review_structure(result.get('reviews', []))
+            }
+
+        except asyncio.TimeoutError:
+            log.error(f"Canary test TIMEOUT (>{self.SCRAPE_TIMEOUT_SECONDS}s)")
+            self.consecutive_failures += 1
+            self.last_result = {
+                "status": "timeout",
+                "error": f"Scrape took longer than {self.SCRAPE_TIMEOUT_SECONDS} seconds"
+            }
+
+            await self._persist_result(
+                success=False,
+                error_message=f"Timeout after {self.SCRAPE_TIMEOUT_SECONDS} seconds"
+            )
+
+            if self.consecutive_failures >= self.ALERT_THRESHOLD:
+                await self.send_alert(
+                    f"CRITICAL: Canary timeout {self.consecutive_failures} times!"
+                )
+            return
+
+        except Exception as e:
+            log.error(f"Canary test ERROR: {e}")
+            self.consecutive_failures += 1
+            self.last_result = {
+                "status": "error",
+                "error": str(e)
+            }
+
+            await self._persist_result(
+                success=False,
+                error_message=str(e)
+            )
+
+            raise  # Re-raise to trigger alert in main loop
+
+        # The scrape returned and every check could be evaluated. Bookkeeping
+        # happens outside the try block so that a failure while recording the
+        # outcome cannot be counted as a second scrape failure.
+        if all(checks.values()):
+            # Success!
+            log.info(
+                f"Canary test PASSED: {result['count']} reviews in {result['time']:.1f}s"
+            )
+            self.consecutive_failures = 0
+            self.last_success = datetime.now()
+            self.last_result = {
+                "status": "pass",
+                "reviews_count": result['count'],
+                "scrape_time": result['time'],
+                "checks": checks
+            }
+
+            # Save to database
+            await self._persist_result(
+                success=True,
+                reviews_count=result['count'],
+                scrape_time=result['time'],
+                metadata={"checks": checks}
+            )
+            return
+
+        # Validation failed
+        failed_checks = [k for k, v in checks.items() if not v]
+        log.error(
+            f"Canary test FAILED: validation failed on {failed_checks}"
+        )
+        self.consecutive_failures += 1
+        self.last_result = {
+            "status": "fail",
+            "reviews_count": result['count'],
+            "scrape_time": result['time'],
+            "checks": checks,
+            "failed_checks": failed_checks
+        }
+
+        # Save to database
+        await self._persist_result(
+            success=False,
+            reviews_count=result['count'],
+            scrape_time=result['time'],
+            error_message=f"Validation failed: {failed_checks}",
+            metadata={"checks": checks}
+        )
+
+        # Alert on failure
+        if self.consecutive_failures >= self.ALERT_THRESHOLD:
+            await self.send_alert(
+                f"CRITICAL: Canary validation failed {self.consecutive_failures} times! "
+                f"Failed checks: {failed_checks}"
+            )
+
+    def _validate_review_structure(self, reviews) -> bool:
+        """
+        Validate that reviews have expected structure.
+
+        Only the first review is inspected.
+
+        Args:
+            reviews: List of review dictionaries
+
+        Returns:
+            True if the list is non-empty and its first review contains every
+            key in ``REQUIRED_REVIEW_FIELDS``
+        """
+        if not reviews:
+            return False
+
+        # Check first review has required fields
+        first_review = reviews[0]
+        return all(field in first_review for field in self.REQUIRED_REVIEW_FIELDS)
+
+    async def send_alert(self, message: str):
+        """
+        Send alert via configured channels.
+
+        The message is always logged at CRITICAL level. It is also posted to
+        Slack when the SLACK_WEBHOOK_URL environment variable is set; a
+        failure to reach Slack is logged and otherwise ignored.
+
+        Args:
+            message: Alert message to send
+        """
+        log.critical(message)
+
+        # TODO: Integrate with alerting systems
+        # Examples:
+
+        # Slack
+        slack_webhook = os.getenv('SLACK_WEBHOOK_URL')
+        if slack_webhook:
+            try:
+                import httpx
+                async with httpx.AsyncClient() as client:
+                    await client.post(
+                        slack_webhook,
+                        json={"text": message},
+                        timeout=5.0
+                    )
+                log.info("Alert sent to Slack")
+            except Exception as e:
+                log.error(f"Failed to send Slack alert: {e}")
+
+        # Email (example with SMTP)
+        # smtp_config = os.getenv('SMTP_CONFIG')
+        # if smtp_config:
+        #     await send_email(
+        #         to=os.getenv('ALERT_EMAIL'),
+        #         subject="Scraper Canary Alert",
+        #         body=message
+        #     )
+
+        # PagerDuty
+        # pagerduty_key = os.getenv('PAGERDUTY_KEY')
+        # if pagerduty_key:
+        #     await trigger_pagerduty(message)
+
+    def get_status(self) -> Dict[str, Any]:
+        """
+        Get current canary status.
+
+        Returns:
+            Status dictionary whose "status" is one of:
+            - "unknown": no canary run has finished yet
+            - "failing": runs have finished but none has ever passed
+            - "stale": the last pass is more than 6 hours old
+            - "healthy": the last pass is recent
+        """
+        last_run_iso = self.last_run.isoformat() if self.last_run else None
+
+        if not self.last_success:
+            # last_run is set when a run starts, so a completed run is
+            # detected through last_result instead.
+            if self.last_result is not None:
+                return {
+                    "status": "failing",
+                    "message": "No canary test has succeeded yet",
+                    "last_run": last_run_iso,
+                    "consecutive_failures": self.consecutive_failures,
+                    "last_result": self.last_result
+                }
+            return {
+                "status": "unknown",
+                "message": "No canary tests run yet",
+                "last_run": last_run_iso
+            }
+
+        age = datetime.now() - self.last_success
+        max_age = timedelta(hours=6)  # Alert if no success in 6 hours
+
+        if age > max_age:
+            return {
+                "status": "stale",
+                "last_success": self.last_success.isoformat(),
+                "age_hours": age.total_seconds() / 3600,
+                "consecutive_failures": self.consecutive_failures,
+                "message": f"Last successful canary was {age.total_seconds()/3600:.1f} hours ago"
+            }
+
+        return {
+            "status": "healthy",
+            "last_success": self.last_success.isoformat(),
+            "last_run": last_run_iso,
+            "age_minutes": age.total_seconds() / 60,
+            "consecutive_failures": self.consecutive_failures,
+            "last_result": self.last_result
+        }
+
+
+class HealthCheckSystem:
+    """
+    Complete health check system for production.
+
+    Provides multiple levels of health checks:
+    - Liveness: Is the server alive?
+    - Readiness: Can it handle traffic?
+    - Canary: Does scraping actually work?
+    """
+
+    def __init__(self, db):
+        """
+        Initialize health check system.
+
+        Args:
+            db: Database manager instance
+        """
+        self.db = db
+        self.canary = CanaryMonitor(db, interval_hours=4)
+        # Handle of the running canary loop, or None when it is not running.
+        self._canary_task = None
+
+    @staticmethod
+    def _timestamp() -> str:
+        """Return the current UTC time as a naive ISO 8601 string."""
+        # NOTE: utcnow() is deprecated since Python 3.12; it is kept so the
+        # emitted timestamp format (no UTC offset suffix) stays unchanged.
+        return datetime.utcnow().isoformat()
+
+    async def start(self):
+        """
+        Start background health monitoring.
+
+        Must be called from within a running event loop. Calling it again
+        while the canary loop is still running is a no-op.
+        """
+        task = getattr(self, "_canary_task", None)
+        if task is not None and not task.done():
+            return
+        # Keep a strong reference: the event loop only holds weak references
+        # to tasks, so an unreferenced task may be garbage-collected mid-run.
+        self._canary_task = asyncio.create_task(self.canary.start())
+
+    def stop(self):
+        """Stop background health monitoring"""
+        self.canary.stop()
+        # Cancel the loop task as well; otherwise it would stay parked in its
+        # sleep until the next interval elapses.
+        task = getattr(self, "_canary_task", None)
+        if task is not None and not task.done():
+            task.cancel()
+        self._canary_task = None
+
+    async def check_liveness(self) -> Dict[str, Any]:
+        """
+        Liveness check: Is the server alive?
+
+        This is a simple check that always succeeds if the server is running.
+        Used by Kubernetes liveness probe - restart container if fails.
+
+        Returns:
+            Liveness status
+        """
+        return {
+            "status": "alive",
+            "timestamp": self._timestamp()
+        }
+
+    async def check_readiness(self) -> Dict[str, Any]:
+        """
+        Readiness check: Can the server handle traffic?
+
+        Checks if dependencies are available.
+        Used by Kubernetes readiness probe - remove from load balancer if fails.
+
+        Returns:
+            Readiness status
+        """
+        checks = {}
+
+        # Check database
+        try:
+            await self.db.pool.fetchval("SELECT 1")
+            checks["database"] = {"healthy": True}
+        except Exception as e:
+            # Any failure to run the probe query means "not ready"; the error
+            # text is surfaced in the response rather than raised.
+            checks["database"] = {"healthy": False, "error": str(e)}
+
+        # Overall readiness
+        all_healthy = all(c.get("healthy", False) for c in checks.values())
+
+        return {
+            "status": "ready" if all_healthy else "not_ready",
+            "checks": checks,
+            "timestamp": self._timestamp()
+        }
+
+    async def check_canary(self) -> Dict[str, Any]:
+        """
+        Canary check: Does scraping actually work?
+
+        Returns the latest canary test result.
+        Used by external monitoring (PagerDuty, DataDog) for alerts.
+
+        Returns:
+            Canary status
+        """
+        return self.canary.get_status()
+
+    async def get_detailed_health(self) -> Dict[str, Any]:
+        """
+        Get detailed health status of all components.
+
+        Returns:
+            Complete health status: "healthy" only when the server is alive,
+            ready, and the canary is "healthy" or "unknown"; otherwise
+            "degraded"
+        """
+        liveness = await self.check_liveness()
+        readiness = await self.check_readiness()
+        canary = await self.check_canary()
+
+        overall_healthy = (
+            liveness["status"] == "alive" and
+            readiness["status"] == "ready" and
+            canary["status"] in ["healthy", "unknown"]  # Unknown is OK (first run)
+        )
+
+        return {
+            "status": "healthy" if overall_healthy else "degraded",
+            "components": {
+                "liveness": liveness,
+                "readiness": readiness,
+                "canary": canary
+            },
+            "timestamp": self._timestamp()
+        }
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -0,0 +1,307 @@
+"""
+Utility functions for Google Maps Reviews Scraper.
+"""
+import datetime
+import logging
+import re
+import time
+from datetime import timezone
+from functools import lru_cache
+from typing import List
+
+from selenium.common.exceptions import (NoSuchElementException,
+                                        StaleElementReferenceException,
+                                        TimeoutException)
+from selenium.webdriver import Chrome
+from selenium.webdriver.common.by import By
+from selenium.webdriver.remote.webelement import WebElement
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+
+# Logger shared by the scraper helpers (fixed name "scraper", not the module name)
+log = logging.getLogger("scraper")
+
+# Constants for language detection: each pattern matches one character in a Unicode range
+HEB_CHARS = re.compile(r"[\u0590-\u05FF]")  # Hebrew block, U+0590-U+05FF
+THAI_CHARS = re.compile(r"[\u0E00-\u0E7F]")  # Thai block, U+0E00-U+0E7F
+
+
+@lru_cache(maxsize=1024)
+def detect_lang(txt: str) -> str:
+    """
+    Guess the language of ``txt`` from the scripts it contains.
+
+    Returns "he" if any Hebrew character is present, otherwise "th" if any
+    Thai character is present, otherwise "en". Results are memoised.
+    """
+    # Hebrew is tested first, so text mixing both scripts is reported as "he".
+    for code, charset in (("he", HEB_CHARS), ("th", THAI_CHARS)):
+        if charset.search(txt):
+            return code
+    return "en"
+
+
+@lru_cache(maxsize=128)
+def safe_int(s: str | None) -> int:
+    """
+    Return the first run of digits found in ``s`` as an integer.
+
+    ``None``, an empty string, or text without any digit yields 0.
+    Results are memoised.
+    """
+    text = s if s else ""
+    digits = re.search(r"\d+", text)
+    if digits is None:
+        return 0
+    return int(digits.group())
+
+
+def try_find(el: WebElement, css: str, *, all=False) -> List[WebElement]:
+    """
+    Look up descendants of ``el`` by CSS selector without raising.
+
+    With ``all=True`` every match is returned; otherwise the result is a list
+    holding at most the first match. A missing or stale element produces an
+    empty list instead of an exception.
+    """
+    try:
+        if not all:
+            single = el.find_element(By.CSS_SELECTOR, css)
+            return [single] if single else []
+        return el.find_elements(By.CSS_SELECTOR, css)
+    except (NoSuchElementException, StaleElementReferenceException):
+        return []
+
+
+def first_text(el: WebElement, css: str) -> str:
+    """
+    Return the stripped text of the first match of ``css`` that has any text.
+
+    Matches that have gone stale are skipped. Returns "" when no match
+    carries non-empty text.
+    """
+    for candidate in try_find(el, css, all=True):
+        try:
+            stripped = candidate.text.strip()
+        except StaleElementReferenceException:
+            continue
+        if stripped:
+            return stripped
+    return ""
+
+
+def parse_date_to_iso(date_str: str) -> str:
+    """
+    Parse date strings like "2 weeks ago", "January 2023", etc. into ISO format.
+
+    Relative dates ("... ago") are resolved against the current UTC time using
+    the first number in the string (1 when there is none, as in "a day ago").
+    Months and years are approximated as 30 and 365 days. Absolute dates are
+    not parsed yet: for those, and for "ago" strings with no recognised unit,
+    the current time is returned.
+
+    Returns a best-effort ISO string (UTC, second precision), or empty string
+    if the input is empty or parsing fails.
+    """
+    if not date_str:
+        return ""
+
+    # Imported locally because the module-level name ``datetime`` is bound to
+    # the module, not the class, and ``timedelta`` is not imported at module
+    # level at all.
+    from datetime import datetime, timedelta, timezone
+
+    try:
+        now = datetime.now(timezone.utc).replace(microsecond=0)
+        lowered = date_str.lower()
+
+        if "ago" not in lowered:
+            # Handle absolute dates (month year format)
+            # This is a simplification - would need more robust parsing for production
+            return now.isoformat()
+
+        count_match = re.search(r'\d+', date_str)
+        count = int(count_match.group()) if count_match else 1
+
+        # Checked in this order; the first keyword found in the text wins.
+        units = (
+            ("minute", timedelta(minutes=1)),
+            ("hour", timedelta(hours=1)),
+            ("day", timedelta(days=1)),
+            ("week", timedelta(weeks=1)),
+            ("month", timedelta(days=30)),  # Approximate months as 30 days
+            ("year", timedelta(days=365)),  # Approximate years as 365 days
+        )
+        for keyword, unit_length in units:
+            if keyword in lowered:
+                return (now - count * unit_length).isoformat()
+
+        # Default to current time if can't parse
+        return now.isoformat()
+    except (ValueError, OverflowError):
+        # If parsing fails (e.g. the offset is out of range), return empty string
+        return ""
+
+
+def first_attr(el: WebElement, css: str, attr: str) -> str:
+    """
+    Return attribute ``attr`` of the first match of ``css`` that has a value.
+
+    Values are stripped of surrounding whitespace; matches whose attribute is
+    missing or blank, or that have gone stale, are skipped. Returns "" when
+    no match qualifies.
+    """
+    for candidate in try_find(el, css, all=True):
+        try:
+            raw = candidate.get_attribute(attr)
+            value = (raw or "").strip()
+        except StaleElementReferenceException:
+            continue
+        if value:
+            return value
+    return ""
+
+
+def click_if(driver: Chrome, css: str, delay: float = .25, timeout: float = 5.0) -> bool:
+    """
+    Click the first usable element matching ``css``, if there is one.
+
+    Elements already present are tried in document order; if none of them can
+    be clicked, the function waits up to ``timeout`` seconds for a matching
+    element to become clickable. Errors are swallowed and reported as False.
+
+    Args:
+        driver: WebDriver instance
+        css: CSS selector for the element to click
+        delay: Time to sleep after a successful click (seconds)
+        timeout: Maximum time to wait for a clickable element (seconds)
+
+    Returns:
+        True if an element was clicked, False otherwise
+    """
+    try:
+        candidates = driver.find_elements(By.CSS_SELECTOR, css)
+        # Nothing matches at all: skip the (slow) explicit wait entirely.
+        if not candidates:
+            return False
+
+        for candidate in candidates:
+            try:
+                usable = candidate.is_displayed() and candidate.is_enabled()
+                if usable:
+                    candidate.click()
+                    time.sleep(delay)
+                    return True
+            except Exception:
+                # This candidate could not be clicked; move on to the next.
+                continue
+
+        # No direct click worked, so fall back to an explicit wait.
+        try:
+            target = WebDriverWait(driver, timeout).until(
+                EC.element_to_be_clickable((By.CSS_SELECTOR, css))
+            )
+            target.click()
+            time.sleep(delay)
+            return True
+        except TimeoutException:
+            return False
+
+    except Exception as e:
+        log.debug(f"Error in click_if: {str(e)}")
+        return False
+
+
+def get_current_iso_date() -> str:
+    """Return the current time, in UTC, as a timezone-aware ISO 8601 string."""
+    # Local import: at module level the name ``datetime`` refers to the
+    # module, so the class is pulled in here.
+    from datetime import datetime, timezone
+    utc_now = datetime.now(timezone.utc)
+    return utc_now.isoformat()
+
+# """
+# Utility functions for Google Maps Reviews Scraper.
+# """
+#
+# import re
+# import time
+# import logging
+# from datetime import datetime, timezone
+# from functools import lru_cache
+# from typing import List, Optional
+#
+# from selenium.common.exceptions import (NoSuchElementException,
+#                                        StaleElementReferenceException,
+#                                        TimeoutException)
+# from selenium.webdriver import Chrome
+# from selenium.webdriver.common.by import By
+# from selenium.webdriver.remote.webelement import WebElement
+# from selenium.webdriver.support import expected_conditions as EC
+# from selenium.webdriver.support.ui import WebDriverWait
+#
+# # Constants for language detection
+# HEB_CHARS = re.compile(r"[\u0590-\u05FF]")
+# THAI_CHARS = re.compile(r"[\u0E00-\u0E7F]")
+#
+# # Logger
+# log = logging.getLogger("scraper")
+#
+#
+# @lru_cache(maxsize=1024)
+# def detect_lang(txt: str) -> str:
+#     """Detect language based on character sets"""
+#     if HEB_CHARS.search(txt):  return "he"
+#     if THAI_CHARS.search(txt): return "th"
+#     return "en"
+#
+#
+# @lru_cache(maxsize=128)
+# def safe_int(s: str | None) -> int:
+#     """Safely convert string to integer, returning 0 if not possible"""
+#     m = re.search(r"\d+", s or "")
+#     return int(m.group()) if m else 0
+#
+#
+# def try_find(el: WebElement, css: str, *, all=False) -> List[WebElement]:
+#     """Safely find elements by CSS selector without raising exceptions"""
+#     try:
+#         if all:
+#             return el.find_elements(By.CSS_SELECTOR, css)
+#         obj = el.find_element(By.CSS_SELECTOR, css)
+#         return [obj] if obj else []
+#     except (NoSuchElementException, StaleElementReferenceException):
+#         return []
+#
+#
+# def first_text(el: WebElement, css: str) -> str:
+#     """Get text from the first matching element that has non-empty text"""
+#     for e in try_find(el, css, all=True):
+#         if (t := e.text.strip()):
+#             return t
+#     return ""
+#
+#
+# def first_attr(el: WebElement, css: str, attr: str) -> str:
+#     """Get attribute value from the first matching element that has a non-empty value"""
+#     for e in try_find(el, css, all=True):
+#         if (v := (e.get_attribute(attr) or "").strip()):
+#             return v
+#     return ""
+#
+#
+# def click_if(driver: Chrome, css: str, delay: float = .25, timeout: float = 5.0) -> bool:
+#     """Click element if it exists and is clickable, with timeout"""
+#     try:
+#         WebDriverWait(driver, timeout).until(
+#             EC.element_to_be_clickable((By.CSS_SELECTOR, css))
+#         ).click()
+#         time.sleep(delay)
+#         return True
+#     except TimeoutException:
+#         return False
+#
+#
+# def parse_date_to_iso(date_str: str) -> str:
+#     """
+#     Parse date strings like "2 weeks ago", "January 2023", etc. into ISO format.
+#     Returns a best-effort ISO string, or empty string if parsing fails.
+#     """
+#     if not date_str:
+#         return ""
+#
+#     try:
+#         now = datetime.now(timezone.utc)
+#
+#         # Handle relative dates
+#         if "ago" in date_str.lower():
+#             # For simplicity, map to approximate dates
+#             if "minute" in date_str.lower():
+#                 minutes = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(minutes=minutes)
+#             elif "hour" in date_str.lower():
+#                 hours = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(hours=hours)
+#             elif "day" in date_str.lower():
+#                 days = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=days)
+#             elif "week" in date_str.lower():
+#                 weeks = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(weeks=weeks)
+#             elif "month" in date_str.lower():
+#                 months = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 # Approximate months as 30 days
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=30 * months)
+#             elif "year" in date_str.lower():
+#                 years = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 # Approximate years as 365 days
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=365 * years)
+#             else:
+#                 # Default to current time if can't parse
+#                 dt = now.replace(microsecond=0)
+#         else:
+#             # Handle absolute dates (month year format)
+#             # This is a simplification - would need more robust parsing for production
+#             dt = now.replace(microsecond=0)
+#
+#         return dt.isoformat()
+#     except Exception:
+#         # If parsing fails, return empty string
+#         return ""
+#
+#
+# def get_current_iso_date() -> str:
+#     """Return current UTC time in ISO format."""
+#     return datetime.now(timezone.utc).isoformat()
--- a/utils/logger.py
+++ b/utils/logger.py
@@ -0,0 +1,250 @@
+"""
+Structured Logger Module
+
+Provides a thread-safe, structured logging system with JSON-serializable output.
+Designed to replace the LogCapture class with enhanced categorization and metrics support.
+"""
+
+from dataclasses import dataclass, field, asdict
+from datetime import datetime, timezone
+from typing import Dict, List, Literal, Optional
+import threading
+import time
+
+
+# Severity names a log entry may carry.
+LogLevel = Literal[
+    'DEBUG',
+    'INFO',
+    'WARN',
+    'ERROR',
+    'FATAL',
+]
+# Subsystem names a log entry may be filed under.
+LogCategory = Literal[
+    'scraper',
+    'browser',
+    'network',
+    'system',
+]
+
+
+@dataclass
+class LogEntry:
+    """
+    One structured log record.
+
+    The first five fields are always present; ``metrics``, ``network`` and
+    ``snapshot_id`` are optional and default to None.
+    """
+    timestamp: str  # ISO 8601 with Z suffix
+    timestamp_ms: int  # Unix milliseconds
+    level: LogLevel
+    category: LogCategory
+    message: str
+    metrics: Optional[Dict] = None  # memory_mb, reviews_count, scroll_position, dom_nodes, etc.
+    network: Optional[Dict] = None  # url, method, status, size_bytes, duration_ms
+    snapshot_id: Optional[str] = None
+
+    def to_dict(self) -> Dict:
+        """
+        Return the entry as a plain dictionary.
+
+        The five mandatory fields are always emitted, in declaration order.
+        Each optional field is appended only when its value is not None
+        (an empty dict or empty string is still emitted).
+        """
+        mandatory = ('timestamp', 'timestamp_ms', 'level', 'category', 'message')
+        optional = ('metrics', 'network', 'snapshot_id')
+
+        payload = {name: getattr(self, name) for name in mandatory}
+        for name in optional:
+            value = getattr(self, name)
+            if value is not None:
+                payload[name] = value
+        return payload
+
+
+class StructuredLogger:
+    """
+    Thread-safe structured logger with categorized log entries and automatic pruning.
+
+    Entries are held in memory in insertion order, and every access to the
+    entry list happens while holding a single lock. ``metrics`` and
+    ``network`` dicts are shallow-copied when an entry is created, so mutating
+    the caller's dict afterwards does not rewrite entries already logged.
+
+    An instance is always truthy, even when it holds no entries, so
+    ``if logger:`` guards around an optional logger work on a fresh instance.
+
+    Example usage:
+        logger = StructuredLogger()
+        logger.info('browser', 'Navigating to URL', metrics={'memory_mb': 245})
+        logger.warn('network', 'Rate limit detected', network={'status': 429, 'url': '...'})
+        logger.error('system', 'Chrome crashed', metrics={'memory_mb': 489, 'dom_nodes': 12000})
+    """
+
+    # Level names accepted by the legacy ``log`` method.
+    _LEVELS = ('DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
+    # Spellings used by the stdlib ``logging`` module, mapped to this logger's names.
+    _LEVEL_ALIASES = {'WARNING': 'WARN', 'CRITICAL': 'FATAL'}
+
+    def __init__(self, max_entries: int = 10000):
+        """
+        Initialize the structured logger.
+
+        Args:
+            max_entries: Maximum number of log entries to retain (default 10000).
+                        Oldest entries are pruned when limit is exceeded.
+        """
+        self._entries: List[LogEntry] = []
+        self._lock = threading.Lock()
+        self._max_entries = max_entries
+
+    @staticmethod
+    def _snapshot(payload: Optional[Dict]) -> Optional[Dict]:
+        """Return a shallow copy of a dict payload; pass anything else through unchanged."""
+        if isinstance(payload, dict):
+            return dict(payload)
+        return payload
+
+    def _create_entry(
+        self,
+        level: LogLevel,
+        category: LogCategory,
+        message: str,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> LogEntry:
+        """
+        Create a new log entry stamped with the current UTC time.
+
+        Args:
+            level: Severity of the entry.
+            category: Subsystem the entry belongs to.
+            message: Human-readable log message.
+            metrics: Optional metrics dict; shallow-copied into the entry.
+            network: Optional network details dict; shallow-copied into the entry.
+            snapshot_id: Optional snapshot identifier.
+
+        Returns:
+            A LogEntry whose ``timestamp`` (ISO 8601, millisecond precision,
+            ``Z`` suffix) and ``timestamp_ms`` describe the same instant.
+        """
+        now = datetime.now(timezone.utc)
+        millis = now.microsecond // 1000
+        timestamp = now.strftime('%Y-%m-%dT%H:%M:%S.') + f'{millis:03d}Z'
+        # Whole seconds convert to float exactly, so adding the millisecond part
+        # as an integer avoids the float rounding of ``timestamp() * 1000``,
+        # which can land one millisecond below the value in ``timestamp``.
+        whole_seconds = int(now.replace(microsecond=0).timestamp())
+        timestamp_ms = whole_seconds * 1000 + millis
+
+        return LogEntry(
+            timestamp=timestamp,
+            timestamp_ms=timestamp_ms,
+            level=level,
+            category=category,
+            message=message,
+            # Copy so later mutation of the caller's dict cannot alter this entry.
+            metrics=self._snapshot(metrics),
+            network=self._snapshot(network),
+            snapshot_id=snapshot_id,
+        )
+
+    def _add_entry(self, entry: LogEntry) -> None:
+        """Add an entry to the log with thread-safety and automatic pruning."""
+        with self._lock:
+            self._entries.append(entry)
+            # Prune oldest entries if limit exceeded
+            if len(self._entries) > self._max_entries:
+                # Remove oldest 10% to avoid frequent pruning
+                prune_count = max(1, self._max_entries // 10)
+                self._entries = self._entries[prune_count:]
+
+    def _record(
+        self,
+        level: LogLevel,
+        category: LogCategory,
+        message: str,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> None:
+        """Build an entry at the given level and append it to the log."""
+        entry = self._create_entry(level, category, message, metrics, network, snapshot_id)
+        self._add_entry(entry)
+
+    def _select(self, keep=None) -> List[Dict]:
+        """
+        Return entries as dictionaries, optionally filtered.
+
+        Args:
+            keep: Optional predicate taking a LogEntry; when None every entry
+                  is returned.
+
+        Returns:
+            List of log entry dictionaries, oldest first.
+        """
+        with self._lock:
+            return [
+                entry.to_dict()
+                for entry in self._entries
+                if keep is None or keep(entry)
+            ]
+
+    def debug(
+        self,
+        category: LogCategory,
+        message: str,
+        *,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> None:
+        """Log a DEBUG level message."""
+        self._record('DEBUG', category, message, metrics, network, snapshot_id)
+
+    def info(
+        self,
+        category: LogCategory,
+        message: str,
+        *,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> None:
+        """Log an INFO level message."""
+        self._record('INFO', category, message, metrics, network, snapshot_id)
+
+    def warn(
+        self,
+        category: LogCategory,
+        message: str,
+        *,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> None:
+        """Log a WARN level message."""
+        self._record('WARN', category, message, metrics, network, snapshot_id)
+
+    def error(
+        self,
+        category: LogCategory,
+        message: str,
+        *,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> None:
+        """Log an ERROR level message."""
+        self._record('ERROR', category, message, metrics, network, snapshot_id)
+
+    def fatal(
+        self,
+        category: LogCategory,
+        message: str,
+        *,
+        metrics: Optional[Dict] = None,
+        network: Optional[Dict] = None,
+        snapshot_id: Optional[str] = None,
+    ) -> None:
+        """Log a FATAL level message."""
+        self._record('FATAL', category, message, metrics, network, snapshot_id)
+
+    def log(self, message: str, level: str = 'INFO') -> None:
+        """
+        Backward-compatible log method for legacy code.
+
+        Maps to 'system' category by default.
+
+        Args:
+            message: The log message
+            level: Log level as string (DEBUG, INFO, WARN, ERROR, FATAL),
+                   case-insensitive. The stdlib spellings WARNING and CRITICAL
+                   are accepted as aliases for WARN and FATAL. Any other value
+                   is recorded as INFO.
+        """
+        # str() keeps a non-string level from raising inside the logger.
+        level_upper = str(level).upper()
+        level_upper = self._LEVEL_ALIASES.get(level_upper, level_upper)
+        if level_upper not in self._LEVELS:
+            level_upper = 'INFO'
+
+        self._record(level_upper, 'system', message)
+
+    def get_logs(self) -> List[Dict]:
+        """
+        Get all log entries as JSON-serializable dictionaries.
+
+        Returns:
+            List of log entry dictionaries.
+        """
+        return self._select()
+
+    def get_logs_by_category(self, category: LogCategory) -> List[Dict]:
+        """
+        Get log entries filtered by category.
+
+        Args:
+            category: The category to filter by ('scraper', 'browser', 'network', 'system')
+
+        Returns:
+            List of log entry dictionaries matching the category.
+        """
+        return self._select(lambda entry: entry.category == category)
+
+    def get_logs_by_level(self, level: LogLevel) -> List[Dict]:
+        """
+        Get log entries filtered by level.
+
+        Args:
+            level: The level to filter by ('DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
+
+        Returns:
+            List of log entry dictionaries matching the level.
+        """
+        return self._select(lambda entry: entry.level == level)
+
+    def get_logs_since(self, timestamp_ms: int) -> List[Dict]:
+        """
+        Get log entries since a specific timestamp.
+
+        Args:
+            timestamp_ms: Unix timestamp in milliseconds
+
+        Returns:
+            List of log entry dictionaries with timestamp >= timestamp_ms.
+        """
+        return self._select(lambda entry: entry.timestamp_ms >= timestamp_ms)
+
+    def clear(self) -> None:
+        """Clear all log entries."""
+        with self._lock:
+            self._entries.clear()
+
+    def count(self) -> int:
+        """Get the current number of log entries."""
+        with self._lock:
+            return len(self._entries)
+
+    def __len__(self) -> int:
+        """Get the current number of log entries."""
+        return self.count()
+
+    def __bool__(self) -> bool:
+        """
+        Always return True.
+
+        Without this, defining ``__len__`` would make an empty logger falsy and
+        an ``if logger:`` guard would skip logging on a fresh instance.
+        """
+        return True