Release Google Reviews Scraper Pro v1.0.0 (2025)

Initial release with multi-language support, MongoDB integration, image handling, URL replacement, and robust error handling. Includes detailed documentation, usage examples, and recommended usage guidelines. Built to effectively handle Google's 2025 interface changes.
2025-04-24 22:12:07 +07:00
commit 5bbaf455d8
14 changed files with 4032 additions and 0 deletions
--- a/modules/__init__.py
+++ b/modules/__init__.py
--- a/modules/cli.py
+++ b/modules/cli.py
@@ -0,0 +1,76 @@
+"""
+Command line interface handling for Google Maps Reviews Scraper.
+"""
+
+import argparse
+import json
+from pathlib import Path
+
+from modules.config import DEFAULT_CONFIG_PATH
+
+
+def _str_to_bool(value):
+    """Convert a command line token such as "true", "no" or "1" to a bool.
+
+    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, which
+    turns every non-empty value (including "false" and "0") into ``True``.
+
+    Raises:
+        argparse.ArgumentTypeError: if the token is not a recognised boolean.
+    """
+    if isinstance(value, bool):
+        return value
+    token = str(value).strip().lower()
+    if token in ("1", "true", "t", "yes", "y", "on"):
+        return True
+    # The empty string stays falsy, exactly as it was under ``type=bool``.
+    if token in ("", "0", "false", "f", "no", "n", "off"):
+        return False
+    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+
+
+def parse_arguments():
+    """Parse the command line and return the populated namespace.
+
+    Value options that are not given stay ``None``; the ``store_true`` flags
+    default to ``False``.
+
+    Returns:
+        argparse.Namespace where ``config`` is a ``Path`` (or
+        ``DEFAULT_CONFIG_PATH`` when ``--config`` is absent) and
+        ``custom_params`` is the decoded JSON object, or ``None`` when it was
+        missing or could not be used.
+    """
+    ap = argparse.ArgumentParser(description="Google‑Maps review scraper with MongoDB integration")
+    ap.add_argument("-q", "--headless", action="store_true",
+                    help="run Chrome in the background")
+    ap.add_argument("-s", "--sort", dest="sort_by",
+                    choices=("newest", "highest", "lowest", "relevance"),
+                    default=None, help="sorting order for reviews")
+    ap.add_argument("--stop-on-match", action="store_true",
+                    help="stop scrolling when first already‑seen id is met "
+                         "(useful with --sort newest)")
+    ap.add_argument("--url", type=str, default=None,
+                    help="custom Google Maps URL to scrape")
+    ap.add_argument("--overwrite", action="store_true", dest="overwrite_existing",
+                    help="overwrite existing reviews instead of appending")
+    ap.add_argument("--config", type=str, default=None,
+                    help="path to custom configuration file")
+    # Boolean-valued options go through _str_to_bool so that "false"/"0"/"no"
+    # really disable the feature.
+    ap.add_argument("--use-mongodb", type=_str_to_bool, default=None,
+                    help="whether to use MongoDB for storage")
+
+    # Arguments for date conversion and image downloading
+    ap.add_argument("--convert-dates", type=_str_to_bool, default=None,
+                    help="convert string dates to MongoDB Date objects")
+    ap.add_argument("--download-images", type=_str_to_bool, default=None,
+                    help="download images from reviews")
+    ap.add_argument("--image-dir", type=str, default=None,
+                    help="directory to store downloaded images")
+    ap.add_argument("--download-threads", type=int, default=None,
+                    help="number of threads for downloading images")
+
+    # Arguments for local image paths and URL replacement
+    ap.add_argument("--store-local-paths", type=_str_to_bool, default=None,
+                    help="whether to store local image paths in documents")
+    ap.add_argument("--replace-urls", type=_str_to_bool, default=None,
+                    help="whether to replace original URLs with custom ones")
+    ap.add_argument("--custom-url-base", type=str, default=None,
+                    help="base URL for replacement")
+    ap.add_argument("--custom-url-profiles", type=str, default=None,
+                    help="path for profile images")
+    ap.add_argument("--custom-url-reviews", type=str, default=None,
+                    help="path for review images")
+    ap.add_argument("--preserve-original-urls", type=_str_to_bool, default=None,
+                    help="whether to preserve original URLs in original_* fields")
+
+    # Arguments for custom parameters
+    ap.add_argument("--custom-params", type=str, default=None,
+                    help="JSON string with custom parameters to add to each document (e.g. '{\"company\":\"Thaitours\"}')")
+
+    args = ap.parse_args()
+
+    # Handle config path
+    if args.config is not None:
+        args.config = Path(args.config)
+    else:
+        args.config = DEFAULT_CONFIG_PATH
+
+    # Process custom params if provided
+    if args.custom_params:
+        try:
+            parsed_params = json.loads(args.custom_params)
+        except json.JSONDecodeError:
+            print(f"Warning: Could not parse custom params JSON: {args.custom_params}")
+            parsed_params = None
+        # The storage classes iterate custom params with .items(), so only a
+        # JSON object is usable here.
+        if parsed_params is not None and not isinstance(parsed_params, dict):
+            print(f"Warning: custom params must be a JSON object: {args.custom_params}")
+            parsed_params = None
+        args.custom_params = parsed_params
+
+    return args
--- a/modules/data_storage.py
+++ b/modules/data_storage.py
@@ -0,0 +1,319 @@
+"""
+Data storage modules for Google Maps Reviews Scraper.
+"""
+
+import json
+import logging
+import ssl
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Any, Set
+
+import pymongo
+
+from modules.date_converter import parse_relative_date, DateConverter
+from modules.image_handler import ImageHandler
+from modules.models import RawReview
+from modules.utils import detect_lang, get_current_iso_date
+
+# Configure SSL for MongoDB connection (macOS SSL fix).
+# NOTE(review): this swaps the ssl module's default HTTPS context factory for
+# the unverified one at import time, so it applies process-wide to whatever
+# relies on that default, presumably more than the MongoDB client. Confirm it
+# is still required and consider scoping it more narrowly.
+ssl._create_default_https_context = ssl._create_unverified_context  # macOS SSL fix
+
+# Logger (same "scraper" logger name the other modules in this commit use)
+log = logging.getLogger("scraper")
+
+# Language code handed to parse_relative_date() for raw review dates.
+RAW_LANG = "en"
+
+
+class MongoDBStorage:
+    """MongoDB storage handler for Google Maps reviews.
+
+    Reads connection settings from ``config["mongodb"]`` and the processing
+    switches (date conversion, image download, URL handling, custom
+    parameters) from the top level of ``config``.
+    """
+
+    def __init__(self, config: Dict[str, Any]):
+        """Initialize MongoDB storage with configuration (no connection is opened here)."""
+        mongodb_config = config.get("mongodb", {})
+        self.uri = mongodb_config.get("uri")
+        self.db_name = mongodb_config.get("database")
+        self.collection_name = mongodb_config.get("collection")
+        self.client = None
+        self.collection = None
+        self.connected = False
+        self.convert_dates = config.get("convert_dates", True)
+        self.download_images = config.get("download_images", False)
+        self.store_local_paths = config.get("store_local_paths", True)
+        self.replace_urls = config.get("replace_urls", False)
+        self.preserve_original_urls = config.get("preserve_original_urls", True)
+        self.custom_params = config.get("custom_params", {})
+        # The image handler is only built when image downloading is enabled.
+        self.image_handler = ImageHandler(config) if self.download_images else None
+
+    def connect(self) -> bool:
+        """Connect to MongoDB and select the configured collection.
+
+        Returns:
+            True when the ping succeeded and the collection handle is ready,
+            False otherwise (the error is logged, not raised).
+        """
+        try:
+            # Use the correct TLS parameters for newer PyMongo versions
+            # NOTE(review): certificate validation is switched off here;
+            # confirm this is intended outside of local development.
+            self.client = pymongo.MongoClient(
+                self.uri,
+                tlsAllowInvalidCertificates=True,  # Equivalent to ssl_cert_reqs=CERT_NONE
+                connectTimeoutMS=30000,
+                socketTimeoutMS=None,
+                connect=True,
+                maxPoolSize=50
+            )
+            # Test connection
+            self.client.admin.command('ping')
+            db = self.client[self.db_name]
+            self.collection = db[self.collection_name]
+            self.connected = True
+            log.info(f"Connected to MongoDB: {self.db_name}.{self.collection_name}")
+            return True
+        except Exception as e:
+            log.error(f"Failed to connect to MongoDB: {e}")
+            self.connected = False
+            return False
+
+    def close(self):
+        """Close MongoDB connection"""
+        if self.client:
+            self.client.close()
+            self.connected = False
+
+    def fetch_existing_reviews(self) -> Dict[str, Dict[str, Any]]:
+        """Fetch existing reviews from MongoDB, keyed by ``review_id``.
+
+        Documents without a ``review_id`` are ignored. Returns an empty dict
+        when the connection or the query fails.
+        """
+        if not self.connected and not self.connect():
+            log.warning("Cannot fetch existing reviews - MongoDB connection failed")
+            return {}
+
+        try:
+            reviews = {}
+            # "_id" is projected out so the documents stay plain data.
+            for doc in self.collection.find({}, {"_id": 0}):
+                review_id = doc.get("review_id")
+                if review_id:
+                    reviews[review_id] = doc
+            log.info(f"Fetched {len(reviews)} existing reviews from MongoDB")
+            return reviews
+        except Exception as e:
+            log.error(f"Error fetching reviews from MongoDB: {e}")
+            return {}
+
+    def save_reviews(self, reviews: Dict[str, Dict[str, Any]]):
+        """Save reviews to MongoDB using bulk upserts keyed on ``review_id``.
+
+        The reviews are processed (date conversion, image download, field
+        clean-up, custom parameters) on per-review copies, so the top-level
+        fields of the caller's documents are left untouched. Errors are
+        logged, not raised.
+        """
+        if not reviews:
+            log.info("No reviews to save to MongoDB")
+            return
+
+        if not self.connected and not self.connect():
+            log.warning("Cannot save reviews - MongoDB connection failed")
+            return
+
+        try:
+            # Copy every review: copying only the outer dict would still
+            # share (and mutate) the caller's review dicts.
+            processed_reviews = {review_id: dict(review) for review_id, review in reviews.items()}
+
+            # Convert string dates to datetime objects if enabled
+            if self.convert_dates:
+                processed_reviews = DateConverter.convert_dates_in_reviews(processed_reviews)
+
+            # Download and process images if enabled
+            if self.download_images and self.image_handler:
+                processed_reviews = self.image_handler.download_all_images(processed_reviews)
+
+                # If not storing local paths, remove them from the documents
+                if not self.store_local_paths:
+                    for review in processed_reviews.values():
+                        review.pop("local_images", None)
+                        review.pop("local_profile_picture", None)
+
+                # If not preserving original URLs, remove them from the documents
+                if self.replace_urls and not self.preserve_original_urls:
+                    for review in processed_reviews.values():
+                        review.pop("original_image_urls", None)
+                        review.pop("original_profile_picture", None)
+
+            # Add custom parameters to each document
+            if self.custom_params:
+                log.info(f"Adding custom parameters to {len(processed_reviews)} documents")
+                for review in processed_reviews.values():
+                    for key, value in self.custom_params.items():
+                        review[key] = value
+
+            operations = []
+            for review in processed_reviews.values():
+                # Exclude _id, MongoDB will generate it on insert
+                review.pop("_id", None)
+
+                # A document without the upsert key must not abort the batch.
+                if "review_id" not in review:
+                    log.warning("Skipping review without review_id")
+                    continue
+
+                operations.append(
+                    pymongo.UpdateOne(
+                        {"review_id": review["review_id"]},
+                        {"$set": review},
+                        upsert=True
+                    )
+                )
+
+            if operations:
+                result = self.collection.bulk_write(operations)
+                log.info(f"MongoDB: Upserted {result.upserted_count}, modified {result.modified_count} reviews")
+        except Exception as e:
+            log.error(f"Error saving reviews to MongoDB: {e}")
+
+
+class JSONStorage:
+    """Keeps reviews in a JSON file and the already-seen review ids in a text file."""
+
+    def __init__(self, config: Dict[str, Any]):
+        """Pick up file locations and processing switches from ``config``."""
+        option = config.get
+        self.json_path = Path(option("json_path", "google_reviews.json"))
+        self.seen_ids_path = Path(option("seen_ids_path", "google_reviews.ids"))
+        self.convert_dates = option("convert_dates", True)
+        self.download_images = option("download_images", False)
+        self.store_local_paths = option("store_local_paths", True)
+        self.replace_urls = option("replace_urls", False)
+        self.preserve_original_urls = option("preserve_original_urls", True)
+        self.custom_params = option("custom_params", {})
+        # Only instantiate the image handler when downloads are switched on.
+        self.image_handler = ImageHandler(config) if self.download_images else None
+
+    def load_json_docs(self) -> Dict[str, Dict[str, Any]]:
+        """Return the stored reviews keyed by ``review_id``.
+
+        A missing file or undecodable JSON yields an empty dict; entries
+        without a ``review_id`` are dropped.
+        """
+        if not self.json_path.exists():
+            return {}
+        try:
+            entries = json.loads(self.json_path.read_text(encoding="utf-8"))
+            indexed = {}
+            for entry in entries:
+                if entry.get("review_id"):
+                    indexed[entry.get("review_id", "")] = entry
+            return indexed
+        except json.JSONDecodeError:
+            log.warning("⚠️ Error reading JSON file, starting with empty data")
+            return {}
+
+    def save_json_docs(self, docs: Dict[str, Dict[str, Any]]):
+        """Process the reviews and write them to the JSON file as a list.
+
+        Processing happens on shallow per-review copies: optional date
+        conversion, optional image download with field clean-up, custom
+        parameters, and finally datetime values rendered as ISO strings.
+        """
+        snapshot = {}
+        for review_id, review in docs.items():
+            snapshot[review_id] = review.copy()
+
+        if self.convert_dates:
+            snapshot = DateConverter.convert_dates_in_reviews(snapshot)
+
+        if self.download_images and self.image_handler:
+            snapshot = self.image_handler.download_all_images(snapshot)
+
+            # Local paths are dropped again when they should not be stored.
+            if not self.store_local_paths:
+                for review in snapshot.values():
+                    review.pop("local_images", None)
+                    review.pop("local_profile_picture", None)
+
+            # Original URLs are dropped when replaced and not to be preserved.
+            if self.replace_urls and not self.preserve_original_urls:
+                for review in snapshot.values():
+                    review.pop("original_image_urls", None)
+                    review.pop("original_profile_picture", None)
+
+        if self.custom_params:
+            log.info(f"Adding custom parameters to {len(snapshot)} documents")
+            for review in snapshot.values():
+                review.update(self.custom_params.items())
+
+        # json cannot serialise datetime objects, so turn them into ISO strings.
+        for doc in snapshot.values():
+            stamped = [key for key, value in doc.items() if isinstance(value, datetime)]
+            for key in stamped:
+                doc[key] = doc[key].isoformat()
+
+        payload = json.dumps(list(snapshot.values()), ensure_ascii=False, indent=2)
+        self.json_path.write_text(payload, encoding="utf-8")
+
+    def load_seen(self) -> Set[str]:
+        """Return the stored review ids (one per line), or an empty set if the file is absent."""
+        if not self.seen_ids_path.exists():
+            return set()
+        return set(self.seen_ids_path.read_text(encoding="utf-8").splitlines())
+
+    def save_seen(self, ids: Set[str]):
+        """Write the review ids to the ids file, one per line."""
+        content = "\n".join(ids)
+        self.seen_ids_path.write_text(content, encoding="utf-8")
+
+
+def merge_review(existing: Dict[str, Any] | None, raw: RawReview) -> Dict[str, Any]:
+    """
+    Merge a raw review with an existing review document.
+
+    Creates a new document if existing is None (or empty); otherwise the
+    existing document is updated in place and returned.
+
+    Args:
+        existing: Previously stored document for this review, possibly still
+            using the old field names, or None.
+        raw: Freshly scraped review.
+
+    Returns:
+        The merged document, with ``last_modified_date`` refreshed.
+    """
+    if not existing:
+        # Create a new review with the updated field names
+        existing = {
+            "review_id": raw.id,
+            "author": raw.author,
+            "rating": raw.rating,
+            "description": {},  # renamed from "texts"
+            "likes": raw.likes,
+            "user_images": list(raw.photos),  # renamed from "photo_urls"
+            "author_profile_url": raw.profile,  # renamed from "profile_link"
+            "profile_picture": raw.avatar,  # renamed from "avatar_url"
+            "owner_responses": {},
+            "created_date": get_current_iso_date(),
+            "review_date": parse_relative_date(raw.date, RAW_LANG),
+        }
+    else:
+        # Handle existing reviews with old field names - migrate them
+        legacy_names = (
+            ("texts", "description"),
+            ("photo_urls", "user_images"),
+            ("profile_link", "author_profile_url"),
+            ("avatar_url", "profile_picture"),
+        )
+        for old_name, new_name in legacy_names:
+            if old_name in existing and new_name not in existing:
+                existing[new_name] = existing.pop(old_name)
+
+        # Add ISO dates if not present
+        if "created_date" not in existing:
+            existing["created_date"] = get_current_iso_date()
+
+        if "review_date" not in existing:
+            existing["review_date"] = parse_relative_date(raw.date, RAW_LANG)
+
+        # Remove the 'date' field if it exists
+        existing.pop("date", None)
+
+    if raw.text:
+        # setdefault: an existing document may have neither "description"
+        # nor the legacy "texts" field.
+        existing.setdefault("description", {})[raw.lang] = raw.text
+
+    if not existing.get("rating"):
+        existing["rating"] = raw.rating
+
+    # Treat a missing or None like count as 0 so the comparison cannot fail.
+    if (raw.likes or 0) > (existing.get("likes") or 0):
+        existing["likes"] = raw.likes
+
+    # Update the images list: de-duplicate while keeping first-seen order, so
+    # the stored list does not get reshuffled from one run to the next.
+    existing["user_images"] = list(
+        dict.fromkeys([*(existing.get("user_images") or []), *raw.photos]))
+
+    # Update avatar/profile picture (a longer URL replaces a shorter one)
+    if raw.avatar and (
+            not existing.get("profile_picture") or len(raw.avatar) > len(existing.get("profile_picture", ""))):
+        existing["profile_picture"] = raw.avatar
+
+    if raw.owner_text:
+        lang = detect_lang(raw.owner_text)
+        # Don't store the date string in owner_responses
+        existing.setdefault("owner_responses", {})[lang] = {
+            "text": raw.owner_text,
+        }
+
+    # Update last_modified timestamp
+    existing["last_modified_date"] = get_current_iso_date()
+
+    return existing
--- a/modules/date_converter.py
+++ b/modules/date_converter.py
@@ -0,0 +1,391 @@
+"""
+Date conversion utilities for Google Maps reviews.
+"""
+
+import logging
+import re
+from datetime import datetime, timedelta
+from typing import Dict, Any, Optional
+
+# Logger
+log = logging.getLogger("scraper")
+
+
+def relative_to_datetime(date_str: str, lang: str = "en") -> Optional[datetime]:
+    """
+    Convert a relative date string to a datetime object.
+
+    The string is tried in the requested language first and then in the other
+    languages try_parse_date() knows about. Unlike parse_relative_date(), no
+    substitute date is made up when nothing matches.
+
+    Args:
+        date_str: The relative date string (e.g., "2 years ago")
+        lang: Language code tried first ("en", "he" or "th")
+
+    Returns:
+        datetime object or None if conversion fails
+    """
+    if not date_str:
+        return None
+
+    try:
+        now = datetime.utcnow()  # same UTC reference parse_relative_date() uses
+        requested = (lang or "en").lower()
+        candidates = [requested] + [code for code in ("en", "he", "th") if code != requested]
+
+        for code in candidates:
+            iso_date = try_parse_date(date_str, code, now)
+            # try_parse_date() hands the input back unchanged when it did not match
+            if iso_date != date_str:
+                return datetime.fromisoformat(iso_date)
+
+        return None
+    except Exception as e:
+        log.debug(f"Failed to convert relative date '{date_str}': {e}")
+        return None
+
+
+class DateConverter:
+    """Turns the string dates of review documents into datetime objects."""
+
+    @staticmethod
+    def convert_dates_in_document(doc: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Convert the date fields of one document in place.
+
+        Args:
+            doc: Review document whose date fields may still be strings
+
+        Returns:
+            The same document object, with ``created_date``,
+            ``last_modified_date`` and ``review_date`` converted where
+            possible and the raw ``date`` strings removed
+        """
+        # The raw "date" string is dropped; it only serves to fill an empty review_date.
+        if "date" in doc:
+            legacy_date = doc.pop("date")
+            if not doc.get("review_date"):
+                first_lang = next(iter(doc.get("description", {}).keys()), "en")
+                recovered = relative_to_datetime(legacy_date, first_lang)
+                if recovered:
+                    doc["review_date"] = recovered
+
+        for name in ("created_date", "last_modified_date", "review_date"):
+            current = doc.get(name)
+            if not isinstance(current, str):
+                continue
+            try:
+                # ISO format first; a trailing "Z" is rewritten as an explicit UTC offset.
+                doc[name] = datetime.fromisoformat(current.replace('Z', '+00:00'))
+            except (ValueError, TypeError):
+                # Not ISO: fall back to treating the value as a relative date.
+                first_lang = next(iter(doc.get("description", {}).keys()), "en")
+                recovered = relative_to_datetime(doc[name], first_lang)
+                if recovered:
+                    doc[name] = recovered
+
+        # Owner responses do not keep their date string either.
+        responses = doc.get("owner_responses")
+        if isinstance(responses, dict):
+            for response in responses.values():
+                if isinstance(response, dict):
+                    response.pop("date", None)
+
+        return doc
+
+    @staticmethod
+    def convert_dates_in_reviews(reviews: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+        """
+        Run convert_dates_in_document() over every review.
+
+        Args:
+            reviews: Dictionary of review documents keyed by review id
+
+        Returns:
+            The same dictionary, with each document converted
+        """
+        log.info("Converting string dates to datetime objects...")
+
+        for key in reviews:
+            reviews[key] = DateConverter.convert_dates_in_document(reviews[key])
+
+        return reviews
+
+
+def parse_relative_date(date_str: str, lang: str, now: Optional[datetime] = None) -> str:
+    """
+    Converts a relative review_date such as "a week ago" or "לפני 7 שנים"
+    into an ISO formatted datetime string.
+
+    The string is handed to try_parse_date() with the given language first and
+    then with the remaining supported languages ("en", "he", "th"); which
+    phrasings are recognised is decided there.
+
+    Parameters:
+      - date_str (str): the relative date string.
+      - lang (str): language code to try first.
+      - now (Optional[datetime]): reference datetime; if None, the current UTC
+        time (naive, from datetime.utcnow()) is used.
+
+    Returns:
+      A string representing the calculated absolute datetime in ISO 8601 format.
+      If parsing fails in all supported languages, returns a random date within
+      the last year (1 to 365 days before ``now``) and logs a warning, because
+      that value is made up rather than derived from the input.
+    """
+    import random
+
+    if now is None:
+        now = datetime.utcnow()  # use UTC for consistency
+
+    # Try with the provided language first
+    result = try_parse_date(date_str, lang, now)
+    if result != date_str:
+        return result
+
+    # If the provided language failed, try other supported languages
+    supported_langs = ["en", "he", "th"]
+    for alt_lang in supported_langs:
+        if alt_lang != lang.lower():
+            result = try_parse_date(date_str, alt_lang, now)
+            if result != date_str:
+                return result
+
+    # If all parsing attempts failed, generate a random date within the last year
+    # This creates a date between 1 day ago and 365 days ago
+    random_days_ago = random.randint(1, 365)
+    random_date = now - timedelta(days=random_days_ago)
+    # Leave a trace: the returned date is fabricated, not parsed.
+    log.warning("Could not parse relative date %r; substituting a random date "
+                "within the last year", date_str)
+    return random_date.isoformat()
+
+
+def _relative_delta(unit: str, num: int) -> timedelta:
+    """Return the (approximate) time span covered by ``num`` units of ``unit``."""
+    per_unit = {
+        "minute": timedelta(minutes=1),
+        "hour": timedelta(hours=1),
+        "day": timedelta(days=1),
+        "week": timedelta(weeks=1),
+        "month": timedelta(days=30),  # approximate
+        "year": timedelta(days=365),  # approximate
+    }
+    return per_unit[unit] * num
+
+
+def try_parse_date(date_str: str, lang: str, now: datetime) -> str:
+    """
+    Helper function that attempts to parse a date string in a specific language.
+
+    Recognised units are minutes, hours, days, weeks, months and years for
+    English ("en"), Hebrew ("he") and Thai ("th"); months count as 30 days and
+    years as 365 days.
+
+    Args:
+        date_str: The relative date string (e.g. "3 weeks ago").
+        lang: Language code selecting the pattern set; any other code never matches.
+        now: Reference datetime the offset is subtracted from.
+
+    Returns the ISO formatted date if successful, or the original string if not.
+    """
+    code = lang.lower()
+    delta = None  # stays None until one of the patterns matched
+
+    if code == "en":
+        # Pattern: capture number or "a"/"an", then unit.
+        m = re.search(
+            r'(?P<num>a|an|\d+)\s+(?P<unit>minute|hour|day|week|month|year)s?\s+ago',
+            date_str, re.IGNORECASE)
+        if m:
+            num_str = m.group("num").lower()
+            num = 1 if num_str in ("a", "an") else int(num_str)
+            delta = _relative_delta(m.group("unit").lower(), num)
+    elif code == "he":
+        # Remove the "לפני" prefix if present
+        text = date_str.strip()
+        if text.startswith("לפני"):
+            text = text[len("לפני"):].strip()
+
+        # Dual forms, where the number (two) is part of the word itself
+        dual_forms = {
+            "שנתיים": "year",
+            "חודשיים": "month",
+            "שבועיים": "week",
+            "יומיים": "day",
+            "שעתיים": "hour",
+        }
+        # Singular and plural unit words mapped to English unit names
+        units = {
+            "שנה": "year",
+            "שנים": "year",
+            "חודש": "month",
+            "חודשים": "month",
+            "שבוע": "week",
+            "שבועות": "week",
+            "יום": "day",
+            "ימים": "day",
+            "שעה": "hour",
+            "שעות": "hour",
+            "דקה": "minute",
+            "דקות": "minute",
+        }
+        if text in dual_forms:
+            delta = _relative_delta(dual_forms[text], 2)
+        else:
+            # Match optional number (or assume 1) and then a unit.
+            unit_alternatives = "|".join(re.escape(word) for word in units)
+            m = re.search(r'(?P<num>\d+|אחד|אחת)?\s*(?P<unit>' + unit_alternatives + r')', text)
+            if m:
+                num_str = m.group("num")
+                try:
+                    num = int(num_str) if num_str else 1
+                except ValueError:
+                    num = 1  # the number words "אחד"/"אחת" both mean one
+                delta = _relative_delta(units[m.group("unit")], num)
+    elif code == "th":
+        # Thai language patterns (simplified), e.g. "3 วันที่แล้ว" (3 days ago);
+        # a missing number counts as 1.
+        units = {
+            "วัน": "day",
+            "สัปดาห์": "week",
+            "เดือน": "month",
+            "ปี": "year",
+            "ชั่วโมง": "hour",
+            "นาที": "minute",
+        }
+        unit_alternatives = "|".join(re.escape(word) for word in units)
+        m = re.search(r'(?P<num>\d+)?\s*(?P<unit>' + unit_alternatives + r')ที่แล้ว', date_str)
+        if m:
+            num_str = m.group("num")
+            num = int(num_str) if num_str else 1
+            delta = _relative_delta(units[m.group("unit")], num)
+
+    # Return the calculated date if parsing was successful, otherwise return the original string
+    if delta is None:
+        return date_str
+    return (now - delta).isoformat()
+
+
+# def parse_relative_date(date_str: str, lang: str, now: Optional[datetime] = None) -> str:
+#     """
+#     Converts a relative review_date (in English or Hebrew) such as "a week ago" or "לפני 7 שנים"
+#     into an ISO formatted datetime string (UTC).
+#
+#     For English, supported formats include:
+#        - "a day ago", "an hour ago", "3 weeks ago", "4 months ago", "2 years ago", etc.
+#     For Hebrew, supported formats include:
+#        - "לפני יום", "לפני 2 ימים", "לפני שבוע", "לפני שבועיים", "לפני חודש",
+#          "לפני חודשיים", "לפני 10 חודשים", "לפני שנה", "לפני 3 שנים", etc.
+#
+#     Parameters:
+#       - date_str (str): the relative date string.
+#       - lang (str): "en" for English or "he" for Hebrew.
+#       - now (Optional[datetime]): reference datetime; if None, current local time is used.
+#
+#     Returns:
+#       A string representing the calculated absolute datetime in ISO 8601 format,
+#       or the original date_str if parsing fails.
+#     """
+#     if now is None:
+#         now = datetime.utcnow()  # use UTC for consistency
+#
+#     delta = timedelta(0)
+#
+#     if lang.lower() == "en":
+#         # Pattern: capture number or "a"/"an", then unit.
+#         pattern = re.compile(r'(?P<num>a|an|\d+)\s+(?P<unit>day|week|month|year)s?\s+ago', re.IGNORECASE)
+#         m = pattern.search(date_str)
+#         if m:
+#             num_str = m.group("num").lower()
+#             num = 1 if num_str in ("a", "an") else int(num_str)
+#             unit = m.group("unit").lower()
+#             if unit == "day":
+#                 delta = timedelta(days=num)
+#             elif unit == "week":
+#                 delta = timedelta(weeks=num)
+#             elif unit == "month":
+#                 delta = timedelta(days=30 * num)  # approximate
+#             elif unit == "year":
+#                 delta = timedelta(days=365 * num)  # approximate
+#         else:
+#             return date_str  # return original if not matched
+#     elif lang.lower() == "he":
+#         # Remove the "לפני" prefix if present
+#         text = date_str.strip()
+#         if text.startswith("לפני"):
+#             text = text[len("לפני"):].strip()
+#
+#         # Handle special cases where the number and unit are combined:
+#         special = {
+#             "חודשיים": (2, "month"),
+#             "שבועיים": (2, "week"),
+#             "יומיים": (2, "day"),
+#         }
+#         if text in special:
+#             num, unit = special[text]
+#         else:
+#             # Match optional number (or assume 1) and then a unit.
+#             pattern = re.compile(r'(?P<num>\d+|אחד|אחת)?\s*(?P<unit>שנה|שנים|חודש|חודשים|יום|ימים|שבוע|שבועות)',
+#                                  re.IGNORECASE)
+#             m = pattern.search(text)
+#             if m:
+#                 num_str = m.group("num")
+#                 if not num_str:
+#                     num = 1
+#                 else:
+#                     try:
+#                         num = int(num_str)
+#                     except ValueError:
+#                         num = 1
+#                 unit_he = m.group("unit")
+#                 # Map the Hebrew unit (both singular and plural) to English unit names
+#                 if unit_he in ("יום", "ימים"):
+#                     unit = "day"
+#                 elif unit_he in ("שבוע", "שבועות"):
+#                     unit = "week"
+#                 elif unit_he in ("חודש", "חודשים"):
+#                     unit = "month"
+#                 elif unit_he in ("שנה", "שנים"):
+#                     unit = "year"
+#                 else:
+#                     unit = "day"  # fallback
+#             else:
+#                 return date_str  # if nothing matches, return original text
+#
+#         if unit == "day":
+#             delta = timedelta(days=num)
+#         elif unit == "week":
+#             delta = timedelta(weeks=num)
+#         elif unit == "month":
+#             delta = timedelta(days=30 * num)  # approximate
+#         elif unit == "year":
+#             delta = timedelta(days=365 * num)  # approximate
+#
+#     result = now - delta
+#     return result.isoformat()
+
+
+# --- Example usage ---
+if __name__ == "__main__":
+    # Pin the reference time so the printed results do not depend on the clock.
+    reference_time = datetime(2025, 2, 5, 12, 0, 0)
+    # (text, language) pairs; the first one deliberately pairs English text
+    # with the Hebrew language code.
+    samples = (
+        ("a week ago", "he"),
+        ("4 weeks ago", "en"),
+        ("לפני 7 שנים", "he"),
+        ("לפני חודשיים", "he"),
+    )
+    for sample_text, sample_lang in samples:
+        converted = parse_relative_date(sample_text, sample_lang, now=reference_time)
+        print(f"Original: {sample_text} ({sample_lang}) => ISO: {converted}")
--- a/modules/image_handler.py
+++ b/modules/image_handler.py
@@ -0,0 +1,283 @@
+"""
+Image downloading and handling for Google Maps Reviews Scraper.
+"""
+
+import logging
+import re
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from typing import Dict, Any, Set, Tuple
+from urllib.parse import urlparse
+
+import requests
+
+# Logger
+log = logging.getLogger("scraper")
+
+
+class ImageHandler:
+    """Download review and profile images and optionally rewrite their URLs.
+
+    Files are written below ``image_dir`` in two subdirectories, ``profiles``
+    and ``reviews``. When ``replace_urls`` is enabled, image URLs in the review
+    documents are swapped for URLs built from ``custom_url_base`` and the
+    saved filename.
+    """
+
+    def __init__(self, config: Dict[str, Any]):
+        """Read image-related settings from ``config``, applying defaults.
+
+        Args:
+            config: Settings mapping. Keys read here: ``image_dir``,
+                ``download_threads``, ``store_local_paths``, ``replace_urls``,
+                ``custom_url_base``, ``custom_url_profiles``,
+                ``custom_url_reviews`` and ``preserve_original_urls``.
+        """
+        self.image_dir = Path(config.get("image_dir", "review_images"))
+        self.max_workers = config.get("download_threads", 4)
+        self.store_local_paths = config.get("store_local_paths", True)
+
+        # URL replacement settings
+        self.replace_urls = config.get("replace_urls", False)
+        self.custom_url_base = config.get("custom_url_base", "https://mycustomurl.com")
+        self.custom_url_profiles = config.get("custom_url_profiles", "/profiles/")
+        self.custom_url_reviews = config.get("custom_url_reviews", "/reviews/")
+        self.preserve_original_urls = config.get("preserve_original_urls", True)
+
+        # Subdirectories for different image types
+        self.profile_dir = self.image_dir / "profiles"
+        self.review_dir = self.image_dir / "reviews"
+
+    def ensure_directories(self):
+        """Create the profile and review image directories if they are missing."""
+        self.profile_dir.mkdir(parents=True, exist_ok=True)
+        self.review_dir.mkdir(parents=True, exist_ok=True)
+
+    def is_not_custom_url(self, url: str) -> bool:
+        """Return True when ``url`` is non-empty and not under ``custom_url_base``.
+
+        An empty URL yields False, so callers can use this as a combined
+        "is there something to download" check.
+        """
+        if not url:
+            return False
+
+        # URLs that already point at our own host were rewritten earlier - skip them
+        if self.custom_url_base and url.startswith(self.custom_url_base):
+            return False
+
+        return True
+
+    def get_filename_from_url(self, url: str, is_profile: bool = False) -> str:
+        """Derive the local ``.jpg`` filename for an image URL.
+
+        Args:
+            url: Source image URL.
+            is_profile: True for profile pictures, False for review photos.
+
+        Returns:
+            The filename, or an empty string for empty or custom URLs.
+        """
+        if not url:
+            return ""
+
+        # Skip our custom URLs
+        if not self.is_not_custom_url(url):
+            return ""
+
+        # For profile pictures
+        if is_profile:
+            # Use the last path segment, or the one before it when the last
+            # segment is empty or the known size suffix
+            parts = url.split('/')
+            if len(parts) > 1:
+                filename = parts[-2] if parts[-1] in ('', 'w72-h72-p-rp-mo-ba4-br100') else parts[-1]
+                return f"{filename}.jpg"
+
+        # For review images: text from "AIHoz" up to the first "=" is the ID
+        match = re.search(r'AIHoz[^=]+=', url)
+        if match:
+            # Use the ID as filename
+            return f"{match.group(0).rstrip('=')}w600-h450-p.jpg"
+
+        # Fallback to using the last part of the URL path
+        parsed = urlparse(url)
+        path = parsed.path
+        filename = path.split('/')[-1]
+
+        # Add .jpg extension if not present
+        if not filename.lower().endswith('.jpg'):
+            filename += ".jpg"
+
+        return filename
+
+    def get_custom_url(self, filename: str, is_profile: bool = False) -> str:
+        """Build ``<custom_url_base>/<profiles|reviews path>/<filename>``.
+
+        Returns an empty string when URL replacement is disabled or
+        ``filename`` is empty.
+        """
+        if not self.replace_urls or not filename:
+            return ""
+
+        base_url = self.custom_url_base.rstrip('/')
+        path = self.custom_url_profiles if is_profile else self.custom_url_reviews
+        path = path.strip('/')
+
+        return f"{base_url}/{path}/{filename}"
+
+    @staticmethod
+    def _fetch_to_file(url: str, filepath: Path) -> None:
+        """Stream ``url`` to ``filepath`` without ever exposing a partial file."""
+        # Function-scope import: only needed to build a per-thread temp name.
+        import threading
+
+        # Each worker thread writes to its own ".part" file so two URLs that
+        # map to the same filename cannot interleave their writes.
+        tmp_path = filepath.with_name(f"{filepath.name}.{threading.get_ident()}.part")
+        try:
+            # The context manager releases the streamed connection on any exit.
+            with requests.get(url, stream=True, timeout=10) as response:
+                response.raise_for_status()
+                with open(tmp_path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            # Rename only after the whole body was written successfully.
+            tmp_path.replace(filepath)
+        finally:
+            # No-op after a successful rename; removes leftovers after a failure.
+            tmp_path.unlink(missing_ok=True)
+
+    def download_image(self, url_info: Tuple[str, bool]) -> Tuple[str, str, str]:
+        """
+        Download an image from URL and save to disk.
+
+        The image is first written to a temporary file and renamed into place
+        once complete, so a failed or interrupted download never leaves a
+        truncated file that later runs would treat as already downloaded.
+
+        Args:
+            url_info: Tuple of (url, is_profile)
+
+        Returns:
+            Tuple of (url, local filename, custom url). The filename and the
+            custom URL are empty strings when the URL is skipped or the
+            download fails; failures are logged, not raised.
+        """
+        url, is_profile = url_info
+
+        # Skip our custom URLs
+        if not self.is_not_custom_url(url):
+            return url, "", ""
+
+        try:
+            filename = self.get_filename_from_url(url, is_profile)
+            if not filename:
+                return url, "", ""
+
+            # Choose directory based on image type
+            target_dir = self.profile_dir if is_profile else self.review_dir
+            filepath = target_dir / filename
+
+            # Only hit the network when the file is not on disk yet
+            if not filepath.exists():
+                self._fetch_to_file(url, filepath)
+
+            # The custom URL is generated whether or not a download happened
+            custom_url = self.get_custom_url(filename, is_profile)
+            return url, filename, custom_url
+
+        except Exception as e:
+            # Best effort: one bad image must not abort the whole batch
+            log.error(f"Error downloading image from {url}: {e}")
+            return url, "", ""
+
+    def download_all_images(self, reviews: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+        """
+        Download all images (review images and profile pictures) for all reviews.
+
+        The review documents are updated in place. Depending on configuration
+        the keys ``local_images``, ``local_profile_picture``,
+        ``original_image_urls``, ``original_profile_picture``, ``user_images``
+        and ``profile_picture`` may be written.
+
+        Args:
+            reviews: Dictionary of review documents
+
+        Returns:
+            Updated reviews with local image paths and custom URLs
+        """
+        self.ensure_directories()
+
+        # Collect all unique image URLs (both review images and profile pictures)
+        # Exclude custom URLs
+        review_urls: Set[str] = set()
+        profile_urls: Set[str] = set()
+
+        for review in reviews.values():
+            # Collect review images - exclude custom URLs
+            if "user_images" in review and isinstance(review["user_images"], list):
+                for url in review["user_images"]:
+                    if self.is_not_custom_url(url):
+                        review_urls.add(url)
+                # If we have original image URLs stored separately, add those too
+                if "original_image_urls" in review and isinstance(review["original_image_urls"], list):
+                    for orig_url in review["original_image_urls"]:
+                        if self.is_not_custom_url(orig_url):
+                            review_urls.add(orig_url)
+
+            # Collect profile pictures - exclude custom URLs
+            if "profile_picture" in review and review["profile_picture"]:
+                profile_url = review["profile_picture"]
+                if self.is_not_custom_url(profile_url):
+                    profile_urls.add(profile_url)
+                # If we have original profile URL stored separately, add that too
+                if "original_profile_picture" in review and review["original_profile_picture"]:
+                    orig_profile_url = review["original_profile_picture"]
+                    if self.is_not_custom_url(orig_profile_url):
+                        profile_urls.add(orig_profile_url)
+
+        # Prepare download tasks with URL type info
+        download_tasks = [(url, False) for url in review_urls] + [(url, True) for url in profile_urls]
+
+        if not download_tasks:
+            log.info("No images to download")
+            return reviews
+
+        log.info(
+            f"Downloading {len(download_tasks)} images ({len(profile_urls)} profiles, {len(review_urls)} review images)...")
+
+        # Create URL to filename and URL to custom URL mappings
+        url_to_filename = {}
+        url_to_custom_url = {}
+
+        # Download images in parallel
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            results = executor.map(self.download_image, download_tasks)
+            for url, filename, custom_url in results:
+                if filename:
+                    url_to_filename[url] = filename
+                if custom_url:
+                    url_to_custom_url[url] = custom_url
+
+        # Update review documents
+        for review_id, review in reviews.items():
+            # Find the original URLs to use for lookup - important for both user_images and profile_picture
+            user_images_original = []
+            profile_picture_original = ""
+
+            # For user_images, either use original URLs if we have them, or the current user_images
+            if "original_image_urls" in review and isinstance(review["original_image_urls"], list):
+                user_images_original = review["original_image_urls"]
+            elif "user_images" in review and isinstance(review["user_images"], list):
+                user_images_original = review["user_images"].copy()
+
+            # For profile_picture, either use original URL if we have it, or the current profile_picture
+            if "original_profile_picture" in review and review["original_profile_picture"]:
+                profile_picture_original = review["original_profile_picture"]
+            elif "profile_picture" in review:
+                profile_picture_original = review["profile_picture"]
+
+            # Process user_images
+            if "user_images" in review and isinstance(review["user_images"], list):
+                # Add local image paths if enabled
+                if self.store_local_paths:
+                    local_images = [url_to_filename.get(url, "") for url in user_images_original
+                                    if url and self.is_not_custom_url(url)]
+                    # Drop entries whose download failed (empty filename)
+                    review["local_images"] = [img for img in local_images if img]
+
+                # Replace URLs if enabled
+                if self.replace_urls:
+                    # Store original URLs if needed and not already stored
+                    if self.preserve_original_urls and "original_image_urls" not in review:
+                        review["original_image_urls"] = review["user_images"].copy()
+
+                    # Create custom URLs for each image
+                    custom_images = []
+                    for url in user_images_original:
+                        if url in url_to_custom_url:
+                            custom_images.append(url_to_custom_url[url])
+                        elif not self.is_not_custom_url(url):  # Already a custom URL
+                            custom_images.append(url)
+
+                    # Replace with custom URLs if we have them
+                    if custom_images:
+                        review["user_images"] = custom_images
+
+            # Process profile_picture
+            if "profile_picture" in review and review["profile_picture"]:
+                # Add local profile picture path if enabled
+                if self.store_local_paths and profile_picture_original in url_to_filename:
+                    review["local_profile_picture"] = url_to_filename[profile_picture_original]
+
+                # Replace profile_picture URL if enabled
+                if self.replace_urls:
+                    # Store original URL if needed and not already stored
+                    if self.preserve_original_urls and "original_profile_picture" not in review:
+                        review["original_profile_picture"] = review["profile_picture"]
+
+                    # Replace with custom URL if we have one for this profile image
+                    if profile_picture_original in url_to_custom_url:
+                        review["profile_picture"] = url_to_custom_url[profile_picture_original]
+                    elif not self.is_not_custom_url(review["profile_picture"]):
+                        # If current URL is already a custom URL, keep it
+                        pass
+                    elif profile_picture_original:
+                        # If we don't have a custom URL but have a filename, generate one
+                        filename = url_to_filename.get(profile_picture_original, "")
+                        if filename:
+                            custom_url = self.get_custom_url(filename, True)
+                            if custom_url:
+                                review["profile_picture"] = custom_url
+
+        log.info(f"Downloaded {len(url_to_filename)} images")
+        if self.replace_urls:
+            log.info(f"Replaced URLs for {len(url_to_custom_url)} images")
+
+        return reviews
--- a/modules/models.py
+++ b/modules/models.py
@@ -0,0 +1,84 @@
+"""
+Data models for Google Maps Reviews Scraper.
+"""
+import re
+from dataclasses import dataclass, field
+
+from selenium.webdriver.remote.webelement import WebElement
+
+from modules.utils import (try_find, first_text, first_attr, safe_int, detect_lang, parse_date_to_iso)
+
+
+@dataclass
+class RawReview:
+    """
+    Data class representing a raw review extracted from Google Maps.
+
+    Instances are normally built with :meth:`from_card`, which reads every
+    field from a review card element.
+    """
+    id: str = ""  # value of the card's "data-review-id" attribute
+    author: str = ""
+    rating: float = 0.0  # 0.0 when no number is found in the rating label
+    date: str = ""  # date text as read from the card
+    lang: str = "und"
+    text: str = ""
+    likes: int = 0
+    photos: list[str] = field(default_factory=list)
+    profile: str = ""
+    avatar: str = ""  # URL to profile picture
+    owner_date: str = ""
+    owner_text: str = ""
+    review_date: str = ""  # ISO format date
+
+    # CSS Selectors for review elements (unannotated, so these are plain
+    # class attributes and not dataclass fields)
+    MORE_BTN = "button.kyuRq"
+    LIKE_BTN = 'button[jsaction*="toggleThumbsUp" i]'
+    PHOTO_BTN = "button.Tya61d"
+    OWNER_RESP = "div.CDe7pd"
+
+    @classmethod
+    def from_card(cls, card: WebElement) -> "RawReview":
+        """Factory method to create a RawReview from a WebElement.
+
+        Args:
+            card: The review card element to read from.
+
+        Returns:
+            A populated RawReview; values that cannot be found keep their
+            empty/zero defaults.
+        """
+        # expand "More" - non-blocking approach
+        for b in try_find(card, cls.MORE_BTN, all=True):
+            try:
+                b.click()
+            except Exception:
+                # Best effort: a button that cannot be clicked only means the
+                # text may stay truncated
+                pass
+
+        rid = card.get_attribute("data-review-id") or ""
+        author = first_text(card, 'div[class*="d4r55"]')
+        profile = first_attr(card, 'button[data-review-id]', "data-href")
+        avatar = first_attr(card, 'button[data-review-id] img', "src")
+
+        label = first_attr(card, 'span[role="img"]', "aria-label")
+        # Commas are normalised to dots for locales with a decimal comma.
+        # Match a well-formed number only: a looser character-class pattern
+        # could capture "." or "4.5." and make float() raise.
+        num = re.search(r"\d+(?:\.\d+)?", label.replace(",", ".")) if label else None
+        rating = float(num.group()) if num else 0.0
+
+        date = first_text(card, 'span[class*="rsqaWe"]')
+        # Parse the date string to ISO format
+        review_date = parse_date_to_iso(date)
+
+        # Try the known text containers in order; the first non-empty one wins
+        text = ""
+        for sel in ('span[jsname="bN97Pc"]',
+                    'span[jsname="fbQN7e"]',
+                    'div.MyEned span.wiI7pd'):
+            text = first_text(card, sel)
+            if text:
+                break
+        lang = detect_lang(text)
+
+        likes = 0
+        if (btn := try_find(card, cls.LIKE_BTN)):
+            # Prefer the visible button text, fall back to its aria-label
+            likes = safe_int(btn[0].text or btn[0].get_attribute("aria-label"))
+
+        # Photo URLs are read from the url("...") part of each button's style
+        photos: list[str] = []
+        for btn in try_find(card, cls.PHOTO_BTN, all=True):
+            if (m := re.search(r'url\("([^"]+)"', btn.get_attribute("style") or "")):
+                photos.append(m.group(1))
+
+        owner_date = owner_text = ""
+        if (box := try_find(card, cls.OWNER_RESP)):
+            box = box[0]
+            owner_date = first_text(box, "span.DZSIDd")
+            owner_text = first_text(box, "div.wiI7pd")
+
+        return cls(
+            id=rid,
+            author=author,
+            rating=rating,
+            date=date,
+            lang=lang,
+            text=text,
+            likes=likes,
+            photos=photos,
+            profile=profile,
+            avatar=avatar,
+            owner_date=owner_date,
+            owner_text=owner_text,
+            review_date=review_date,
+        )
--- a/modules/scraper.py
+++ b/modules/scraper.py
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -0,0 +1,307 @@
+"""
+Utility functions for Google Maps Reviews Scraper.
+"""
+import datetime
+import logging
+import re
+import time
+from datetime import timezone
+from functools import lru_cache
+from typing import List
+
+from selenium.common.exceptions import (NoSuchElementException,
+                                        StaleElementReferenceException,
+                                        TimeoutException)
+from selenium.webdriver import Chrome
+from selenium.webdriver.common.by import By
+from selenium.webdriver.remote.webelement import WebElement
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+
+# Logger
+log = logging.getLogger("scraper")
+
+# Unicode-block patterns used for script-based language detection
+HEB_CHARS = re.compile(r"[\u0590-\u05FF]")
+THAI_CHARS = re.compile(r"[\u0E00-\u0E7F]")
+
+
+@lru_cache(maxsize=1024)
+def detect_lang(txt: str) -> str:
+    """Guess the language code of ``txt`` from the scripts it contains.
+
+    Returns "he" if any Hebrew character is present, otherwise "th" if any
+    Thai character is present, otherwise "en".
+    """
+    # Order matters: Hebrew is checked before Thai.
+    for code, script in (("he", HEB_CHARS), ("th", THAI_CHARS)):
+        if script.search(txt):
+            return code
+    return "en"
+
+
+@lru_cache(maxsize=128)
+def safe_int(s: str | None) -> int:
+    """Return the first run of digits in ``s`` as an int, or 0 if there is none.
+
+    ``None`` and the empty string both yield 0.
+    """
+    digits = re.search(r"\d+", s if s else "")
+    if digits is None:
+        return 0
+    return int(digits.group())
+
+
+def try_find(el: WebElement, css: str, *, all=False) -> List[WebElement]:
+    """Look up descendants of ``el`` by CSS selector, never raising on a miss.
+
+    Args:
+        el: Element to search within.
+        css: CSS selector.
+        all: When true return every match, otherwise at most the first one.
+
+    Returns:
+        A list of matching elements; empty when nothing matches or the
+        element has gone stale.
+    """
+    try:
+        if not all:
+            found = el.find_element(By.CSS_SELECTOR, css)
+            return [found] if found else []
+        return el.find_elements(By.CSS_SELECTOR, css)
+    except (NoSuchElementException, StaleElementReferenceException):
+        return []
+
+
+def first_text(el: WebElement, css: str) -> str:
+    """Return the stripped text of the first match whose text is non-empty.
+
+    Matches that have gone stale are skipped. Returns "" when no match has
+    any text.
+    """
+    candidates = try_find(el, css, all=True)
+    for candidate in candidates:
+        try:
+            content = candidate.text.strip()
+        except StaleElementReferenceException:
+            continue
+        if content:
+            return content
+    return ""
+
+
+def parse_date_to_iso(date_str: str) -> str:
+    """
+    Convert a relative date such as "2 weeks ago" into an ISO 8601 timestamp.
+
+    The result is the current UTC time (truncated to whole seconds) minus the
+    stated amount. Months count as 30 days and years as 365 days. A missing
+    number ("a week ago") is treated as 1.
+
+    Text without "ago" (for example "January 2023") and text with "ago" but
+    no recognised unit are not parsed; the current time is returned for them.
+
+    Args:
+        date_str: Date text, e.g. "3 days ago".
+
+    Returns:
+        A best-effort ISO string, or an empty string when ``date_str`` is
+        empty or the computed date is out of range.
+    """
+    # Function-scope import: at module level ``datetime`` is the module, not
+    # the class, and ``timedelta`` is not imported at all.
+    from datetime import datetime, timedelta, timezone
+
+    if not date_str:
+        return ""
+
+    try:
+        now = datetime.now(timezone.utc).replace(microsecond=0)
+        lowered = date_str.lower()
+
+        if "ago" not in lowered:
+            # Absolute dates are not parsed - fall back to the current time
+            return now.isoformat()
+
+        number = re.search(r'\d+', date_str)
+        count = int(number.group()) if number else 1
+
+        # Checked in this order; the first unit name found in the text wins
+        units = (
+            ("minute", timedelta(minutes=1)),
+            ("hour", timedelta(hours=1)),
+            ("day", timedelta(days=1)),
+            ("week", timedelta(weeks=1)),
+            ("month", timedelta(days=30)),  # approximate
+            ("year", timedelta(days=365)),  # approximate
+        )
+        for keyword, step in units:
+            if keyword in lowered:
+                return (now - count * step).isoformat()
+
+        # "ago" without a known unit: default to the current time
+        return now.isoformat()
+    except (OverflowError, ValueError):
+        # Amount too large to represent as a date - treat as unparseable
+        return ""
+
+
+def first_attr(el: WebElement, css: str, attr: str) -> str:
+    """Return the stripped ``attr`` value of the first match that has one.
+
+    Matches that have gone stale are skipped. Returns "" when no match
+    carries a non-empty value for the attribute.
+    """
+    candidates = try_find(el, css, all=True)
+    for candidate in candidates:
+        try:
+            raw = candidate.get_attribute(attr)
+            value = (raw or "").strip()
+        except StaleElementReferenceException:
+            continue
+        if value:
+            return value
+    return ""
+
+
+def click_if(driver: Chrome, css: str, delay: float = .25, timeout: float = 5.0) -> bool:
+    """
+    Click the first usable element matching ``css``, if there is one.
+
+    Elements already on the page are tried first; if none of them can be
+    clicked, the function waits up to ``timeout`` seconds for a clickable
+    match. Errors are logged at debug level and reported as False.
+
+    Args:
+        driver: WebDriver instance
+        css: CSS selector for the element to click
+        delay: Time to wait after clicking (seconds)
+        timeout: Maximum time to wait for element (seconds)
+
+    Returns:
+        True if element was found and clicked, False otherwise
+    """
+    locator = (By.CSS_SELECTOR, css)
+    try:
+        # Nothing matches at all: return immediately without waiting
+        candidates = driver.find_elements(*locator)
+        if not candidates:
+            return False
+
+        # Direct attempt on every match that is displayed and enabled
+        for candidate in candidates:
+            try:
+                if not (candidate.is_displayed() and candidate.is_enabled()):
+                    continue
+                candidate.click()
+                time.sleep(delay)
+                return True
+            except Exception:
+                # This one failed - move on to the next match
+                continue
+
+        # Fallback: wait for a match to become clickable
+        try:
+            waiter = WebDriverWait(driver, timeout)
+            clickable = waiter.until(EC.element_to_be_clickable(locator))
+            clickable.click()
+            time.sleep(delay)
+        except TimeoutException:
+            return False
+        return True
+
+    except Exception as e:
+        log.debug(f"Error in click_if: {str(e)}")
+        return False
+
+
+def get_current_iso_date() -> str:
+    """Return the current time in UTC as an ISO 8601 string."""
+    # Function-scope import: at module level ``datetime`` refers to the module.
+    from datetime import datetime, timezone
+    utc_now = datetime.now(timezone.utc)
+    return utc_now.isoformat()
+
+# """
+# Utility functions for Google Maps Reviews Scraper.
+# """
+#
+# import re
+# import time
+# import logging
+# from datetime import datetime, timezone
+# from functools import lru_cache
+# from typing import List, Optional
+#
+# from selenium.common.exceptions import (NoSuchElementException,
+#                                        StaleElementReferenceException,
+#                                        TimeoutException)
+# from selenium.webdriver import Chrome
+# from selenium.webdriver.common.by import By
+# from selenium.webdriver.remote.webelement import WebElement
+# from selenium.webdriver.support import expected_conditions as EC
+# from selenium.webdriver.support.ui import WebDriverWait
+#
+# # Constants for language detection
+# HEB_CHARS = re.compile(r"[\u0590-\u05FF]")
+# THAI_CHARS = re.compile(r"[\u0E00-\u0E7F]")
+#
+# # Logger
+# log = logging.getLogger("scraper")
+#
+#
+# @lru_cache(maxsize=1024)
+# def detect_lang(txt: str) -> str:
+#     """Detect language based on character sets"""
+#     if HEB_CHARS.search(txt):  return "he"
+#     if THAI_CHARS.search(txt): return "th"
+#     return "en"
+#
+#
+# @lru_cache(maxsize=128)
+# def safe_int(s: str | None) -> int:
+#     """Safely convert string to integer, returning 0 if not possible"""
+#     m = re.search(r"\d+", s or "")
+#     return int(m.group()) if m else 0
+#
+#
+# def try_find(el: WebElement, css: str, *, all=False) -> List[WebElement]:
+#     """Safely find elements by CSS selector without raising exceptions"""
+#     try:
+#         if all:
+#             return el.find_elements(By.CSS_SELECTOR, css)
+#         obj = el.find_element(By.CSS_SELECTOR, css)
+#         return [obj] if obj else []
+#     except (NoSuchElementException, StaleElementReferenceException):
+#         return []
+#
+#
+# def first_text(el: WebElement, css: str) -> str:
+#     """Get text from the first matching element that has non-empty text"""
+#     for e in try_find(el, css, all=True):
+#         if (t := e.text.strip()):
+#             return t
+#     return ""
+#
+#
+# def first_attr(el: WebElement, css: str, attr: str) -> str:
+#     """Get attribute value from the first matching element that has a non-empty value"""
+#     for e in try_find(el, css, all=True):
+#         if (v := (e.get_attribute(attr) or "").strip()):
+#             return v
+#     return ""
+#
+#
+# def click_if(driver: Chrome, css: str, delay: float = .25, timeout: float = 5.0) -> bool:
+#     """Click element if it exists and is clickable, with timeout"""
+#     try:
+#         WebDriverWait(driver, timeout).until(
+#             EC.element_to_be_clickable((By.CSS_SELECTOR, css))
+#         ).click()
+#         time.sleep(delay)
+#         return True
+#     except TimeoutException:
+#         return False
+#
+#
+# def parse_date_to_iso(date_str: str) -> str:
+#     """
+#     Parse date strings like "2 weeks ago", "January 2023", etc. into ISO format.
+#     Returns a best-effort ISO string, or empty string if parsing fails.
+#     """
+#     if not date_str:
+#         return ""
+#
+#     try:
+#         now = datetime.now(timezone.utc)
+#
+#         # Handle relative dates
+#         if "ago" in date_str.lower():
+#             # For simplicity, map to approximate dates
+#             if "minute" in date_str.lower():
+#                 minutes = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(minutes=minutes)
+#             elif "hour" in date_str.lower():
+#                 hours = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(hours=hours)
+#             elif "day" in date_str.lower():
+#                 days = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=days)
+#             elif "week" in date_str.lower():
+#                 weeks = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(weeks=weeks)
+#             elif "month" in date_str.lower():
+#                 months = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 # Approximate months as 30 days
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=30 * months)
+#             elif "year" in date_str.lower():
+#                 years = int(re.search(r'\d+', date_str).group()) if re.search(r'\d+', date_str) else 1
+#                 # Approximate years as 365 days
+#                 dt = now.replace(microsecond=0) - timezone.timedelta(days=365 * years)
+#             else:
+#                 # Default to current time if can't parse
+#                 dt = now.replace(microsecond=0)
+#         else:
+#             # Handle absolute dates (month year format)
+#             # This is a simplification - would need more robust parsing for production
+#             dt = now.replace(microsecond=0)
+#
+#         return dt.isoformat()
+#     except Exception:
+#         # If parsing fails, return empty string
+#         return ""
+#
+#
+# def get_current_iso_date() -> str:
+#     """Return current UTC time in ISO format."""
+#     return datetime.now(timezone.utc).isoformat()