diff --git a/modules/config.py b/modules/config.py new file mode 100644 index 0000000..f5cf7b8 --- /dev/null +++ b/modules/config.py @@ -0,0 +1,80 @@ +""" +Configuration management for Google Maps Reviews Scraper. +""" + +import logging +from pathlib import Path +from typing import Dict, Any + +import yaml + +# Configure logging +logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(message)s") +log = logging.getLogger("scraper") + +# Default configuration path +DEFAULT_CONFIG_PATH = Path("config.yaml") + +# Default configuration - will be overridden by config file +DEFAULT_CONFIG = { + "url": "https://maps.app.goo.gl/6tkNMDjcj3SS6LJe9", + "headless": True, + "sort_by": "relevance", + "stop_on_match": False, + "overwrite_existing": False, + "use_mongodb": True, + "mongodb": { + "uri": "mongodb://localhost:27017", + "database": "reviews", + "collection": "google_reviews" + }, + "backup_to_json": True, + "json_path": "google_reviews.json", + "seen_ids_path": "google_reviews.ids", + "convert_dates": True, + "download_images": True, + "image_dir": "review_images", + "download_threads": 4, + "store_local_paths": True, # Option to control storing local image paths + "replace_urls": False, # Option to control URL replacement + "custom_url_base": "https://mycustomurl.com", # Base URL for replacement + "custom_url_profiles": "/profiles/", # Path for profile images + "custom_url_reviews": "/reviews/", # Path for review images + "preserve_original_urls": True, # Option to preserve original URLs + "custom_params": { # Custom parameters to add to each document + "company": "Thaitours", # Default example + "source": "Google Maps" # Default example + } +} + + +def load_config(config_path: Path = DEFAULT_CONFIG_PATH) -> Dict[str, Any]: + """Load configuration from YAML file or use defaults""" + config = DEFAULT_CONFIG.copy() + + if config_path.exists(): + try: + with open(config_path, 'r') as f: + user_config = yaml.safe_load(f) + if user_config: + # Merge configs, with nested dictionary support + def deep_update(d, u): + for k, v in u.items(): + if isinstance(v, dict) and k in d and isinstance(d[k], dict): + deep_update(d[k], v) + else: + d[k] = v + + deep_update(config, user_config) + log.info(f"Loaded configuration from {config_path}") + except Exception as e: + log.error(f"Error loading config from {config_path}: {e}") + log.info("Using default configuration") + else: + log.info(f"Config file {config_path} not found, using default configuration") + # Create a default config file for future use + with open(config_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False) + log.info(f"Created default configuration file at {config_path}") + + return config