"""
Configuration management for Google Maps Reviews Scraper.
"""

import copy
import logging
from pathlib import Path
from typing import Dict, Any

import yaml

# Configure logging
# NOTE: this runs at import time. basicConfig() attaches a handler to the root
# logger only if the root logger has no handlers yet, so an application that
# configures logging before importing this module keeps its own setup.
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(message)s")

# Shared logger used by the configuration code in this module.
log = logging.getLogger("scraper")

# Default configuration path (relative, so it is resolved against the current
# working directory of the process at the time the file is opened).
DEFAULT_CONFIG_PATH = Path("config.yaml")

# Default configuration - will be overridden by config file
# NOTE: contains nested dicts ("mongodb", "custom_params"); code that derives a
# per-run configuration from it must deep-copy, not shallow-copy, to avoid
# mutating these module-level defaults.
DEFAULT_CONFIG = {
    "url": "https://maps.app.goo.gl/6tkNMDjcj3SS6LJe9",
    "headless": True,
    "sort_by": "relevance",
    "stop_on_match": False,
    "overwrite_existing": False,
    "use_mongodb": True,
    "mongodb": {
        "uri": "mongodb://localhost:27017",
        "database": "reviews",
        "collection": "google_reviews",
    },
    "backup_to_json": True,
    "json_path": "google_reviews.json",
    "seen_ids_path": "google_reviews.ids",
    "convert_dates": True,
    "download_images": True,
    "image_dir": "review_images",
    "download_threads": 4,
    "store_local_paths": True,  # Option to control storing local image paths
    "replace_urls": False,  # Option to control URL replacement
    "custom_url_base": "https://mycustomurl.com",  # Base URL for replacement
    "custom_url_profiles": "/profiles/",  # Path for profile images
    "custom_url_reviews": "/reviews/",  # Path for review images
    "preserve_original_urls": True,  # Option to preserve original URLs
    "custom_params": {  # Custom parameters to add to each document
        "company": "Thaitours",  # Default example
        "source": "Google Maps",  # Default example
    },
}


def _deep_update(target: Dict[str, Any], overrides: Dict[str, Any]) -> None:
    """Recursively merge *overrides* into *target* in place.

    When both sides hold a dict under the same key the dicts are merged
    key by key; in every other case the override value replaces the
    existing one.
    """
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            _deep_update(target[key], value)
        else:
            target[key] = value


def load_config(config_path: Path = DEFAULT_CONFIG_PATH) -> Dict[str, Any]:
    """Load configuration from a YAML file, layered over the defaults.

    If ``config_path`` exists, its contents are parsed with
    ``yaml.safe_load`` and merged (nested dicts included) on top of a copy
    of ``DEFAULT_CONFIG``. If the file cannot be read or parsed, the error
    is logged and the defaults are returned. If the file does not exist, a
    file containing the defaults is written there for future use; failure
    to write it is logged and does not prevent the defaults from being
    returned.

    Args:
        config_path: Location of the YAML configuration file.

    Returns:
        A new configuration dict. It shares no mutable state with
        ``DEFAULT_CONFIG``, so callers may modify it freely.
    """
    config_path = Path(config_path)  # tolerate plain-string paths

    # Deep copy on purpose: a shallow copy would share the nested "mongodb"
    # and "custom_params" dicts with DEFAULT_CONFIG, so the merge below
    # would silently rewrite the module-level defaults.
    config = copy.deepcopy(DEFAULT_CONFIG)

    if not config_path.exists():
        log.info(
            "Config file %s not found, using default configuration", config_path
        )
        # Create a default config file for future use. This is a convenience,
        # so an unwritable location must not abort the caller.
        try:
            with open(config_path, "w", encoding="utf-8") as f:
                yaml.dump(config, f, default_flow_style=False)
        except OSError as e:
            log.error(
                "Could not create default configuration file at %s: %s",
                config_path,
                e,
            )
        else:
            log.info("Created default configuration file at %s", config_path)
        return config

    # Broad catch is deliberate: any read/parse/shape problem falls back to
    # the defaults instead of crashing the caller.
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            user_config = yaml.safe_load(f)

        if user_config:
            if not isinstance(user_config, dict):
                raise TypeError(
                    "top-level YAML value must be a mapping, got "
                    f"{type(user_config).__name__}"
                )
            # Merge configs, with nested dictionary support
            _deep_update(config, user_config)
            log.info("Loaded configuration from %s", config_path)
        else:
            log.info(
                "Config file %s is empty, using default configuration",
                config_path,
            )
    except Exception as e:
        log.error("Error loading config from %s: %s", config_path, e)
        log.info("Using default configuration")

    return config