added config file.
This commit is contained in:
80
modules/config.py
Normal file
80
modules/config.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
Configuration management for Google Maps Reviews Scraper.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
|
||||
import yaml
|
||||
|
||||
# Module-level logging setup: root logger at INFO with a timestamp prefix,
# plus the named "scraper" logger used by this module.
logging.basicConfig(
    format="[%(asctime)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("scraper")

# Path used by load_config() when the caller does not pass one.
DEFAULT_CONFIG_PATH = Path("config.yaml")
|
||||
|
||||
# Built-in defaults. Values found in the YAML config file take precedence
# over these when load_config() merges the two.
DEFAULT_CONFIG: Dict[str, Any] = {
    "url": "https://maps.app.goo.gl/6tkNMDjcj3SS6LJe9",
    "headless": True,
    "sort_by": "relevance",
    "stop_on_match": False,
    "overwrite_existing": False,
    "use_mongodb": True,
    "mongodb": {
        "uri": "mongodb://localhost:27017",
        "database": "reviews",
        "collection": "google_reviews",
    },
    "backup_to_json": True,
    "json_path": "google_reviews.json",
    "seen_ids_path": "google_reviews.ids",
    "convert_dates": True,
    "download_images": True,
    "image_dir": "review_images",
    "download_threads": 4,
    # Whether local image paths are stored.
    "store_local_paths": True,
    # Whether URLs are replaced.
    "replace_urls": False,
    # Base URL used for replacement.
    "custom_url_base": "https://mycustomurl.com",
    # Path segment for profile images.
    "custom_url_profiles": "/profiles/",
    # Path segment for review images.
    "custom_url_reviews": "/reviews/",
    # Whether the original URLs are preserved.
    "preserve_original_urls": True,
    # Custom parameters added to each document (values below are examples).
    "custom_params": {
        "company": "Thaitours",
        "source": "Google Maps",
    },
}
|
||||
|
||||
|
||||
def _deep_update(base: Dict[str, Any], overrides: Dict[str, Any]) -> None:
    """Recursively merge *overrides* into *base* in place.

    When both sides hold a dict under the same key the merge descends into
    it; in every other case the value from *overrides* replaces the one in
    *base*.
    """
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            _deep_update(base[key], value)
        else:
            base[key] = value


def load_config(config_path: Path = DEFAULT_CONFIG_PATH) -> Dict[str, Any]:
    """Load configuration from a YAML file, falling back to the defaults.

    Args:
        config_path: Location of the YAML file. A plain string is accepted
            as well and converted to a ``Path``.

    Returns:
        A new dict built from ``DEFAULT_CONFIG`` with the file's values
        merged on top (nested dicts are merged key by key). The returned
        dict shares no mutable state with ``DEFAULT_CONFIG``.

    Behaviour by case:
        * File missing: the defaults are returned and written to
          ``config_path`` for future use. A failure to write is logged and
          otherwise ignored.
        * File unreadable, not valid YAML, or not a mapping at the top
          level: the error is logged and the defaults are returned.
        * File empty: the defaults are returned.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    config_path = Path(config_path)

    # Deep copy: a shallow copy would let the merge below write into the
    # nested dicts owned by DEFAULT_CONFIG and leak into later calls.
    config = copy.deepcopy(DEFAULT_CONFIG)

    if not config_path.exists():
        log.info(f"Config file {config_path} not found, using default configuration")
        # Create a default config file for future use. This is a
        # convenience only, so a read-only location must not abort loading.
        try:
            with open(config_path, "w", encoding="utf-8") as f:
                yaml.dump(config, f, default_flow_style=False)
        except OSError as e:
            log.warning(f"Could not create default configuration file at {config_path}: {e}")
        else:
            log.info(f"Created default configuration file at {config_path}")
        return config

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            user_config = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: any read or parse problem falls back to the
        # defaults instead of stopping the program.
        log.error(f"Error loading config from {config_path}: {e}")
        log.info("Using default configuration")
        return config

    if not user_config:
        # Empty file (or an empty document): nothing to merge.
        return config

    if not isinstance(user_config, dict):
        log.error(
            f"Error loading config from {config_path}: "
            f"expected a mapping at the top level, got {type(user_config).__name__}"
        )
        log.info("Using default configuration")
        return config

    _deep_update(config, user_config)
    log.info(f"Loaded configuration from {config_path}")
    return config
|
||||
Reference in New Issue
Block a user