Files
whyrating-engine-legacy/start.py
George Khananaev 5bbaf455d8 Release Google Reviews Scraper Pro v1.0.0 (2025)
Initial release with multi-language support, MongoDB integration, image handling, URL replacement, and robust error handling. Includes detailed documentation, usage examples, and recommended usage guidelines. Built to effectively handle Google's 2025 interface changes.
2025-04-24 22:12:07 +07:00

74 lines
2.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Google Maps review scraper with MongoDB integration
===================================================
Main entry point for the scraper.
"""
from modules.cli import parse_arguments
from modules.config import load_config
from modules.scraper import GoogleReviewsScraper
def main():
    """Build the effective configuration and run the scraper.

    The configuration is loaded from the file named by the ``config``
    command-line argument, then selectively overridden by the remaining
    command-line arguments, and finally handed to ``GoogleReviewsScraper``,
    whose ``scrape()`` method is invoked.
    """
    cli = parse_arguments()
    settings = load_config(cli.config)

    # (option name, is_switch), listed in the order the overrides are applied.
    #   is_switch=True  -> a truthy CLI value forces the setting to True;
    #                      a falsy one leaves the configured value alone.
    #   is_switch=False -> any CLI value other than None replaces the
    #                      configured value (so False/0/"" still override).
    overrides = (
        ("headless", True),
        ("sort_by", False),
        ("stop_on_match", True),
        ("url", False),
        ("overwrite_existing", True),
        ("use_mongodb", False),
        # Date conversion and image downloading
        ("convert_dates", False),
        ("download_images", False),
        ("image_dir", False),
        ("download_threads", False),
        # Local image paths and URL replacement
        ("store_local_paths", False),
        ("replace_urls", False),
        ("custom_url_base", False),
        ("custom_url_profiles", False),
        ("custom_url_reviews", False),
        ("preserve_original_urls", False),
    )
    for option, is_switch in overrides:
        supplied = getattr(cli, option)
        if is_switch:
            if supplied:
                settings[option] = True
        elif supplied is not None:
            settings[option] = supplied

    # Custom parameters are merged into whatever the config already holds
    # under "custom_params" (created on demand) rather than replacing it.
    extra_params = cli.custom_params
    if extra_params is not None:
        settings.setdefault("custom_params", {}).update(extra_params)

    GoogleReviewsScraper(settings).scrape()
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()