Release Google Reviews Scraper Pro v1.0.0 (2025)

Initial release with multi-language support, MongoDB integration, image handling, URL replacement, and robust error handling. Includes detailed documentation, usage examples, and recommended usage guidelines. Built to effectively handle Google's 2025 interface changes.
This commit is contained in:
George Khananaev
2025-04-24 22:12:07 +07:00
commit 5bbaf455d8
14 changed files with 4032 additions and 0 deletions

73
start.py Normal file
View File

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
GoogleMaps review scraper with MongoDB integration
=================================================
Main entry point for the scraper.
"""
from modules.cli import parse_arguments
from modules.config import load_config
from modules.scraper import GoogleReviewsScraper
def main():
    """Run the scraper with the config file merged with CLI overrides.

    Loads the configuration named by ``args.config``, lets every option
    supplied on the command line take precedence over the loaded value,
    then builds a ``GoogleReviewsScraper`` from the result and calls its
    ``scrape()`` method.
    """
    cli = parse_arguments()
    settings = load_config(cli.config)

    # (option name, is_switch), listed in the order overrides are applied.
    # A switch only ever forces its setting to True, and only when truthy;
    # any other option replaces the setting whenever it is not None.
    overrides = (
        ("headless", True),
        ("sort_by", False),
        ("stop_on_match", True),
        ("url", False),
        ("overwrite_existing", True),
        ("use_mongodb", False),
        # Date conversion and image downloading
        ("convert_dates", False),
        ("download_images", False),
        ("image_dir", False),
        ("download_threads", False),
        # Local image paths and URL replacement
        ("store_local_paths", False),
        ("replace_urls", False),
        ("custom_url_base", False),
        ("custom_url_profiles", False),
        ("custom_url_reviews", False),
        ("preserve_original_urls", False),
    )
    for option, is_switch in overrides:
        supplied = getattr(cli, option)
        if is_switch:
            if supplied:
                settings[option] = True
        elif supplied is not None:
            settings[option] = supplied

    # Custom parameters are merged into the existing mapping (created on
    # demand) rather than replacing it wholesale.
    extra_params = cli.custom_params
    if extra_params is not None:
        settings.setdefault("custom_params", {}).update(extra_params)

    # Initialize and run the scraper
    GoogleReviewsScraper(settings).scrape()
# Run the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()