"""
Base Scraper Interface

Declares the abstract base class that every scraper must implement, so
that all scraper implementations expose one consistent interface.
"""

from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional


class BaseScraper(ABC):
    """
    Contract shared by every scraper in the ReviewIQ system.

    The class carries no behaviour of its own. A concrete scraper
    inherits from it and must override each abstract property and
    method declared below before it can be instantiated.
    """

    # ------------------------------------------------------------------
    # Identification
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable name of this scraper."""

    @property
    @abstractmethod
    def version(self) -> str:
        """Version string of this scraper."""

    @property
    @abstractmethod
    def supported_domains(self) -> List[str]:
        """Domains this scraper supports, as a list."""

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    @abstractmethod
    def validate_url(self, url: str) -> bool:
        """
        Report whether this scraper supports the given URL.

        Args:
            url: The URL to validate

        Returns:
            True when the URL is valid for this scraper, False otherwise
        """

    @abstractmethod
    def get_business_info(self, driver: Any, url: str) -> Dict[str, Any]:
        """
        Extract business information from the URL without scraping reviews.

        Args:
            driver: WebDriver instance
            url: The URL to extract info from

        Returns:
            Dictionary of business metadata (name, rating, address, etc.)
        """

    @abstractmethod
    def scrape(
        self,
        driver: Any,
        url: str,
        max_reviews: int = 5000,
        timeout_no_new: int = 15,
        flush_callback: Optional[Callable[[List[Dict]], None]] = None,
        flush_batch_size: int = 500,
        progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
        validation_only: bool = False,
    ) -> Dict[str, Any]:
        """
        Scrape reviews from the given URL.

        Args:
            driver: WebDriver instance (e.g., Selenium WebDriver)
            url: The URL to scrape reviews from
            max_reviews: Maximum number of reviews to collect
            timeout_no_new: Seconds to wait with no new reviews before
                stopping
            flush_callback: Optional callback called with batches of
                reviews, for streaming
            flush_batch_size: Number of reviews before flush_callback is
                triggered
            progress_callback: Optional callback(current_count,
                total_count) for progress reporting
            validation_only: If True, return early after extracting
                metadata only

        Returns:
            Dictionary containing:
                - reviews: List of review dictionaries
                - total: Total number of reviews collected
                - error: Error message if any, None otherwise
                - Additional scraper-specific metadata
        """