'use client';

import { useState, useEffect, useRef, useCallback } from 'react';
import ReviewAnalytics from './ReviewAnalytics';

const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';

/** Delay between two status polls of the same job, in milliseconds. */
const POLL_INTERVAL_MS = 2000;
/** Upper bound for one validation request, in milliseconds (validation can be slow). */
const VALIDATION_TIMEOUT_MS = 60000;
/** Idle time after the last keystroke before the map preview follows the input. */
const PREVIEW_DEBOUNCE_MS = 500;
/** Shortest trimmed query that triggers a preview or a validation. */
const MIN_QUERY_LENGTH = 2;

interface ScraperType {
  job_type: string;
  version: string;
  variant: string;
  label: string;
}

interface Review {
  author: string;
  rating: number;
  text: string | null;
  date_text: string;
  avatar_url: string | null;
  profile_url: string | null;
  review_id: string;
}

export interface JobStatus {
  job_id: string;
  status: 'pending' | 'running' | 'completed' | 'failed' | 'partial';
  url: string;
  created_at: string;
  started_at: string | null;
  completed_at: string | null;
  updated_at: string | null; // Last update time for progress tracking
  reviews_count: number | null;
  total_reviews: number | null;
  scrape_time: number | null;
  error_message: string | null;
  // Business metadata for tracking and comparison
  business_name: string | null;
  business_address: string | null;
  business_category: string | null;
  rating_snapshot: number | null;
  total_reviews_snapshot: number | null;
  // Review topics extracted from Google Maps
  review_topics: { topic: string; count: number }[] | null;
}

interface ScraperTestProps {
  onJobsChange?: (jobs: JobStatus[]) => void;
  onSelectReviews?: (reviews: Review[], businessName: string, jobId: string) => void;
}

/** Browser details collected on mount and sent along with validation and scrape requests. */
interface BrowserFingerprint {
  geolocation?: { lat: number; lng: number };
  userAgent?: string;
  viewport?: { width: number; height: number };
  timezone?: string;
  language?: string;
  platform?: string;
}

/** Fields of one `/api/admin/scrapers` entry that this component reads. */
interface ScraperListEntry {
  job_type: string;
  version: string;
  variant: string;
  deprecated_at: string | null;
  traffic_pct: number;
}

/** Scraper used when the scraper list cannot be loaded. */
const DEFAULT_SCRAPER: ScraperType = {
  job_type: 'google-reviews',
  version: '1.0.0',
  variant: 'stable',
  label: 'Google Reviews v1.0.0',
};

/** Builds a Google Maps search URL for `query`; `forceEnglish` appends `hl=en`. */
function buildMapsSearchUrl(query: string, forceEnglish: boolean): string {
  const base = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(query)}`;
  return forceEnglish ? `${base}&hl=en` : base;
}

/** Formats a job type for display: google_reviews or google-reviews -> "Google Reviews". */
function formatJobType(jobType: string): string {
  return jobType
    .split(/[-_]/)
    .map(word => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ');
}

/** True for the statuses after which a job is no longer polled. */
function isTerminalStatus(status: JobStatus['status']): boolean {
  return status === 'completed' || status === 'failed' || status === 'partial';
}

/**
 * Search, validate and scrape test panel.
 *
 * The user types a business query, validates it through `/api/check-reviews`,
 * confirms, and a job is submitted to `/api/scrape`. Submitted jobs are polled
 * through `/api/jobs/:id` until they reach a terminal status.
 *
 * @param onJobsChange    Called with the current job list whenever the job map changes.
 * @param onSelectReviews Review-selection callback; it is not called by the logic
 *                        visible in this file (presumably wired to a button in the
 *                        markup - TODO confirm).
 *
 * NOTE(review): in this copy of the file the JSX element tags of the render
 * section and of `getStatusIcon` are missing. That text is kept exactly as found
 * (apart from the conditional-rendering guards) and must be restored from the
 * original markup before this file can compile.
 */
export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTestProps = {}) {
  const [searchQuery, setSearchQuery] = useState('');
  const [searchedQuery, setSearchedQuery] = useState('');
  const [jobs, setJobs] = useState<Map<string, JobStatus>>(new Map());
  const [activeJobId, setActiveJobId] = useState<string | null>(null);
  const [reviews, setReviews] = useState<Review[]>([]);
  const [error, setError] = useState('');
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [showAnalytics, setShowAnalytics] = useState(false);
  const [isLoadingReviews, setIsLoadingReviews] = useState(false);
  const [showConfirmModal, setShowConfirmModal] = useState(false);
  const [isCheckingReviews, setIsCheckingReviews] = useState(false);
  const [hasReviews, setHasReviews] = useState<boolean | null>(null);
  const [availableReviewCount, setAvailableReviewCount] = useState<number | null>(null);
  const [businessName, setBusinessName] = useState<string | null>(null);
  const [businessAddress, setBusinessAddress] = useState<string | null>(null);
  const [businessRating, setBusinessRating] = useState<number | null>(null);
  const [businessImage, setBusinessImage] = useState<string | null>(null);
  const [businessCategory, setBusinessCategory] = useState<string | null>(null);

  // Scraper type selection
  const [availableScrapers, setAvailableScrapers] = useState<ScraperType[]>([]);
  const [selectedScraper, setSelectedScraper] = useState<ScraperType | null>(null);
  const [scrapersLoading, setScrapersLoading] = useState(true);

  const [userFingerprint, setUserFingerprint] = useState<BrowserFingerprint>({});

  const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const pollingIntervals = useRef<Map<string, ReturnType<typeof setInterval>>>(new Map());
  const abortControllerRef = useRef<AbortController | null>(null);

  // Always holds the latest onJobsChange so the notification effect below can
  // depend on `jobs` alone. Depending on the callback itself re-fires the effect
  // on every parent render when the parent passes an inline function.
  const onJobsChangeRef = useRef(onJobsChange);
  useEffect(() => {
    onJobsChangeRef.current = onJobsChange;
  });

  /** Resets every field produced by a validation run. */
  const clearValidationResult = () => {
    setHasReviews(null);
    setAvailableReviewCount(null);
    setBusinessName(null);
    setBusinessAddress(null);
    setBusinessRating(null);
    setBusinessImage(null);
    setBusinessCategory(null);
  };

  // Collect browser fingerprint on mount (no permissions needed)
  useEffect(() => {
    let cancelled = false;

    const collectFingerprint = async () => {
      const fingerprint: BrowserFingerprint = {
        userAgent: navigator.userAgent,
        // Reads window.screen, i.e. the screen size rather than the window's inner size.
        viewport: { width: window.screen.width, height: window.screen.height },
        timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
        language: navigator.language,
        platform: navigator.platform,
      };

      // Get approximate location from IP (no permission needed)
      try {
        const response = await fetch('https://ipapi.co/json/', { signal: AbortSignal.timeout(3000) });
        if (response.ok) {
          const data = await response.json();
          // Compare by type, not truthiness: 0 is a valid latitude/longitude.
          if (Number.isFinite(data.latitude) && Number.isFinite(data.longitude)) {
            fingerprint.geolocation = { lat: data.latitude, lng: data.longitude };
            console.log('IP location:', data.city, data.country_name);
          }
        }
      } catch {
        console.log('IP geolocation not available');
      }

      if (cancelled) return;
      setUserFingerprint(fingerprint);
      console.log('Browser fingerprint:', fingerprint);
    };

    void collectFingerprint();
    return () => {
      cancelled = true;
    };
  }, []);

  /**
   * Loads the scraper list, keeps the active entries (not deprecated, traffic > 0)
   * and selects the first one when nothing is selected yet. Any failure, including
   * a non-OK HTTP status or a malformed payload, falls back to DEFAULT_SCRAPER.
   */
  const fetchScrapers = useCallback(async () => {
    try {
      const response = await fetch(`${API_BASE}/api/admin/scrapers`);
      if (!response.ok) {
        throw new Error(`Scraper list request failed with HTTP ${response.status}`);
      }

      const data: unknown = await response.json();
      if (!Array.isArray(data)) {
        throw new Error('Scraper list response is not an array');
      }

      const scrapers: ScraperType[] = (data as ScraperListEntry[])
        .filter(entry => !entry.deprecated_at && entry.traffic_pct > 0)
        .map(entry => ({
          job_type: entry.job_type,
          version: entry.version,
          variant: entry.variant,
          label: `${formatJobType(entry.job_type)} v${entry.version}${entry.variant !== 'stable' ? ` (${entry.variant})` : ''}`,
        }));

      setAvailableScrapers(scrapers);
      // Auto-select first scraper (usually google-reviews stable). The functional
      // update reads the current selection without making it a dependency, so
      // selecting a scraper does not recreate this callback and re-run the fetch.
      setSelectedScraper(current => current ?? scrapers[0] ?? null);
    } catch (err) {
      console.error('Failed to fetch scrapers:', err);
      // Fallback to default google-reviews
      setAvailableScrapers([DEFAULT_SCRAPER]);
      setSelectedScraper(DEFAULT_SCRAPER);
    } finally {
      setScrapersLoading(false);
    }
  }, []);

  // Fetch available scraper types on mount
  useEffect(() => {
    void fetchScrapers();
  }, [fetchScrapers]);

  // Debounce: update map preview as user types (500ms after stopping)
  useEffect(() => {
    const trimmed = searchQuery.trim();
    if (trimmed.length < MIN_QUERY_LENGTH) return;

    if (debounceRef.current) {
      clearTimeout(debounceRef.current);
    }
    debounceRef.current = setTimeout(() => {
      setSearchedQuery(trimmed);
    }, PREVIEW_DEBOUNCE_MS);

    return () => {
      if (debounceRef.current) {
        clearTimeout(debounceRef.current);
      }
    };
  }, [searchQuery]);

  // Clear validation results when user starts typing a new search
  useEffect(() => {
    if (searchedQuery && searchQuery.trim() !== searchedQuery) {
      // Abort any pending validation request; checkReviews treats this as a
      // silent cancellation, not as a timeout.
      abortControllerRef.current?.abort();
      clearValidationResult();
    }
  }, [searchQuery, searchedQuery]);

  // Notify parent when jobs change
  useEffect(() => {
    onJobsChangeRef.current?.(Array.from(jobs.values()));
  }, [jobs]);

  /**
   * Validates `query` through `/api/check-reviews` and stores the business details.
   * Called manually when the user clicks Validate.
   *
   * A request ends in one of three ways: a response (results stored), the 60 s
   * timeout (error shown), or an external abort - a newer validation, typing a
   * new query, or unmount - in which case nothing is reported.
   */
  const checkReviews = async (query: string) => {
    // Abort any previous validation request
    abortControllerRef.current?.abort();

    setIsCheckingReviews(true);
    clearValidationResult();
    setError('');

    const controller = new AbortController();
    abortControllerRef.current = controller;

    // An explicit flag is the only reliable way to tell our own timeout from an
    // external abort: both surface as the same AbortError.
    let timedOut = false;
    const timeoutId = setTimeout(() => {
      timedOut = true;
      controller.abort();
    }, VALIDATION_TIMEOUT_MS);

    try {
      // Force English with hl=en parameter
      const url = buildMapsSearchUrl(query, true);
      const response = await fetch('/api/check-reviews', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          url,
          geolocation: userFingerprint.geolocation,
          browser_fingerprint: userFingerprint, // Pass full fingerprint
        }),
        signal: controller.signal,
      });
      const data = await response.json();

      // The request may have been cancelled while the body was being read.
      if (controller.signal.aborted) return;

      if (response.ok && data.success) {
        setHasReviews(data.has_reviews);
        setAvailableReviewCount(data.total_reviews ?? 0);
        setBusinessName(data.name);
        setBusinessAddress(data.address);
        setBusinessRating(data.rating);
        setBusinessImage(data.image_url);
        setBusinessCategory(data.category);
      } else {
        console.error('Failed to get business info:', data.error);
        // Business not found
        setHasReviews(false);
        setAvailableReviewCount(0);
      }
    } catch (err) {
      const wasAborted = err instanceof Error && err.name === 'AbortError';
      if (wasAborted && !timedOut) {
        // Cancelled from outside - the caller that aborted owns the UI state.
        console.log('Validation cancelled (new validation started)');
        return;
      }

      if (wasAborted) {
        console.error('Validation timed out');
        setError('Validation timed out. Please try again.');
      } else {
        console.error('Error getting business info:', err);
      }
      setHasReviews(false);
      setAvailableReviewCount(0);
    } finally {
      clearTimeout(timeoutId);
      // Only the most recent validation may clear the loading state; a superseded
      // request finishing late must not hide the spinner of its successor.
      if (abortControllerRef.current === controller) {
        abortControllerRef.current = null;
        setIsCheckingReviews(false);
      }
    }
  };

  /**
   * Polls `/api/jobs/:id` every 2 seconds and mirrors the response into `jobs`
   * until the job is completed, failed or partial. Calling it again for a job
   * that is already being polled is a no-op.
   */
  const startPolling = (jobId: string) => {
    // Don't start if already polling this job
    if (pollingIntervals.current.has(jobId)) return;

    // Skips a tick while the previous request is still pending so slow
    // responses cannot pile up or arrive out of order.
    let requestInFlight = false;

    const pollInterval = setInterval(async () => {
      if (requestInFlight) return;
      requestInFlight = true;

      try {
        const response = await fetch(`/api/jobs/${jobId}`);
        if (!response.ok) {
          throw new Error(`Job status request failed with HTTP ${response.status}`);
        }

        const data: JobStatus = await response.json();
        // An error body stored as a job would break rendering (job.status is read there).
        if (!data || typeof data.status !== 'string') {
          throw new Error('Job status response has no status field');
        }

        // Polling was stopped (e.g. unmount) while this request was pending.
        if (!pollingIntervals.current.has(jobId)) return;

        // Update job in map
        setJobs(prev => {
          const next = new Map(prev);
          next.set(jobId, data);
          return next;
        });

        // Stop polling if job is done (completed, failed, or partial)
        if (isTerminalStatus(data.status)) {
          clearInterval(pollInterval);
          pollingIntervals.current.delete(jobId);
        }
      } catch (err) {
        // Failures are logged and retried on the next tick.
        console.error('Poll error for job', jobId, err);
      } finally {
        requestInFlight = false;
      }
    }, POLL_INTERVAL_MS);

    pollingIntervals.current.set(jobId, pollInterval);
  };

  // Cleanup polling intervals, the pending debounce and abort controllers on unmount
  useEffect(() => {
    const intervals = pollingIntervals.current;
    return () => {
      intervals.forEach(interval => clearInterval(interval));
      intervals.clear();
      if (debounceRef.current) {
        clearTimeout(debounceRef.current);
      }
      abortControllerRef.current?.abort();
    };
  }, []);

  /** Applies the typed query to the preview immediately and starts validation. */
  const handleSearch = () => {
    const query = searchQuery.trim();
    if (query.length < MIN_QUERY_LENGTH) return;

    // Clear any pending debounce
    if (debounceRef.current) {
      clearTimeout(debounceRef.current);
    }

    // Immediately update map preview and trigger validation
    setSearchedQuery(query);
    void checkReviews(query);
  };

  /** Opens the confirmation modal instead of submitting the form. */
  const handlePreviewBusiness = (e: React.FormEvent) => {
    e.preventDefault();
    setShowConfirmModal(true);
  };

  /**
   * Submits a scrape job for the validated query, registers it as `pending`
   * in the job map and starts polling it. Failures are shown through `error`.
   */
  const handleConfirmScrape = async () => {
    setError('');
    setIsSubmitting(true);
    setShowConfirmModal(false);

    // Use the search query to create a Google Maps search URL (force English)
    const url = buildMapsSearchUrl(searchedQuery, true);

    try {
      const response = await fetch('/api/scrape', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          url,
          business_name: businessName,
          business_address: businessAddress,
          rating_snapshot: businessRating,
          total_reviews_snapshot: availableReviewCount,
          geolocation: userFingerprint.geolocation,
          browser_fingerprint: userFingerprint, // Pass full fingerprint
          // Include scraper selection; default to google-reviews when none is selected
          job_type: selectedScraper?.job_type || 'google-reviews',
          scraper_version: selectedScraper?.version,
          scraper_variant: selectedScraper?.variant,
        }),
      });

      // An error response is not guaranteed to carry JSON; a parse failure must
      // not replace the real failure with a SyntaxError message.
      const data = await response.json().catch(() => null);

      if (!response.ok) {
        throw new Error(data?.error || 'Failed to start scraping');
      }
      if (!data || typeof data.job_id !== 'string') {
        throw new Error('Failed to start scraping');
      }

      const jobId: string = data.job_id;
      const submittedAt = new Date().toISOString();
      const initialJob: JobStatus = {
        job_id: jobId,
        status: 'pending',
        url,
        created_at: submittedAt,
        started_at: null,
        completed_at: null,
        updated_at: submittedAt,
        reviews_count: null,
        total_reviews: null,
        scrape_time: null,
        error_message: null,
        business_name: businessName,
        business_address: businessAddress,
        business_category: businessCategory,
        rating_snapshot: businessRating,
        total_reviews_snapshot: availableReviewCount,
        review_topics: null, // Will be populated when job completes
      };

      // Add job to Map with initial status
      setJobs(prev => {
        const next = new Map(prev);
        next.set(jobId, initialJob);
        return next;
      });

      // Set as active job and start polling
      setActiveJobId(jobId);
      startPolling(jobId);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to submit job');
    } finally {
      setIsSubmitting(false);
    }
  };

  /** Maps a job status to its text colour class; unknown statuses get the neutral colour. */
  const getStatusColor = (status: string) => {
    switch (status) {
      case 'completed':
        return 'text-green-700';
      case 'running':
        return 'text-blue-700';
      case 'failed':
        return 'text-red-700';
      case 'partial':
        return 'text-orange-700';
      default:
        return 'text-gray-800';
    }
  };

  // NOTE(review): the icon markup returned by each branch is missing from this
  // copy of the file; the branches are kept as found and need their original
  // elements restored.
  const getStatusIcon = (status: string) => {
    switch (status) {
      case 'completed':
        return ( );
      case 'running':
        return
;
      case 'failed':
        return ( );
      case 'partial':
        return ( );
      default:
        return ( );
    }
  };

  // Google Maps link for opening in new tab
  const googleMapsUrl = searchedQuery ? buildMapsSearchUrl(searchedQuery, false) : '';

  const searchInputRef = useRef<HTMLInputElement>(null);

  // Test URLs at different scales
  const testUrls = [
    { name: '🏪 Small (~79)', query: 'R. Fleitas Peluqueros Gran Canaria' },
    { name: '🚗 Medium (~589)', query: 'ClickRent Gran Canaria' },
    { name: '🏥 Large (~2000+)', query: 'Hospital Universitario Doctor Negrín Las Palmas' },
    { name: '🛒 Alcampo', query: 'Alcampo Hipermarket Las Palmas' },
  ];

  return (
{/* Scraper Type Selection */}

Select the type of data to scrape

{scrapersLoading ? (
Loading...
) : ( )}
{/* Show selected scraper info */} {selectedScraper && (
{selectedScraper.job_type.split(/[-_]/).map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ')} v{selectedScraper.version} {selectedScraper.variant !== 'stable' && ( {selectedScraper.variant} )}
)}
{/* Test URL Quick Select */}
Quick Test:
{testUrls.map((test, idx) => ( ))}
{/* Search Interface */} <>
setSearchQuery(e.target.value)} onKeyDown={(e) => { if (e.key === 'Enter' && searchQuery.trim().length >= 2 && !isCheckingReviews) { e.preventDefault(); handleSearch(); } }} placeholder="Business name and location (e.g., Soho Club Vilnius)..." className="w-full pl-12 pr-4 py-3 text-gray-900 bg-white border-2 border-gray-200 rounded-xl focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all" />
{/* Map Preview Area */}
{searchedQuery ? (
{/* Map Pin Icon with animation */}
{/* Pulse animation */}
{/* Search Query Display */}

📍 {searchedQuery}

Click Validate to check this business on Google Maps

{/* Open in Google Maps button */} Open in Google Maps
) : (

Search for a business to see location preview

)}
{/* Business Card - Validation Results */} {searchedQuery && hasReviews !== null && (
{hasReviews ? ( // Success - Show Business Card
{/* Business Card Layout */}
{/* Business Image */} {businessImage && (
{businessName { // Hide image on error (e.target as HTMLImageElement).style.display = 'none'; }} />
)} {/* Business Info */}
{/* Category Badge + Verified */}
Verified {businessCategory && ( {businessCategory} )}
{/* Business Name */}

{businessName}

{/* Rating + Reviews Row */}
{/* Boolean guard: a bare numeric operand of && would be rendered as "0" */} {typeof businessRating === 'number' && businessRating > 0 && (
{businessRating.toFixed(1)}
{[...Array(5)].map((_, i) => ( ))}
)} {availableReviewCount !== null && availableReviewCount > 0 && ( ({availableReviewCount.toLocaleString()} reviews) )}
{/* Address */} {businessAddress && (
{businessAddress}
)}
{/* Start Scraping Button */}
) : ( // No Reviews - Show Warning

No reviews available

{businessName && (

Business: {businessName}

)}

This business has no reviews to scrape. Try a different search.

)}
)} {/* Error */} {error && (

Error

{error}

)} {/* Jobs List */} {jobs.size > 0 && (

Scraping Jobs

{jobs.size} {jobs.size === 1 ? 'Job' : 'Jobs'}
{Array.from(jobs.values()) .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()) .map(job => (
{getStatusIcon(job.status)}

Status: {job.status.toUpperCase()}

{job.job_id}

{job.url}

{/* Progress Bar for Running Jobs */} {job.status === 'running' && job.total_reviews !== null && job.reviews_count !== null && (
Extracting Reviews {job.reviews_count} / {job.total_reviews}
{/* total_reviews > 0 keeps the percentage finite */} {job.reviews_count > 0 && job.total_reviews > 0 && ( {Math.round((job.reviews_count / job.total_reviews) * 100)}% )}
)}
{job.reviews_count !== null && (
{job.reviews_count}
Reviews
)} {job.scrape_time !== null && (
{job.scrape_time.toFixed(1)}s
Time
)} {job.scrape_time !== null && job.scrape_time > 0 && job.reviews_count !== null && job.reviews_count > 0 && (
{(job.reviews_count / job.scrape_time).toFixed(1)}
Reviews/sec
)} {job.started_at && (
{new Date(job.started_at).toLocaleTimeString()}
Started
)} {job.status === 'running' && job.updated_at && (
{new Date(job.updated_at).toLocaleTimeString()}
Last Update
)}
{/* Action Buttons - Show when completed, partial, or running with reviews */} {(job.status === 'completed' || job.status === 'partial' || job.status === 'running') && job.reviews_count !== null && job.reviews_count > 0 && (
)} {/* Partial Job Warning */} {job.status === 'partial' && (

Partial Results

This job was interrupted but {job.reviews_count} reviews were saved. {job.error_message && Reason: {job.error_message}}

)} {/* Error Message */} {job.status === 'failed' && job.error_message && (

Error

{job.error_message}

)}
))}
)} {/* Analytics Dashboard or Simple Review List */} {reviews.length > 0 && ( <> {showAnalytics ? (
) : (

Reviews ({reviews.length})

{reviews.map((review, index) => (
{review.avatar_url && ( {review.author} )}
{review.author}
{[...Array(5)].map((_, i) => ( ))}

{review.date_text}

{review.text && (

{review.text}

)}
))}
)} )} {/* Confirmation Modal */} {showConfirmModal && (
setShowConfirmModal(false)} >
e.stopPropagation()} > {/* Header */}

Start Scraping?

{/* Content */}

This will start scraping reviews for:

{businessName}

{businessAddress && (

{businessAddress}

)}

The scraping job will run in the background. You can monitor progress below.

{/* Actions */}
)}
  );
}