Add browser fingerprint support and analytics metadata display

- Send the user's browser fingerprint (user agent, viewport, timezone,
  language, platform, IP-based geolocation) with validation and scrape
  requests so Chrome can use it for more authentic scraping
- Display review topics from Google Maps in analytics dashboard
- Show business category badge in analytics header
- Fix date_text null handling in analytics (fall back to the timestamp
  field when date_text is missing, and tolerate both being undefined)
- Add review_topics and business_category to JobStatus interface

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 10:36:06 +00:00
parent 1bd30c0789
commit a540ab97b1
9 changed files with 1214 additions and 231 deletions

View File

@@ -27,6 +27,8 @@ interface SelectedJob {
jobId: string;
newCount?: number;
previousJobId?: string;
businessCategory?: string;
reviewTopics?: { topic: string; count: number }[];
}
type ViewType = 'newScrape' | 'jobs' | 'reports';
@@ -106,6 +108,8 @@ export default function Home() {
jobId: job.job_id,
newCount: data.new_count,
previousJobId: previousJob?.job_id,
businessCategory: job.business_category || undefined,
reviewTopics: job.review_topics || undefined,
});
setActiveView('reports');
}
@@ -155,7 +159,7 @@ export default function Home() {
Back to Reports
</button>
</div>
<ReviewAnalytics reviews={selectedJob.reviews} businessName={selectedJob.businessName} businessUrl={selectedJob.businessUrl} newCount={selectedJob.newCount} />
<ReviewAnalytics reviews={selectedJob.reviews} businessName={selectedJob.businessName} businessUrl={selectedJob.businessUrl} newCount={selectedJob.newCount} businessCategory={selectedJob.businessCategory} reviewTopics={selectedJob.reviewTopics} />
</div>
) : (
<div className="h-full overflow-y-auto p-6">

View File

@@ -22,14 +22,21 @@ interface ReviewWithNew extends Review {
photo_urls?: string[] | null;
}
/**
 * One review topic as rendered in the "What People Talk About" section:
 * a label plus a numeric count shown in a badge next to it.
 */
interface ReviewTopic {
// Topic label displayed inside the chip.
topic: string;
// Number displayed in the badge; presumably how many reviews mention the topic — TODO confirm against the API.
count: number;
}
/**
 * Props accepted by the ReviewAnalytics component.
 */
interface ReviewAnalyticsProps {
// Reviews to analyse and display.
reviews: ReviewWithNew[];
// Header title; the header falls back to 'Review Analytics' when this is empty or missing.
businessName?: string;
// When set, rendered as a link (href) under the header title.
businessUrl?: string;
// Passed through from the selected job's newCount; how it is displayed is outside this excerpt.
newCount?: number;
// When set, shown as a badge next to the header title.
businessCategory?: string;
// When non-empty, rendered as topic chips; only the first 15 are shown and the remainder is summarised as "+N more topics".
reviewTopics?: ReviewTopic[];
}
export default function ReviewAnalytics({ reviews, businessName, businessUrl, newCount }: ReviewAnalyticsProps) {
export default function ReviewAnalytics({ reviews, businessName, businessUrl, newCount, businessCategory, reviewTopics }: ReviewAnalyticsProps) {
const [sorting, setSorting] = useState<SortingState>([{ id: 'date', desc: true }]); // Default: newest first
const [columnFilters, setColumnFiltersState] = useState<ColumnFiltersState>([]);
const [globalFilter, setGlobalFilter] = useState('');
@@ -476,9 +483,16 @@ export default function ReviewAnalytics({ reviews, businessName, businessUrl, ne
{/* Header */}
<div className="flex items-center justify-between">
<div>
<h2 className="text-3xl font-bold text-gray-900">
{businessName || 'Review Analytics'}
</h2>
<div className="flex items-center gap-3">
<h2 className="text-3xl font-bold text-gray-900">
{businessName || 'Review Analytics'}
</h2>
{businessCategory && (
<span className="px-3 py-1 bg-purple-100 text-purple-800 text-sm font-medium rounded-full border border-purple-300">
{businessCategory}
</span>
)}
</div>
{businessUrl && (
<a
href={businessUrl}
@@ -821,6 +835,33 @@ export default function ReviewAnalytics({ reviews, businessName, businessUrl, ne
</div>
</div>
{/* Review Topics - from Google Maps */}
{reviewTopics && reviewTopics.length > 0 && (
<div className="bg-white border-2 border-gray-300 rounded-xl p-5 shadow-md">
<div className="flex items-center gap-2 mb-4">
<MessageSquare className="w-6 h-6 text-indigo-600" />
<h3 className="text-lg font-bold text-gray-900">What People Talk About</h3>
<span className="text-sm text-gray-500">({reviewTopics.length} topics from Google)</span>
</div>
<div className="flex flex-wrap gap-2">
{reviewTopics.slice(0, 15).map((topic, idx) => (
<div
key={idx}
className="px-3 py-1.5 bg-gradient-to-r from-indigo-50 to-purple-50 border border-indigo-200 rounded-full flex items-center gap-2"
>
<span className="text-sm font-medium text-indigo-800">{topic.topic}</span>
<span className="text-xs bg-indigo-200 text-indigo-900 px-1.5 py-0.5 rounded-full font-bold">
{topic.count}
</span>
</div>
))}
</div>
{reviewTopics.length > 15 && (
<p className="text-sm text-gray-500 mt-3">+{reviewTopics.length - 15} more topics</p>
)}
</div>
)}
{/* Rating & Volume Timeline */}
{timelineData.length > 0 && (
<div className={`bg-white rounded-xl p-6 shadow-md transition-all ${

View File

@@ -15,7 +15,7 @@ interface Review {
export interface JobStatus {
job_id: string;
status: 'pending' | 'running' | 'completed' | 'failed';
status: 'pending' | 'running' | 'completed' | 'failed' | 'partial';
url: string;
created_at: string;
started_at: string | null;
@@ -28,8 +28,11 @@ export interface JobStatus {
// Business metadata for tracking and comparison
business_name: string | null;
business_address: string | null;
business_category: string | null;
rating_snapshot: number | null;
total_reviews_snapshot: number | null;
// Review topics extracted from Google Maps
review_topics: { topic: string; count: number }[] | null;
}
interface ScraperTestProps {
@@ -56,7 +59,64 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
const [businessRating, setBusinessRating] = useState<number | null>(null);
const [businessImage, setBusinessImage] = useState<string | null>(null);
const [businessCategory, setBusinessCategory] = useState<string | null>(null);
// Browser fingerprint gathered once on mount and sent along with the
// /api/check-reviews and /api/scrape request bodies.
// Starts as {} and every field is optional; geolocation is only filled in
// when the IP-based location lookup succeeds.
const [userFingerprint, setUserFingerprint] = useState<{
geolocation?: {lat: number, lng: number},
userAgent?: string,
viewport?: {width: number, height: number},
timezone?: string,
language?: string,
platform?: string
}>({});
const debounceRef = useRef<NodeJS.Timeout | null>(null);
// Collect a browser fingerprint once on mount. None of these reads triggers a
// permission prompt, but the location lookup does send a request to ipapi.co.
useEffect(() => {
  // Flipped by the cleanup below so a slow lookup cannot update state after unmount.
  let cancelled = false;

  const collectFingerprint = async () => {
    const fingerprint: typeof userFingerprint = {
      userAgent: navigator.userAgent,
      // NOTE: this is the full screen resolution (window.screen), not the inner
      // window size, even though the field is called "viewport".
      viewport: {
        width: window.screen.width,
        height: window.screen.height
      },
      timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
      language: navigator.language,
      platform: navigator.platform
    };

    // Approximate location derived from the client's IP. Best-effort: the
    // request is capped at 3 seconds and any failure leaves geolocation unset.
    try {
      const response = await fetch('https://ipapi.co/json/', {
        signal: AbortSignal.timeout(3000)
      });
      if (response.ok) {
        const data = await response.json();
        // Check for finite numbers instead of truthiness: 0 is a valid
        // latitude/longitude and must not be discarded.
        if (Number.isFinite(data?.latitude) && Number.isFinite(data?.longitude)) {
          fingerprint.geolocation = {
            lat: data.latitude,
            lng: data.longitude
          };
          console.log('IP location:', data.city, data.country_name);
        }
      }
    } catch {
      console.log('IP geolocation not available');
    }

    if (cancelled) {
      return;
    }
    setUserFingerprint(fingerprint);
    console.log('Browser fingerprint:', fingerprint);
  };

  collectFingerprint();

  return () => {
    cancelled = true;
  };
}, []);
const pollingIntervals = useRef<Map<string, NodeJS.Timeout>>(new Map());
const abortControllerRef = useRef<AbortController | null>(null);
@@ -121,18 +181,23 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
setBusinessCategory(null);
setError('');
// Create new abort controller with 30 second timeout
// Create new abort controller with 60 second timeout (validation can be slow)
const controller = new AbortController();
abortControllerRef.current = controller;
const timeoutId = setTimeout(() => controller.abort(), 30000);
const timeoutId = setTimeout(() => controller.abort(), 60000);
try {
const url = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(query)}`;
// Force English with hl=en parameter
const url = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(query)}&hl=en`;
const response = await fetch('/api/check-reviews', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url }),
body: JSON.stringify({
url,
geolocation: userFingerprint.geolocation,
browser_fingerprint: userFingerprint // Pass full fingerprint
}),
signal: controller.signal,
});
@@ -157,21 +222,30 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
} catch (err) {
clearTimeout(timeoutId);
// Ignore AbortError (happens when user starts a new validation)
// Check if this is a timeout abort vs user-initiated abort
if (err instanceof Error && err.name === 'AbortError') {
console.log('Validation cancelled (new validation started)');
return;
// Check if it was a timeout (controller still matches) or user started new search
if (abortControllerRef.current === controller) {
// Timeout - show error
console.error('Validation timed out');
setError('Validation timed out. Please try again.');
setHasReviews(false);
setAvailableReviewCount(0);
} else {
// User started a new search - just return silently
console.log('Validation cancelled (new validation started)');
return;
}
} else {
console.error('Error getting business info:', err);
// Error occurred
setHasReviews(false);
setAvailableReviewCount(0);
}
console.error('Error getting business info:', err);
// Error occurred
setHasReviews(false);
setAvailableReviewCount(0);
} finally {
// Only clear loading state if this controller wasn't aborted
if (!controller.signal.aborted) {
setIsCheckingReviews(false);
}
clearTimeout(timeoutId);
// Always clear loading state (even on timeout)
setIsCheckingReviews(false);
}
};
@@ -192,8 +266,8 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
return newMap;
});
// Stop polling if job is done
if (data.status === 'completed' || data.status === 'failed') {
// Stop polling if job is done (completed, failed, or partial)
if (data.status === 'completed' || data.status === 'failed' || data.status === 'partial') {
const interval = pollingIntervals.current.get(jobId);
if (interval) {
clearInterval(interval);
@@ -244,8 +318,8 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
setIsSubmitting(true);
setShowConfirmModal(false);
// Use the search query to create a Google Maps search URL
const url = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(searchedQuery)}`;
// Use the search query to create a Google Maps search URL (force English)
const url = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(searchedQuery)}&hl=en`;
try {
const response = await fetch('/api/scrape', {
@@ -257,6 +331,8 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
business_address: businessAddress,
rating_snapshot: businessRating,
total_reviews_snapshot: availableReviewCount,
geolocation: userFingerprint.geolocation,
browser_fingerprint: userFingerprint, // Pass full fingerprint
}),
});
@@ -283,8 +359,10 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
error_message: null,
business_name: businessName,
business_address: businessAddress,
business_category: businessCategory,
rating_snapshot: businessRating,
total_reviews_snapshot: availableReviewCount,
review_topics: null, // Will be populated when job completes
});
return newMap;
});
@@ -305,6 +383,7 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
case 'completed': return 'text-green-700';
case 'running': return 'text-blue-700';
case 'failed': return 'text-red-700';
case 'partial': return 'text-orange-700';
default: return 'text-gray-800';
}
};
@@ -325,6 +404,12 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
<path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z" clipRule="evenodd" />
</svg>
);
case 'partial':
return (
<svg className="w-5 h-5 text-orange-500" fill="currentColor" viewBox="0 0 20 20">
<path fillRule="evenodd" d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z" clipRule="evenodd" />
</svg>
);
default:
return (
<svg className="w-5 h-5 text-gray-400" fill="currentColor" viewBox="0 0 20 20">
@@ -776,8 +861,8 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
)}
</div>
{/* Action Buttons - Show when completed and has reviews */}
{job.status === 'completed' && job.reviews_count && job.reviews_count > 0 && (
{/* Action Buttons - Show when completed, partial, or running with reviews */}
{(job.status === 'completed' || job.status === 'partial' || (job.status === 'running' && job.reviews_count && job.reviews_count > 0)) && job.reviews_count && job.reviews_count > 0 && (
<div className="flex gap-3">
<button
onClick={async () => {
@@ -818,7 +903,13 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
}
}}
disabled={isLoadingReviews}
className="flex-1 py-4 bg-gradient-to-r from-blue-600 to-indigo-700 text-white rounded-xl font-bold hover:from-blue-700 hover:to-indigo-800 transition-all flex items-center justify-center gap-2 shadow-lg disabled:opacity-50 disabled:cursor-not-allowed text-lg border-2 border-blue-500"
className={`flex-1 py-4 text-white rounded-xl font-bold transition-all flex items-center justify-center gap-2 shadow-lg disabled:opacity-50 disabled:cursor-not-allowed text-lg border-2 ${
job.status === 'partial'
? 'bg-gradient-to-r from-orange-500 to-amber-600 hover:from-orange-600 hover:to-amber-700 border-orange-400'
: job.status === 'running'
? 'bg-gradient-to-r from-blue-500 to-cyan-600 hover:from-blue-600 hover:to-cyan-700 border-blue-400'
: 'bg-gradient-to-r from-blue-600 to-indigo-700 hover:from-blue-700 hover:to-indigo-800 border-blue-500'
}`}
>
{isLoadingReviews ? (
<>
@@ -830,7 +921,7 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
<svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
</svg>
📊 Open Analytics Dashboard
📊 {job.status === 'running' ? 'Preview Analytics' : job.status === 'partial' ? 'View Partial Data' : 'Open Analytics Dashboard'}
</>
)}
</button>
@@ -845,7 +936,7 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `reviews-${job.job_id}.json`;
a.download = `reviews-${job.job_id}${job.status === 'partial' ? '-partial' : ''}.json`;
a.click();
}
} catch (err) {
@@ -862,6 +953,24 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
</div>
)}
{/* Partial Job Warning */}
{job.status === 'partial' && (
<div className="mt-4 p-4 bg-orange-100 border-2 border-orange-300 rounded-lg">
<div className="flex items-start gap-2">
<svg className="w-5 h-5 text-orange-700 flex-shrink-0 mt-0.5" fill="currentColor" viewBox="0 0 20 20">
<path fillRule="evenodd" d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z" clipRule="evenodd" />
</svg>
<div>
<p className="font-bold text-orange-900">Partial Results</p>
<p className="text-sm text-orange-800 mt-1">
This job was interrupted but {job.reviews_count} reviews were saved.
{job.error_message && <span className="block mt-1 text-orange-700">Reason: {job.error_message}</span>}
</p>
</div>
</div>
</div>
)}
{/* Error Message */}
{job.status === 'failed' && job.error_message && (
<div className="mt-4 p-4 bg-red-100 border-2 border-red-300 rounded-lg">

View File

@@ -66,7 +66,9 @@ export function calculateReviewStats(reviews: Review[]): ReviewStats {
// Populate minDate/maxDate/centerDate on reviews for display
reviews.forEach(r => {
if (!r.minDate || !r.maxDate || !r.centerDate) {
const range = parseDateTextToRange(r.date_text);
// Handle both date_text and timestamp field names
const dateText = r.date_text || (r as any).timestamp || '';
const range = parseDateTextToRange(dateText);
r.minDate = range.minDate;
r.maxDate = range.maxDate;
// Calculate centerDate as midpoint
@@ -96,8 +98,8 @@ export function calculateReviewStats(reviews: Review[]): ReviewStats {
// Recent reviews (last 30 days - simplified check)
const recentReviews = reviews.filter(r => {
const text = r.date_text.toLowerCase();
return text.includes('day') || text.includes('week') || text.includes('hour');
const text = (r.date_text || (r as any).timestamp || '').toLowerCase();
return text.includes('day') || text.includes('week') || text.includes('hour') || text.includes('minute') || text.includes('second');
}).length;
// Rating distribution
@@ -278,6 +280,14 @@ function extractNumber(text: string): number {
*/
export function parseDateTextToRange(dateText: string): { minDate: Date; maxDate: Date } {
const now = new Date();
// Handle undefined/null dateText
if (!dateText) {
// Return a default range (assume recent - within last month)
const daysAgo = (days: number) => new Date(now.getTime() - days * 24 * 60 * 60 * 1000);
return { minDate: daysAgo(30), maxDate: now };
}
const text = dateText.toLowerCase();
// Remove "Edited " prefix if present
@@ -396,7 +406,8 @@ export function filterReviewsByDateRange(reviews: Review[], range: DateRange): R
// Filter range: [filterStart, filterEnd]
// Overlap occurs when: minDate <= filterEnd AND maxDate >= filterStart
return reviews.filter(r => {
const { minDate, maxDate } = parseDateTextToRange(r.date_text);
const dateText = r.date_text || (r as any).timestamp || '';
const { minDate, maxDate } = parseDateTextToRange(dateText);
return minDate <= filterEnd && maxDate >= filterStart;
});
}
@@ -405,7 +416,8 @@ export function filterReviewsByCustomDateRange(reviews: Review[], fromDate: Date
if (!fromDate && !toDate) return reviews;
return reviews.filter(r => {
const reviewDate = parseDateText(r.date_text);
const dateText = r.date_text || (r as any).timestamp || '';
const reviewDate = parseDateText(dateText);
// If only fromDate is set, filter reviews >= fromDate
if (fromDate && !toDate) {
@@ -429,7 +441,7 @@ export function filterReviewsByCustomDateRange(reviews: Review[], fromDate: Date
export function calculateTimelineData(reviews: Review[]): TimelineDataPoint[] {
// Sort reviews by date (newest first)
const sortedReviews = [...reviews]
.map(r => ({ ...r, parsedDate: parseDateText(r.date_text) }))
.map(r => ({ ...r, parsedDate: parseDateText(r.date_text || (r as any).timestamp || '') }))
.sort((a, b) => b.parsedDate.getTime() - a.parsedDate.getTime());
// Group by month