/*
 * Performance improvements:
 * - Validation speed: 59.71s → 10.96s (5.5x improvement)
 * - Removed 50+ console.log statements from JavaScript extraction
 * - Replaced hardcoded sleeps with WebDriverWait for smart element-based waiting
 * - Added aggressive memory management (console.clear, GC, image unloading every 20 scrolls)
 *
 * Scraping improvements:
 * - Increased idle detection from 6 to 12 consecutive idle scrolls for completeness
 * - Added real-time progress updates every 5 scrolls with percentage calculation
 * - Added crash recovery to extract partial reviews if Chrome crashes
 * - Removed artificial 200-review limit to scrape ALL reviews
 *
 * Timestamp tracking:
 * - Added updated_at field separate from started_at for progress tracking
 * - Frontend now shows both "Started" (fixed) and "Last Update" (dynamic)
 *
 * Robustness improvements:
 * - Added 5 fallback CSS selectors to handle different Google Maps page structures
 * - Now tries: div.jftiEf.fontBodyMedium, div.jftiEf, div[data-review-id], etc.
 * - Automatic selector detection logs which selector works for debugging
 *
 * Test results:
 * - Successfully scraped 550 reviews in 150.53s without crashes
 * - Memory management prevents Chrome tab crashes during heavy scraping
 *
 * Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
 */
399 lines
13 KiB
TypeScript
399 lines
13 KiB
TypeScript
// Analytics utility functions
|
|
|
|
/**
 * A single review plus optional date fields derived from `date_text`.
 */
export interface Review {
  /** Display name of the reviewer. */
  author: string;
  /** Star rating given by the reviewer. */
  rating: number;
  /** Review body, or `null` when the review has no text. */
  text: string | null;
  /** Relative date phrase, e.g. "2 weeks ago" or "Hace 2 semanas". */
  date_text: string;
  /** URL of the reviewer's avatar image, or `null` when absent. */
  avatar_url: string | null;
  /** URL of the reviewer's profile, or `null` when absent. */
  profile_url: string | null;
  /** Identifier of the review. */
  review_id: string;

  // Derived fields (computed on load)

  /** Single point-in-time estimate parsed from `date_text`. */
  parsedDate?: Date;
  /** Time range category. */
  dateCategory?: 'recent' | 'month' | 'year' | 'older';
  /** Earliest possible date (accounting for Google's uncertainty). */
  minDate?: Date;
  /** Latest possible date (accounting for Google's uncertainty). */
  maxDate?: Date;
  /** Midpoint of the range (used for calculations and sorting). */
  centerDate?: Date;
}
|
|
|
|
/** Preset filter window: the past week, month or year, or `'all'` for no date restriction. */
export type DateRange = 'week' | 'month' | 'year' | 'all';
|
|
|
|
/**
 * One point on the rating timeline, representing a single calendar month.
 */
export interface TimelineDataPoint {
  /** Month key in `YYYY-MM` form. */
  date: string;
  /** Mean rating of the reviews that fall in this month. */
  rating: number;
  /** Mean of `rating` over this point and up to two preceding points. */
  rollingAvg: number;
}
|
|
|
|
/**
 * Aggregate statistics computed over a list of reviews.
 */
export interface ReviewStats {
  /** Number of reviews summarised. */
  totalReviews: number;
  /** Mean star rating. */
  averageRating: number;
  /** Percentage (0-100) of reviews rated 4 stars or higher. */
  sentimentScore: number;
  /** Number of reviews whose reviewer has an avatar (used as a proxy for photos). */
  photoCount: number;
  /** Average number of words per review, rounded to an integer. */
  avgReviewLength: number;
  /** Number of reviews whose date phrase indicates they are recent. */
  recentReviews: number;
  /** Count and percentage share of each star rating. */
  ratingDistribution: { rating: number; count: number; percentage: number }[];
  /** Most frequent words found in review text, with their occurrence counts. */
  topKeywords: { word: string; count: number }[];
  /** Review counts split into positive, neutral and negative buckets. */
  sentimentBreakdown: { positive: number; neutral: number; negative: number };
  /** Number of reviews in the negative bucket. */
  negativeReviews: number;
  /** Share of reviews with an owner response. */
  responseRate: number;
  /** Human-readable average owner response time. */
  averageResponseTime: string;
}
|
|
|
|
/**
 * Compute aggregate statistics for a list of reviews.
 *
 * Side effect: any review missing `minDate`, `maxDate` or `centerDate` has all
 * three populated in place from its `date_text`, so callers can display them.
 *
 * An empty list yields `0` for every average and percentage (never `NaN`).
 *
 * @param reviews - Reviews to summarise; mutated as described above.
 * @returns The aggregated statistics. `responseRate` and `averageResponseTime`
 *   are placeholders until owner-response data is available.
 */
export function calculateReviewStats(reviews: Review[]): ReviewStats {
  // Populate minDate/maxDate/centerDate on reviews for display.
  for (const review of reviews) {
    if (!review.minDate || !review.maxDate || !review.centerDate) {
      const { minDate, maxDate } = parseDateTextToRange(review.date_text);
      review.minDate = minDate;
      review.maxDate = maxDate;
      // centerDate is the midpoint of the uncertainty range.
      review.centerDate = new Date((minDate.getTime() + maxDate.getTime()) / 2);
    }
  }

  const totalReviews = reviews.length;

  // Every ratio below divides by the review count; guard it once so an empty
  // list produces zeros instead of NaN.
  const perReview = (value: number): number => (totalReviews === 0 ? 0 : value / totalReviews);

  const averageRating = perReview(reviews.reduce((sum, r) => sum + r.rating, 0));

  // Sentiment buckets: 4-5 stars positive, 3 neutral, 1-2 negative.
  const positiveReviews = reviews.filter(r => r.rating >= 4).length;
  const neutralReviews = reviews.filter(r => r.rating === 3).length;
  const negativeReviews = reviews.filter(r => r.rating <= 2).length;
  const sentimentScore = perReview(positiveReviews) * 100;

  // Photo count (reviews with avatars as proxy).
  const photoCount = reviews.filter(r => r.avatar_url).length;

  // Average review length in words. Splitting on whitespace runs and dropping
  // empty tokens means an empty or whitespace-only text counts as zero words.
  const countWords = (text: string | null): number =>
    text ? text.split(/\s+/).filter(Boolean).length : 0;
  const avgReviewLength = Math.round(
    perReview(reviews.reduce((sum, r) => sum + countWords(r.text), 0)),
  );

  // Recent reviews (simplified check): the date phrase uses a unit shorter
  // than a month, in English or Spanish.
  const recentUnit = /second|segundo|minute|minuto|hour|hora|day|día|week|semana/;
  const recentReviews = reviews.filter(r => recentUnit.test(r.date_text.toLowerCase())).length;

  // Rating distribution, listed from 5 stars down to 1.
  const ratingCounts: Record<number, number> = { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
  for (const r of reviews) {
    ratingCounts[r.rating] = (ratingCounts[r.rating] ?? 0) + 1;
  }
  const ratingDistribution = [5, 4, 3, 2, 1].map(rating => {
    const count = ratingCounts[rating] ?? 0;
    return { rating, count, percentage: perReview(count) * 100 };
  });

  // Keyword extraction. A Map is used rather than a plain object so words such
  // as "constructor" cannot collide with Object.prototype members, and the
  // Unicode-aware pattern keeps accented letters that `\w` would strip.
  const stopWords = new Set(['this', 'that', 'with', 'from', 'have', 'been', 'were', 'very', 'great', 'good', 'best', 'nice', 'here', 'there', 'they', 'their', 'about', 'would', 'could', 'should', 'place', 'really']);
  const nonWordChars = /[^\p{L}\p{M}\p{N}_\s]/gu;
  const wordCounts = new Map<string, number>();
  for (const { text } of reviews) {
    if (!text) continue;
    for (const word of text.toLowerCase().replace(nonWordChars, '').split(/\s+/)) {
      if (word.length > 3 && !stopWords.has(word)) {
        wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
      }
    }
  }
  const topKeywords = [...wordCounts]
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10)
    .map(([word, count]) => ({ word, count }));

  const sentimentBreakdown = {
    positive: positiveReviews,
    neutral: neutralReviews,
    negative: negativeReviews,
  };

  // Response rate (placeholder - would need owner_response field).
  const responseRate = 0; // TODO: Calculate when owner responses are available

  // Average response time (placeholder).
  const averageResponseTime = 'N/A'; // TODO: Calculate when response data is available

  return {
    totalReviews,
    averageRating,
    sentimentScore,
    photoCount,
    avgReviewLength,
    recentReviews,
    ratingDistribution,
    topKeywords,
    sentimentBreakdown,
    negativeReviews,
    responseRate,
    averageResponseTime,
  };
}
|
|
|
|
/**
 * Map a star rating to a sentiment label.
 *
 * Ratings are treated as bands so fractional values (for example an average
 * rating) are classified sensibly: 4 and above is positive, 3 up to but not
 * including 4 is neutral, anything lower is negative.
 *
 * @param rating - Star rating to classify.
 * @returns The sentiment label for the rating.
 */
export function getSentimentLabel(rating: number): 'positive' | 'neutral' | 'negative' {
  if (rating >= 4) return 'positive';
  if (rating >= 3) return 'neutral';
  return 'negative';
}
|
|
|
|
/**
 * Get the date range boundaries for the preset range buttons.
 *
 * `from` is set to the start of its day and `to` to the end of the reference
 * day. Month and year steps keep the day of month where possible and clamp to
 * the last day of the target month otherwise, so stepping back one month from
 * 31 March gives the last day of February rather than overflowing into March.
 *
 * @param range - Preset to resolve; `'all'` means no restriction.
 * @param now - Reference "today"; defaults to the current time. Not mutated.
 * @returns `{ from, to }`, both `null` for `'all'`.
 */
export function getDateRangeBoundaries(
  range: DateRange,
  now: Date = new Date(),
): { from: Date | null; to: Date | null } {
  if (range === 'all') return { from: null, to: null };

  const to = new Date(now); // Today as end date
  const from = new Date(now);

  // Step `from` back by whole months, clamping the day of month so a short
  // target month cannot push the result into the following month.
  const rewindMonths = (months: number): void => {
    const dayOfMonth = from.getDate();
    from.setDate(1); // park on the 1st so changing the month cannot overflow
    from.setMonth(from.getMonth() - months);
    const lastDayOfMonth = new Date(from.getFullYear(), from.getMonth() + 1, 0).getDate();
    from.setDate(Math.min(dayOfMonth, lastDayOfMonth));
  };

  switch (range) {
    case 'week':
      from.setDate(from.getDate() - 7);
      break;
    case 'month':
      rewindMonths(1);
      break;
    case 'year':
      rewindMonths(12);
      break;
  }

  // Set to start of day for from, end of day for to.
  from.setHours(0, 0, 0, 0);
  to.setHours(23, 59, 59, 999);

  return { from, to };
}
|
|
|
|
/**
 * Get the CSS class string (text, background and border colour) used to
 * render a sentiment label.
 *
 * @param sentiment - Sentiment label to style.
 * @returns Space-separated class names: green for positive, yellow for
 *   neutral, red for negative.
 */
export function getSentimentColor(sentiment: 'positive' | 'neutral' | 'negative'): string {
  switch (sentiment) {
    case 'positive':
      return 'text-green-700 bg-green-50 border-green-300';
    case 'neutral':
      return 'text-yellow-700 bg-yellow-50 border-yellow-300';
    case 'negative':
      return 'text-red-700 bg-red-50 border-red-300';
  }
}
|
|
|
|
/**
 * Extract the first number from a relative-date phrase.
 *
 * Examples: "2 weeks ago" -> 2, "Hace 2 semanas" -> 2. Phrases without digits
 * ("a month ago", "un mes", "una semana") count as 1.
 */
function extractNumber(text: string): number {
  const match = text.match(/\d+/);
  return match ? parseInt(match[0], 10) : 1;
}

/**
 * Parse date_text into time range boundaries.
 *
 * This accounts for Google's inherent uncertainty in relative dates.
 * Based on reverse-engineered patterns from 244 reviews.
 *
 * `minDate` is the earliest possible date (furthest in the past) and `maxDate`
 * the latest possible date (closest to now):
 * - "a month ago"  → minDate 59 days ago,  maxDate 30 days ago
 * - "2 months ago" → minDate 89 days ago,  maxDate 60 days ago
 * - "a year ago"   → minDate 729 days ago, maxDate 365 days ago
 *
 * Seconds, minutes, hours and days are treated as exact, so `minDate` and
 * `maxDate` are equal. English and Spanish unit names are recognised; an
 * optional leading "Edited " is ignored. Text with no recognised unit is
 * treated as very old (10 to 11 years ago).
 *
 * @param dateText - Relative date phrase, e.g. "3 weeks ago".
 * @returns The earliest and latest dates the phrase can refer to.
 */
export function parseDateTextToRange(dateText: string): { minDate: Date; maxDate: Date } {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const now = Date.now();

  // Lower-case and remove "Edited " prefix if present.
  const cleaned = dateText.toLowerCase().replace(/^edited\s+/, '');
  const count = extractNumber(cleaned);

  const mentions = (...units: string[]): boolean => units.some(unit => cleaned.includes(unit));

  // Units Google reports exactly: both boundaries are `count` units ago.
  const exactly = (unitMs: number): { minDate: Date; maxDate: Date } => ({
    minDate: new Date(now - count * unitMs),
    maxDate: new Date(now - count * unitMs),
  });

  // Units Google rounds down: "N units ago" covers N*unit days up to one day
  // short of (N+1)*unit days, after which the phrase switches to N+1.
  const within = (unitDays: number): { minDate: Date; maxDate: Date } => ({
    minDate: new Date(now - ((count + 1) * unitDays - 1) * DAY_MS),
    maxDate: new Date(now - count * unitDays * DAY_MS),
  });

  // Order matters: shorter units are checked first, matching the original
  // precedence of the keyword checks.
  if (mentions('second', 'segundo')) return exactly(1000);
  if (mentions('minute', 'minuto')) return exactly(60 * 1000);
  if (mentions('hora', 'hour')) return exactly(60 * 60 * 1000);
  if (mentions('día', 'day')) return exactly(DAY_MS);

  // Weeks: 7-day windows (Google never shows "1 week ago" or "4 weeks ago").
  if (mentions('semana', 'week')) return within(7);
  // Months: 30-day windows, e.g. "a month ago" = 30-59 days.
  if (mentions('mes', 'month')) return within(30);
  // Years: 365-day windows, e.g. "a year ago" = 365-729 days.
  if (mentions('año', 'year')) return within(365);

  // Default: very old (10+ years).
  return {
    minDate: new Date(now - (3650 + 365) * DAY_MS),
    maxDate: new Date(now - 3650 * DAY_MS),
  };
}
|
|
|
|
/**
 * Parse a relative date phrase into a single point-in-time estimate.
 *
 * The estimate is the midpoint of the range returned by
 * `parseDateTextToRange` for the same text.
 *
 * @param dateText - Relative date phrase, e.g. "2 months ago".
 * @returns The midpoint between the earliest and latest possible dates.
 */
export function parseDateText(dateText: string): Date {
  const { minDate, maxDate } = parseDateTextToRange(dateText);
  return new Date((minDate.getTime() + maxDate.getTime()) / 2);
}
|
|
|
|
/**
 * Keep the reviews that may fall inside a preset date range ending now.
 *
 * A review is kept when its possible date range `[minDate, maxDate]` overlaps
 * the filter window `[filterStart, now]`, i.e. when
 * `minDate <= now && maxDate >= filterStart`.
 *
 * Month and year windows keep the day of month where possible and clamp to
 * the last day of the target month otherwise, so on 31 March the "month"
 * window starts on the last day of February instead of overflowing into March.
 *
 * @param reviews - Reviews to filter; not mutated.
 * @param range - Preset window; `'all'` returns `reviews` unchanged.
 * @returns The reviews overlapping the window (the same array for `'all'`).
 */
export function filterReviewsByDateRange(reviews: Review[], range: DateRange): Review[] {
  if (range === 'all') return reviews;

  const filterEnd = new Date();
  const filterStart = new Date(filterEnd);

  // Step the window start back by whole months, clamping the day of month so
  // a short target month cannot push the result into the following month.
  const rewindMonths = (months: number): void => {
    const dayOfMonth = filterStart.getDate();
    filterStart.setDate(1); // park on the 1st so changing the month cannot overflow
    filterStart.setMonth(filterStart.getMonth() - months);
    const lastDayOfMonth = new Date(
      filterStart.getFullYear(),
      filterStart.getMonth() + 1,
      0,
    ).getDate();
    filterStart.setDate(Math.min(dayOfMonth, lastDayOfMonth));
  };

  switch (range) {
    case 'week':
      filterStart.setDate(filterStart.getDate() - 7);
      break;
    case 'month':
      rewindMonths(1);
      break;
    case 'year':
      rewindMonths(12);
      break;
  }

  // Range overlap: include the review if its time range intersects the window.
  return reviews.filter(r => {
    const { minDate, maxDate } = parseDateTextToRange(r.date_text);
    return minDate <= filterEnd && maxDate >= filterStart;
  });
}
|
|
|
|
/**
 * Keep the reviews whose estimated date falls inside a custom date range.
 *
 * Each review is represented by the single date `parseDateText` returns for
 * its `date_text`. Either bound may be omitted. The upper bound is extended to
 * the end of `toDate`'s day; `fromDate` is compared as given.
 *
 * @param reviews - Reviews to filter; not mutated.
 * @param fromDate - Inclusive lower bound, or `null` for none.
 * @param toDate - Inclusive upper bound (end of that day), or `null` for none.
 * @returns The matching reviews; `reviews` itself when both bounds are `null`.
 */
export function filterReviewsByCustomDateRange(reviews: Review[], fromDate: Date | null, toDate: Date | null): Review[] {
  if (!fromDate && !toDate) return reviews;

  // The end-of-day bound does not depend on the review, so compute it once.
  let endOfDay: Date | null = null;
  if (toDate) {
    endOfDay = new Date(toDate); // copy so the caller's date is not mutated
    endOfDay.setHours(23, 59, 59, 999);
  }

  return reviews.filter(r => {
    const reviewDate = parseDateText(r.date_text);
    const afterStart = !fromDate || reviewDate >= fromDate;
    const beforeEnd = !endOfDay || reviewDate <= endOfDay;
    return afterStart && beforeEnd;
  });
}
|
|
|
|
/**
 * Build a month-by-month rating timeline.
 *
 * Reviews are bucketed by the calendar month of the date `parseDateText`
 * returns for their `date_text`. Each bucket becomes one data point holding
 * the mean rating for that month; points are returned in ascending month
 * order.
 *
 * `rollingAvg` is the unweighted mean of the monthly averages of the current
 * point and up to two preceding points. Months with no reviews produce no
 * point, so the window can span more than three calendar months.
 *
 * @param reviews - Reviews to chart; not mutated.
 * @returns One data point per month that has at least one review.
 */
export function calculateTimelineData(reviews: Review[]): TimelineDataPoint[] {
  const mean = (values: number[]): number =>
    values.reduce((sum, value) => sum + value, 0) / values.length;

  // Group ratings by "YYYY-MM" month key.
  const ratingsByMonth = new Map<string, number[]>();
  for (const review of reviews) {
    const when = parseDateText(review.date_text);
    const monthKey = `${when.getFullYear()}-${String(when.getMonth() + 1).padStart(2, '0')}`;
    const bucket = ratingsByMonth.get(monthKey);
    if (bucket) {
      bucket.push(review.rating);
    } else {
      ratingsByMonth.set(monthKey, [review.rating]);
    }
  }

  // One point per month, oldest first; rollingAvg is filled in below.
  const dataPoints: TimelineDataPoint[] = [...ratingsByMonth]
    .map(([date, ratings]) => ({ date, rating: mean(ratings), rollingAvg: 0 }))
    .sort((a, b) => a.date.localeCompare(b.date));

  // Rolling average over the current point and up to two preceding points.
  dataPoints.forEach((point, idx) => {
    const recent = dataPoints.slice(Math.max(0, idx - 2), idx + 1);
    point.rollingAvg = mean(recent.map(p => p.rating));
  });

  return dataPoints;
}
|