Files
whyrating-engine-legacy/web/lib/analytics.ts
Alejandro Gutiérrez a540ab97b1 Add browser fingerprint support and analytics metadata display
- Transfer user's browser fingerprint (user-agent, viewport, timezone,
  language, geolocation) to Chrome for more authentic scraping
- Display review topics from Google Maps in analytics dashboard
- Show business category badge in analytics header
- Fix date_text null handling in analytics (handle undefined/timestamp fields)
- Add review_topics and business_category to JobStatus interface

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 10:36:06 +00:00

488 lines
17 KiB
TypeScript

// Analytics utility functions
/** Reply attached to a review via `Review.owner_response`. */
export interface OwnerResponse {
// Reply body; calculateReviewStats counts a review as answered only when this is non-empty.
text: string;
// Optional time of the reply. NOTE(review): format is not visible in this file — confirm before parsing.
timestamp?: string;
}
/** A single review record plus optional derived date fields. */
export interface Review {
author: string;
// Numeric rating; the stats code tallies values 1-5 and treats >= 4 as positive, 3 as neutral, <= 2 as negative.
rating: number;
// Review body; may be null, in which case it is skipped for keyword extraction.
text: string | null;
// Relative date phrase such as "2 weeks ago" or "Hace 2 semanas"; parsed by parseDateTextToRange.
date_text: string;
avatar_url: string | null;
profile_url: string | null;
review_id: string;
// Reply from the owner, if any; a review counts as answered when owner_response.text is non-empty.
owner_response?: OwnerResponse | null;
// Attached photo URLs; a non-empty array makes the review count toward ReviewStats.photoCount.
photo_urls?: string[] | null;
// Derived fields (computed on load)
// NOTE(review): in the code visible here only minDate/maxDate/centerDate are assigned (by calculateReviewStats);
// parsedDate and dateCategory are presumably populated elsewhere — confirm.
parsedDate?: Date;
dateCategory?: 'recent' | 'month' | 'year' | 'older'; // Time range category
minDate?: Date; // Earliest possible date (accounting for Google's uncertainty)
maxDate?: Date; // Latest possible date (accounting for Google's uncertainty)
centerDate?: Date; // Midpoint of the range (used for calculations and sorting)
}
/** Preset time window accepted by the date helpers; 'all' means no date restriction. */
export type DateRange = 'week' | 'month' | 'year' | 'all';
/** One point of the series produced by calculateTimelineData (one calendar month per point). */
export interface TimelineDataPoint {
// Month label in "Mon YYYY" form, e.g. "Jan 2025".
date: string;
// Mean rating of the reviews that fall in this month.
rating: number;
// Mean of `rating` over this point and up to two preceding points.
rollingAvg: number;
count: number; // Number of reviews in this period
}
/** Aggregate metrics returned by calculateReviewStats. */
export interface ReviewStats {
totalReviews: number;
// Mean of all ratings; 0 when there are no reviews.
averageRating: number;
// Percentage (0-100) of reviews rated 4 or higher.
sentimentScore: number;
// Number of reviews that have at least one photo URL.
photoCount: number;
// Average number of words per review, rounded to an integer.
avgReviewLength: number;
// Number of reviews whose date text is expressed in small units (days, weeks, hours, ...).
recentReviews: number;
// One entry per star value, ordered 5 down to 1.
ratingDistribution: { rating: number; count: number; percentage: number }[];
// Up to 10 most frequent words from review text, most frequent first.
topKeywords: { word: string; count: number }[];
// Review counts by rating bucket: >= 4, exactly 3, <= 2.
sentimentBreakdown: { positive: number; neutral: number; negative: number };
// Number of reviews rated 2 or lower.
negativeReviews: number;
// Percentage (0-100) of reviews that have a non-empty owner response.
responseRate: number;
// Currently always 'N/A' (placeholder until response timestamps are used).
averageResponseTime: string;
// Response breakdown
responseBreakdown: { answered: number; notAnswered: number };
// New trend metrics
// Average rating over the last 90 days vs the 90 days before that, each rounded to one decimal.
ratingTrend: {
recentAvg: number;
olderAvg: number;
change: number; // positive = improvement, negative = decline
periodLabel: string;
};
// Review counts over the same two 90-day windows.
reviewVelocity: {
recentCount: number;
olderCount: number;
changePercent: number; // positive = more reviews, negative = fewer
periodLabel: string;
};
}
/**
 * Compute aggregate statistics for a list of reviews.
 *
 * Side effect: every review that lacks `minDate`, `maxDate` or `centerDate`
 * gets all three filled in place, parsed from `date_text` (or from a
 * `timestamp` field when `date_text` is empty), so callers can display them.
 *
 * Percentages are on a 0-100 scale. Every ratio falls back to 0 when its
 * denominator is 0, so an empty `reviews` array yields zeros rather than NaN.
 *
 * @param reviews Reviews to analyse; mutated as described above.
 * @returns Totals, distributions, top keywords and 3-month trend metrics.
 */
export function calculateReviewStats(reviews: Review[]): ReviewStats {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const TREND_LABEL = 'last 3 months vs previous 3 months';
  // Unit words (English and Spanish) that only appear in date texts younger than a month.
  const RECENT_UNIT_WORDS = [
    'second', 'segundo', 'minute', 'minuto', 'hour', 'hora', 'day', 'día', 'week', 'semana',
  ];

  // The relative date text may arrive under either field name.
  const dateTextOf = (r: Review): string =>
    r.date_text || (r as Review & { timestamp?: string }).timestamp || '';
  const percentOf = (part: number, whole: number): number =>
    whole > 0 ? (part / whole) * 100 : 0;
  const meanRating = (items: Review[]): number =>
    items.length > 0 ? items.reduce((sum, r) => sum + r.rating, 0) / items.length : 0;
  const roundToTenth = (value: number): number => Math.round(value * 10) / 10;
  // Whitespace-delimited word count; null, empty or blank text counts as 0 words.
  const wordCount = (text: string | null): number =>
    text ? text.split(/\s+/).filter(Boolean).length : 0;

  // Populate minDate/maxDate/centerDate on reviews for display.
  for (const r of reviews) {
    if (r.minDate && r.maxDate && r.centerDate) continue;
    const range = parseDateTextToRange(dateTextOf(r));
    r.minDate = range.minDate;
    r.maxDate = range.maxDate;
    r.centerDate = new Date((range.minDate.getTime() + range.maxDate.getTime()) / 2);
  }

  const totalReviews = reviews.length;
  const averageRating = meanRating(reviews);

  // Sentiment buckets by star rating.
  const positiveCount = reviews.filter(r => r.rating >= 4).length;
  const neutralCount = reviews.filter(r => r.rating === 3).length;
  const negativeCount = reviews.filter(r => r.rating <= 2).length;
  const sentimentScore = percentOf(positiveCount, totalReviews);
  const sentimentBreakdown = {
    positive: positiveCount,
    neutral: neutralCount,
    negative: negativeCount,
  };

  // Reviews with at least one photo attached.
  const photoCount = reviews.filter(r => (r.photo_urls?.length ?? 0) > 0).length;

  const avgReviewLength = totalReviews > 0
    ? Math.round(reviews.reduce((sum, r) => sum + wordCount(r.text), 0) / totalReviews)
    : 0;

  // Simplified recency check based on the unit mentioned in the date text.
  const recentReviews = reviews.filter(r => {
    const text = dateTextOf(r).toLowerCase();
    return RECENT_UNIT_WORDS.some(unit => text.includes(unit));
  }).length;

  // Rating distribution, always reported for 5 down to 1 stars.
  const ratingCounts: Record<number, number> = { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
  for (const r of reviews) {
    ratingCounts[r.rating] = (ratingCounts[r.rating] || 0) + 1;
  }
  const ratingDistribution = [5, 4, 3, 2, 1].map(rating => {
    const count = ratingCounts[rating] || 0;
    return { rating, count, percentage: percentOf(count, totalReviews) };
  });

  // Keyword extraction. A Map (not a plain object) keeps words such as
  // "constructor" from colliding with Object.prototype members, and the
  // Unicode-aware pattern keeps accented letters (e.g. "española") intact.
  const stopWords = new Set(['this', 'that', 'with', 'from', 'have', 'been', 'were', 'very', 'great', 'good', 'best', 'nice', 'here', 'there', 'they', 'their', 'about', 'would', 'could', 'should', 'place', 'really']);
  const wordCounts = new Map<string, number>();
  for (const r of reviews) {
    if (!r.text) continue;
    const words = r.text
      .toLowerCase()
      .replace(/[^\p{L}\p{N}_\s]/gu, '')
      .split(/\s+/);
    for (const word of words) {
      if (word.length > 3 && !stopWords.has(word)) {
        wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
      }
    }
  }
  const topKeywords = [...wordCounts.entries()]
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10)
    .map(([word, count]) => ({ word, count }));

  // Owner responses: a review is answered when it has non-empty response text.
  const answeredReviews = reviews.filter(r => r.owner_response?.text).length;
  const responseBreakdown = {
    answered: answeredReviews,
    notAnswered: totalReviews - answeredReviews,
  };
  const responseRate = percentOf(answeredReviews, totalReviews);
  const averageResponseTime = 'N/A'; // TODO: Calculate when response timestamps are available

  // Trend windows: the last 90 days vs the 90 days before that, by centerDate.
  const nowMs = Date.now();
  const threeMonthsAgoMs = nowMs - 90 * DAY_MS;
  const sixMonthsAgoMs = nowMs - 180 * DAY_MS;
  const recentWindow = reviews.filter(
    r => r.centerDate !== undefined && r.centerDate.getTime() >= threeMonthsAgoMs,
  );
  const olderWindow = reviews.filter(
    r =>
      r.centerDate !== undefined &&
      r.centerDate.getTime() < threeMonthsAgoMs &&
      r.centerDate.getTime() >= sixMonthsAgoMs,
  );

  const recentAvg = meanRating(recentWindow);
  const olderAvg = meanRating(olderWindow);
  const ratingTrend = {
    recentAvg: roundToTenth(recentAvg),
    olderAvg: roundToTenth(olderAvg),
    change: roundToTenth(recentAvg - olderAvg),
    periodLabel: TREND_LABEL,
  };

  // With no older reviews to compare against, any recent activity is reported as +100%.
  let changePercent: number;
  if (olderWindow.length > 0) {
    changePercent = Math.round(
      ((recentWindow.length - olderWindow.length) / olderWindow.length) * 100,
    );
  } else {
    changePercent = recentWindow.length > 0 ? 100 : 0;
  }
  const reviewVelocity = {
    recentCount: recentWindow.length,
    olderCount: olderWindow.length,
    changePercent,
    periodLabel: TREND_LABEL,
  };

  return {
    totalReviews,
    averageRating,
    sentimentScore,
    photoCount,
    avgReviewLength,
    recentReviews,
    ratingDistribution,
    topKeywords,
    sentimentBreakdown,
    negativeReviews: negativeCount,
    responseRate,
    averageResponseTime,
    responseBreakdown,
    ratingTrend,
    reviewVelocity,
  };
}
/**
 * Map a star rating to a sentiment bucket.
 *
 * Ratings of 4 or more are positive, exactly 3 is neutral, and everything
 * else (including fractional values between 3 and 4) is negative.
 */
export function getSentimentLabel(rating: number): 'positive' | 'neutral' | 'negative' {
  return rating >= 4 ? 'positive' : rating === 3 ? 'neutral' : 'negative';
}
/**
 * Get the calendar boundaries for a preset date-range button.
 *
 * `from` is the start of the day one week / month / year before `now`, and
 * `to` is the end of the day of `now`. Month and year steps are clamped to the
 * last day of the target month, so Mar 31 minus one month is Feb 28/29 (not
 * early March) and Feb 29 minus one year is Feb 28.
 *
 * @param range Preset to resolve; 'all' yields `{ from: null, to: null }`.
 * @param now Reference instant, defaults to the current time; not mutated.
 * @returns Inclusive local-time boundaries for the preset.
 */
export function getDateRangeBoundaries(
  range: DateRange,
  now: Date = new Date(),
): { from: Date | null; to: Date | null } {
  if (range === 'all') return { from: null, to: null };

  const from = new Date(now);
  const to = new Date(now);

  // Step `from` back by whole months. The day is parked on the 1st while the
  // month changes because Date#setMonth would otherwise roll an out-of-range
  // day (e.g. Feb 31) over into the following month.
  const rewindMonths = (months: number): void => {
    const day = from.getDate();
    from.setDate(1);
    from.setMonth(from.getMonth() - months);
    const lastDayOfMonth = new Date(from.getFullYear(), from.getMonth() + 1, 0).getDate();
    from.setDate(Math.min(day, lastDayOfMonth));
  };

  switch (range) {
    case 'week':
      from.setDate(from.getDate() - 7);
      break;
    case 'month':
      rewindMonths(1);
      break;
    case 'year':
      rewindMonths(12);
      break;
  }

  // Start of day for from, end of day for to.
  from.setHours(0, 0, 0, 0);
  to.setHours(23, 59, 59, 999);
  return { from, to };
}
/**
 * Look up the CSS utility classes (text, background and border colour) used
 * to render a sentiment badge.
 */
export function getSentimentColor(sentiment: 'positive' | 'neutral' | 'negative'): string {
  const classesBySentiment: Record<'positive' | 'neutral' | 'negative', string> = {
    positive: 'text-green-700 bg-green-50 border-green-300',
    neutral: 'text-yellow-700 bg-yellow-50 border-yellow-300',
    negative: 'text-red-700 bg-red-50 border-red-300',
  };
  return classesBySentiment[sentiment];
}
/**
 * Extract the unit count from a relative date phrase.
 *
 * Returns the first run of digits ("2 weeks ago" -> 2, "hace 2 semanas" -> 2).
 * Phrases without digits are singular forms ("a month ago", "un mes",
 * "una semana") and count as 1.
 */
function extractNumber(text: string): number {
  const match = text.match(/\d+/);
  return match ? parseInt(match[0], 10) : 1;
}

/**
 * Parse date_text into time range boundaries (min/max dates).
 *
 * This accounts for Google's inherent uncertainty in relative dates.
 * Based on reverse-engineered patterns from 244 reviews.
 *
 * `minDate` is the earliest (oldest) possible instant and `maxDate` the latest:
 * - "a month ago"  -> minDate = 59 days ago,  maxDate = 30 days ago
 * - "2 months ago" -> minDate = 89 days ago,  maxDate = 60 days ago
 * - "a year ago"   -> minDate = 729 days ago, maxDate = 365 days ago
 *
 * Seconds, minutes, hours and days are treated as exact (minDate equals
 * maxDate). English and Spanish unit words are recognised. An empty input is
 * assumed to be recent (within the last 30 days); text with no recognised unit
 * is treated as very old (10 to 11 years ago).
 *
 * @param dateText Relative date phrase, e.g. "2 weeks ago" or "Hace 2 semanas".
 * @returns The possible date range, computed relative to the current time.
 */
export function parseDateTextToRange(dateText: string): { minDate: Date; maxDate: Date } {
  const SECOND_MS = 1000;
  const MINUTE_MS = 60 * SECOND_MS;
  const HOUR_MS = 60 * MINUTE_MS;
  const DAY_MS = 24 * HOUR_MS;

  const now = new Date();
  const daysAgo = (days: number): Date => new Date(now.getTime() - days * DAY_MS);
  // Range covering [maxDays, minDays] days before now (oldest first).
  const span = (minDays: number, maxDays: number) => ({
    minDate: daysAgo(maxDays),
    maxDate: daysAgo(minDays),
  });
  // Zero-width range at an exact offset; two distinct Date objects are returned
  // so that a caller mutating one does not affect the other.
  const instant = (msAgo: number) => ({
    minDate: new Date(now.getTime() - msAgo),
    maxDate: new Date(now.getTime() - msAgo),
  });

  // Missing text: assume recent (within the last month).
  if (!dateText) {
    return { minDate: daysAgo(30), maxDate: now };
  }

  // Lower-case and drop a leading "Edited " so only the relative phrase remains.
  const cleaned = dateText.toLowerCase().replace(/^edited\s+/, '');
  const mentions = (...unitWords: string[]): boolean =>
    unitWords.some(word => cleaned.includes(word));
  const count = extractNumber(cleaned);

  // Checked from the smallest unit to the largest; the first match wins.
  if (mentions('second', 'segundo')) return instant(count * SECOND_MS);
  if (mentions('minute', 'minuto')) return instant(count * MINUTE_MS);
  if (mentions('hour', 'hora')) return instant(count * HOUR_MS);
  if (mentions('day', 'día')) return instant(count * DAY_MS);

  // N weeks covers a 7-day bucket: N*7 up to N*7 + 6 days ago.
  if (mentions('week', 'semana')) return span(count * 7, count * 7 + 6);

  // N months covers N*30 up to (N+1)*30 - 1 days ago ("a month ago" = 30-59 days).
  if (mentions('month', 'mes')) return span(count * 30, (count + 1) * 30 - 1);

  // N years covers N*365 up to (N+1)*365 - 1 days ago ("a year ago" = 365-729 days).
  if (mentions('year', 'año')) return span(count * 365, (count + 1) * 365 - 1);

  // Default: very old (10+ years).
  return span(3650, 3650 + 365);
}
/**
 * Resolve a relative date phrase to a single representative Date: the
 * midpoint of the range returned by parseDateTextToRange.
 */
export function parseDateText(dateText: string): Date {
  const range = parseDateTextToRange(dateText);
  const earliestMs = range.minDate.getTime();
  const latestMs = range.maxDate.getTime();
  return new Date((earliestMs + latestMs) / 2);
}
/**
 * Keep the reviews whose possible date range overlaps a preset window that
 * ends now.
 *
 * Because relative dates are uncertain, a review is kept when its
 * [minDate, maxDate] range overlaps [windowStart, now], i.e. when
 * minDate <= now and maxDate >= windowStart.
 *
 * The window start for 'month' and 'year' is clamped to the last day of the
 * target month, so Mar 31 minus one month is Feb 28/29 rather than early
 * March (Date#setMonth would otherwise overflow into the next month).
 *
 * @param reviews Reviews to filter; not mutated.
 * @param range Preset window; 'all' returns `reviews` unchanged.
 * @returns The matching reviews, in their original order.
 */
export function filterReviewsByDateRange(reviews: Review[], range: DateRange): Review[] {
  if (range === 'all') return reviews;

  const filterEnd = new Date();
  const filterStart = new Date(filterEnd);

  // Step filterStart back by whole months; the day is parked on the 1st while
  // the month changes so an out-of-range day cannot roll over.
  const rewindMonths = (months: number): void => {
    const day = filterStart.getDate();
    filterStart.setDate(1);
    filterStart.setMonth(filterStart.getMonth() - months);
    const lastDayOfMonth = new Date(
      filterStart.getFullYear(),
      filterStart.getMonth() + 1,
      0,
    ).getDate();
    filterStart.setDate(Math.min(day, lastDayOfMonth));
  };

  switch (range) {
    case 'week':
      filterStart.setDate(filterStart.getDate() - 7);
      break;
    case 'month':
      rewindMonths(1);
      break;
    case 'year':
      rewindMonths(12);
      break;
  }

  const startMs = filterStart.getTime();
  const endMs = filterEnd.getTime();

  return reviews.filter(r => {
    // The relative date text may arrive under either field name.
    const dateText = r.date_text || (r as Review & { timestamp?: string }).timestamp || '';
    const { minDate, maxDate } = parseDateTextToRange(dateText);
    return minDate.getTime() <= endMs && maxDate.getTime() >= startMs;
  });
}
/**
 * Keep the reviews whose representative date (the midpoint from
 * parseDateText) lies inside a custom range.
 *
 * Either bound may be null to leave that side open. The upper bound is
 * inclusive through the end of `toDate`'s day. With both bounds null the
 * input array is returned as-is.
 */
export function filterReviewsByCustomDateRange(reviews: Review[], fromDate: Date | null, toDate: Date | null): Review[] {
  if (!fromDate && !toDate) return reviews;

  // Extend the upper bound to 23:59:59.999 on a copy, leaving toDate untouched.
  let upperBound: Date | null = null;
  if (toDate) {
    upperBound = new Date(toDate);
    upperBound.setHours(23, 59, 59, 999);
  }

  return reviews.filter(review => {
    const rawText = review.date_text || (review as any).timestamp || '';
    const when = parseDateText(rawText);
    const afterStart = !fromDate || when >= fromDate;
    const beforeEnd = !upperBound || when <= upperBound;
    return afterStart && beforeEnd;
  });
}
/**
 * Build a month-by-month rating timeline.
 *
 * Each review is placed in the calendar month of its representative date
 * (parseDateText on `date_text`, falling back to `timestamp`). Points are
 * returned oldest first, labelled "Mon YYYY", each with the month's mean
 * rating, the review count, and a rolling average over the current and up to
 * two preceding points.
 */
export function calculateTimelineData(reviews: Review[]): TimelineDataPoint[] {
  const MONTH_LABELS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

  // Resolve each review to a date, newest first.
  const dated = Array.from(reviews, review => ({
    rating: review.rating,
    when: parseDateText(review.date_text || (review as any).timestamp || ''),
  }));
  dated.sort((a, b) => b.when.getTime() - a.when.getTime());

  // Bucket ratings per calendar month, keyed by a sortable month ordinal.
  const buckets = new Map<number, { label: string; ratings: number[] }>();
  for (const { rating, when } of dated) {
    const ordinal = when.getFullYear() * 12 + when.getMonth();
    let bucket = buckets.get(ordinal);
    if (!bucket) {
      bucket = { label: `${MONTH_LABELS[when.getMonth()]} ${when.getFullYear()}`, ratings: [] };
      buckets.set(ordinal, bucket);
    }
    bucket.ratings.push(rating);
  }

  // One point per month in chronological order; rollingAvg is filled in below.
  const chronological = [...buckets.entries()].sort(([a], [b]) => a - b);
  const points: TimelineDataPoint[] = chronological.map(([, bucket]) => {
    let total = 0;
    for (const value of bucket.ratings) total += value;
    return {
      date: bucket.label,
      rating: total / bucket.ratings.length,
      rollingAvg: 0,
      count: bucket.ratings.length,
    };
  });

  // 3-point trailing average; earlier points use however many are available.
  points.forEach((point, idx) => {
    const trailing = points.slice(Math.max(0, idx - 2), idx + 1);
    let total = 0;
    for (const p of trailing) total += p.rating;
    point.rollingAvg = total / trailing.length;
  });

  return points;
}