- Task #2: Migrate scraper_clean.py to use StructuredLogger with categories (37 log calls with metrics across browser/scraper/network/system) - Task #4: Add crash_reports table schema and database methods (save_crash_report, get_crash_report, get_crash_stats) - Task #9: Implement crash detection wrapper with metrics sampling (get_chrome_memory, get_dom_node_count, classify_crash) - Task #17: Add topic tags to frontend ReviewAnalytics (topic filter UI, tags on cards, topics in modal) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
489 lines
17 KiB
TypeScript
489 lines
17 KiB
TypeScript
// Analytics utility functions
|
|
|
|
/** A business owner's reply attached to a review. */
export interface OwnerResponse {
  /** Body of the reply. */
  text: string;
  /** Optional timestamp string for the reply; its format is not established in this file. */
  timestamp?: string;
}
|
|
|
|
/** A single scraped review plus fields derived on load. */
export interface Review {
  author: string;
  /** Star rating; the stats code in this file buckets the values 1 through 5. */
  rating: number;
  /** Review body, or null when the reviewer left only a rating. */
  text: string | null;
  /** Relative date phrase as displayed (e.g. "2 weeks ago"). */
  date_text: string;
  /**
   * Alternative field name for the relative date phrase. The helpers in this
   * file fall back to it when `date_text` is empty.
   */
  timestamp?: string;
  avatar_url: string | null;
  profile_url: string | null;
  review_id: string;
  owner_response?: OwnerResponse | null;
  photo_urls?: string[] | null;
  /** Inferred topics from scraper. */
  topics?: string[];

  // Derived fields (computed on load)
  parsedDate?: Date;
  /** Time range category. */
  dateCategory?: 'recent' | 'month' | 'year' | 'older';
  /** Earliest possible date (accounting for Google's uncertainty). */
  minDate?: Date;
  /** Latest possible date (accounting for Google's uncertainty). */
  maxDate?: Date;
  /** Midpoint of the range (used for calculations and sorting). */
  centerDate?: Date;
}
|
|
|
|
/** Preset look-back windows for date filtering; `'all'` means no date restriction. */
export type DateRange =
  | 'week'
  | 'month'
  | 'year'
  | 'all';
|
|
|
|
/** One point (one calendar month) on the rating timeline. */
export interface TimelineDataPoint {
  /** Month label in "Mon YYYY" form, e.g. "Jan 2025". */
  date: string;
  /** Mean rating of the reviews in this period. */
  rating: number;
  /** Mean of `rating` over this point and up to two preceding points. */
  rollingAvg: number;
  /** Number of reviews in this period. */
  count: number;
}
|
|
|
|
/** Aggregate metrics produced by `calculateReviewStats`. */
export interface ReviewStats {
  totalReviews: number;
  /** Mean star rating, or 0 when there are no reviews. */
  averageRating: number;
  /** Percentage (0-100) of reviews rated 4 or 5 stars. */
  sentimentScore: number;
  /** Number of reviews that have at least one photo attached. */
  photoCount: number;
  /** Mean review length in words, rounded to an integer. */
  avgReviewLength: number;
  /** Reviews whose relative date is expressed in seconds, minutes, hours, days or weeks. */
  recentReviews: number;
  /** One entry per star value, ordered 5 down to 1. */
  ratingDistribution: { rating: number; count: number; percentage: number }[];
  /** Up to ten most frequent non-stop-words found in review text. */
  topKeywords: { word: string; count: number }[];
  /** Review counts by bucket: positive (>= 4), neutral (3), negative (<= 2). */
  sentimentBreakdown: { positive: number; neutral: number; negative: number };
  /** Number of reviews rated 2 stars or lower. */
  negativeReviews: number;
  /** Percentage (0-100) of reviews that have an owner response with text. */
  responseRate: number;
  /** Placeholder: currently always 'N/A' because response timestamps are not used. */
  averageResponseTime: string;

  // Response breakdown
  responseBreakdown: { answered: number; notAnswered: number };

  // New trend metrics
  ratingTrend: {
    recentAvg: number;
    olderAvg: number;
    /** positive = improvement, negative = decline */
    change: number;
    periodLabel: string;
  };
  reviewVelocity: {
    recentCount: number;
    olderCount: number;
    /** positive = more reviews, negative = fewer */
    changePercent: number;
    periodLabel: string;
  };
}
|
|
|
|
/**
 * Aggregate a list of reviews into summary statistics.
 *
 * Side effect: any review missing `minDate`, `maxDate` or `centerDate` has
 * those fields populated in place from its relative date text.
 *
 * @param reviews Reviews to summarise; may be empty.
 * @returns Totals, averages, distributions, keywords and 3-month trend metrics.
 *          All ratios are 0 (never NaN) when there are no reviews.
 */
export function calculateReviewStats(reviews: Review[]): ReviewStats {
  // Populate minDate/maxDate/centerDate on reviews for display
  reviews.forEach(r => {
    if (!r.minDate || !r.maxDate || !r.centerDate) {
      // Handle both date_text and timestamp field names
      const dateText = r.date_text || (r as any).timestamp || '';
      const range = parseDateTextToRange(dateText);
      r.minDate = range.minDate;
      r.maxDate = range.maxDate;
      // centerDate is the midpoint of the uncertainty window
      r.centerDate = new Date((range.minDate.getTime() + range.maxDate.getTime()) / 2);
    }
  });

  const totalReviews = reviews.length;

  // Shared guards so an empty list yields 0 instead of NaN.
  const percentOfTotal = (part: number): number =>
    totalReviews > 0 ? (part / totalReviews) * 100 : 0;
  const meanRating = (items: Review[]): number =>
    items.length > 0 ? items.reduce((sum, r) => sum + r.rating, 0) / items.length : 0;

  const averageRating = meanRating(reviews);

  // Sentiment buckets by star rating
  const positiveReviews = reviews.filter(r => r.rating >= 4).length;
  const neutralReviews = reviews.filter(r => r.rating === 3).length;
  const negativeReviews = reviews.filter(r => r.rating <= 2).length;

  // Sentiment score (% of 4-5 star reviews)
  const sentimentScore = percentOfTotal(positiveReviews);

  // Photo count (reviews with actual photos attached)
  const photoCount = reviews.filter(r => r.photo_urls && r.photo_urls.length > 0).length;

  // Average review length in words. Splitting on whitespace runs and dropping
  // empty tokens keeps blank text at 0 words and ignores repeated spaces.
  const countWords = (text: string | null | undefined): number =>
    text ? text.split(/\s+/).filter(Boolean).length : 0;
  const avgReviewLength = totalReviews > 0
    ? Math.round(reviews.reduce((sum, r) => sum + countWords(r.text), 0) / totalReviews)
    : 0;

  // Recent reviews (last 30 days - simplified check on the relative date wording)
  const recentReviews = reviews.filter(r => {
    const text = (r.date_text || (r as any).timestamp || '').toLowerCase();
    return text.includes('day') || text.includes('week') || text.includes('hour') || text.includes('minute') || text.includes('second');
  }).length;

  // Rating distribution
  const ratingCounts: Record<number, number> = { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
  reviews.forEach(r => {
    ratingCounts[r.rating] = (ratingCounts[r.rating] || 0) + 1;
  });

  const ratingDistribution = [5, 4, 3, 2, 1].map(rating => {
    const count = ratingCounts[rating] || 0;
    return { rating, count, percentage: percentOfTotal(count) };
  });

  // Extract keywords from review text
  const stopWords = new Set(['this', 'that', 'with', 'from', 'have', 'been', 'were', 'very', 'great', 'good', 'best', 'nice', 'here', 'there', 'they', 'their', 'about', 'would', 'could', 'should', 'place', 'really']);

  // A Map (not a plain object) so words such as "constructor" cannot collide
  // with Object.prototype members and corrupt the tally.
  const wordCounts = new Map<string, number>();
  for (const review of reviews) {
    if (!review.text) continue;
    const words = review.text
      .toLowerCase()
      // Strip punctuation but keep letters/marks/digits of any script, so
      // accented words are not mangled.
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
      .split(/\s+/);
    for (const word of words) {
      if (word.length > 3 && !stopWords.has(word)) {
        wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
      }
    }
  }

  const topKeywords = Array.from(wordCounts.entries())
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10)
    .map(([word, count]) => ({ word, count }));

  const sentimentBreakdown = {
    positive: positiveReviews,
    neutral: neutralReviews,
    negative: negativeReviews,
  };

  // Response breakdown - count answered vs not answered reviews
  const answeredReviews = reviews.filter(r => r.owner_response?.text).length;
  const responseBreakdown = {
    answered: answeredReviews,
    notAnswered: totalReviews - answeredReviews,
  };

  // Response rate calculated from actual data
  const responseRate = percentOfTotal(answeredReviews);

  // Average response time (placeholder - would need response timestamps)
  const averageResponseTime = 'N/A'; // TODO: Calculate when response timestamps are available

  // Rating Trend - compare recent 3 months (90 days) vs previous 3 months
  const now = new Date();
  const threeMonthsAgo = new Date(now.getTime() - 90 * 24 * 60 * 60 * 1000);
  const sixMonthsAgo = new Date(now.getTime() - 180 * 24 * 60 * 60 * 1000);

  const recentReviewsForTrend = reviews.filter(r => r.centerDate && r.centerDate >= threeMonthsAgo);
  const olderReviewsForTrend = reviews.filter(r => r.centerDate && r.centerDate < threeMonthsAgo && r.centerDate >= sixMonthsAgo);

  const recentAvg = meanRating(recentReviewsForTrend);
  const olderAvg = meanRating(olderReviewsForTrend);

  const ratingTrend = {
    recentAvg: Math.round(recentAvg * 10) / 10,
    olderAvg: Math.round(olderAvg * 10) / 10,
    change: Math.round((recentAvg - olderAvg) * 10) / 10,
    periodLabel: 'last 3 months vs previous 3 months',
  };

  // Review Velocity - compare recent 3 months vs previous 3 months.
  // With no older reviews to compare against, any recent activity reports +100%.
  const reviewVelocity = {
    recentCount: recentReviewsForTrend.length,
    olderCount: olderReviewsForTrend.length,
    changePercent: olderReviewsForTrend.length > 0
      ? Math.round(((recentReviewsForTrend.length - olderReviewsForTrend.length) / olderReviewsForTrend.length) * 100)
      : (recentReviewsForTrend.length > 0 ? 100 : 0),
    periodLabel: 'last 3 months vs previous 3 months',
  };

  return {
    totalReviews,
    averageRating,
    sentimentScore,
    photoCount,
    avgReviewLength,
    recentReviews,
    ratingDistribution,
    topKeywords,
    sentimentBreakdown,
    negativeReviews,
    responseRate,
    averageResponseTime,
    responseBreakdown,
    ratingTrend,
    reviewVelocity,
  };
}
|
|
|
|
/**
 * Map a star rating to a sentiment bucket.
 *
 * Thresholds are monotonic so fractional values (e.g. an average of 3.5)
 * land in the bucket between their neighbours rather than dropping to
 * 'negative'. Whole-star ratings map as: 4-5 positive, 3 neutral, 1-2 negative.
 *
 * @param rating Star rating (whole or fractional).
 * @returns The sentiment bucket for that rating.
 */
export function getSentimentLabel(rating: number): 'positive' | 'neutral' | 'negative' {
  if (rating >= 4) return 'positive';
  if (rating >= 3) return 'neutral';
  return 'negative';
}
|
|
|
|
/**
 * Helper function to get date range boundaries for preset buttons.
 *
 * @param range Preset window; `'all'` means unbounded.
 * @returns `{ from: null, to: null }` for `'all'`; otherwise `from` is the start
 *          of the day one week/month/year before today and `to` is the end of
 *          today (both in local time).
 */
export function getDateRangeBoundaries(range: DateRange): { from: Date | null; to: Date | null } {
  if (range === 'all') return { from: null, to: null };

  const now = new Date();
  const to = new Date(now); // Today as end date
  const from = new Date(now);

  // Move `date` by whole months while keeping it inside the target month.
  // A bare setMonth()/setFullYear() overflows when the target month is shorter
  // (e.g. Mar 31 minus one month would land in early March again).
  const shiftMonthsClamped = (date: Date, months: number): void => {
    const day = date.getDate();
    date.setDate(1);
    date.setMonth(date.getMonth() + months);
    const lastDayOfMonth = new Date(date.getFullYear(), date.getMonth() + 1, 0).getDate();
    date.setDate(Math.min(day, lastDayOfMonth));
  };

  switch (range) {
    case 'week':
      from.setDate(from.getDate() - 7);
      break;
    case 'month':
      shiftMonthsClamped(from, -1);
      break;
    case 'year':
      shiftMonthsClamped(from, -12);
      break;
  }

  // Set to start of day for from, end of day for to
  from.setHours(0, 0, 0, 0);
  to.setHours(23, 59, 59, 999);

  return { from, to };
}
|
|
|
|
/**
 * Return the CSS utility classes (text, background and border colour) used to
 * render a sentiment badge.
 *
 * @param sentiment Sentiment bucket.
 * @returns Space-separated class names: green for positive, yellow for neutral,
 *          red for negative.
 */
export function getSentimentColor(sentiment: 'positive' | 'neutral' | 'negative'): string {
  const classesBySentiment: Record<'positive' | 'neutral' | 'negative', string> = {
    positive: 'text-green-700 bg-green-50 border-green-300',
    neutral: 'text-yellow-700 bg-yellow-50 border-yellow-300',
    negative: 'text-red-700 bg-red-50 border-red-300',
  };
  return classesBySentiment[sentiment];
}
|
|
|
|
/**
 * Extract the quantity from a relative-date phrase.
 *
 * @param text Phrase such as "2 weeks ago" or "Hace 2 semanas".
 * @returns The first run of digits as a base-10 integer, or 1 when the phrase
 *          has no digits (singular forms like "a month ago", "un mes",
 *          "una semana").
 */
function extractNumber(text: string): number {
  const match = text.match(/\d+/);
  return match ? parseInt(match[0], 10) : 1;
}

/**
 * Parse date_text into time range boundaries (min/max dates).
 *
 * This accounts for Google's inherent uncertainty in relative dates.
 * Based on reverse-engineered patterns from 244 reviews.
 *
 * `minDate` is the earliest (oldest) possible instant and `maxDate` the latest.
 * English and Spanish unit words are recognised; an optional leading
 * "Edited " is ignored.
 *
 * Examples:
 * - "a month ago"  → minDate: 59 days ago,  maxDate: 30 days ago
 * - "2 months ago" → minDate: 89 days ago,  maxDate: 60 days ago
 * - "a year ago"   → minDate: 729 days ago, maxDate: 365 days ago
 *
 * @param dateText Relative date phrase; empty/missing text is treated as
 *                 "within the last 30 days".
 * @returns The window the review may have been posted in. Seconds, minutes,
 *          hours and days yield a zero-width window. Text with no recognised
 *          unit is treated as very old (10-11 years ago).
 */
export function parseDateTextToRange(dateText: string): { minDate: Date; maxDate: Date } {
  const MS_PER_SECOND = 1000;
  const MS_PER_MINUTE = 60 * MS_PER_SECOND;
  const MS_PER_HOUR = 60 * MS_PER_MINUTE;
  const MS_PER_DAY = 24 * MS_PER_HOUR;

  const now = new Date();
  const daysAgo = (days: number): Date => new Date(now.getTime() - days * MS_PER_DAY);

  // Handle undefined/null/empty dateText: assume recent (within last month)
  if (!dateText) {
    return { minDate: daysAgo(30), maxDate: now };
  }

  // Normalise case and remove "Edited " prefix if present
  const cleaned = dateText.toLowerCase().replace(/^edited\s+/, '');
  const mentions = (...unitWords: string[]): boolean =>
    unitWords.some(word => cleaned.includes(word));

  // Zero-width window: the phrase pins the instant to `count` units ago.
  const exactly = (unitMs: number): { minDate: Date; maxDate: Date } => {
    const time = now.getTime() - extractNumber(cleaned) * unitMs;
    return { minDate: new Date(time), maxDate: new Date(time) };
  };

  // "N units ago" stays on screen until N+1 units have elapsed, so the window
  // runs from N*unitDays up to (N+1)*unitDays - 1 days ago.
  const unitWindow = (unitDays: number): { minDate: Date; maxDate: Date } => {
    const count = extractNumber(cleaned);
    return {
      minDate: daysAgo((count + 1) * unitDays - 1),
      maxDate: daysAgo(count * unitDays),
    };
  };

  // Order matters: finer units are tested first.
  // Seconds: 1-59 seconds
  if (mentions('second', 'segundo')) return exactly(MS_PER_SECOND);
  // Minutes: 1-59 minutes
  if (mentions('minute', 'minuto')) return exactly(MS_PER_MINUTE);
  // Hours: 1-23 hours
  if (mentions('hora', 'hour')) return exactly(MS_PER_HOUR);
  // Days: 1-6 days
  if (mentions('día', 'day')) return exactly(MS_PER_DAY);

  // Weeks: 2-3 weeks (Google never shows "1 week ago" or "4 weeks ago");
  // each week pattern represents a 7-day range.
  if (mentions('semana', 'week')) return unitWindow(7);
  // Months: "a month ago" = 30-59 days, "2 months ago" = 60-89 days, ...
  if (mentions('mes', 'month')) return unitWindow(30);
  // Years: "a year ago" = 365-729 days, "2 years ago" = 730-1094 days, ...
  if (mentions('año', 'year')) return unitWindow(365);

  // Default: very old (10+ years)
  return { minDate: daysAgo(3650 + 365), maxDate: daysAgo(3650) };
}
|
|
|
|
/**
 * Convert a relative date phrase into a single representative date.
 *
 * @param dateText Relative date phrase (e.g. "2 months ago").
 * @returns The midpoint of the range returned by `parseDateTextToRange`.
 */
export function parseDateText(dateText: string): Date {
  const { minDate, maxDate } = parseDateTextToRange(dateText);
  const midpointTime = (minDate.getTime() + maxDate.getTime()) / 2;
  return new Date(midpointTime);
}
|
|
|
|
/**
 * Keep the reviews that may fall inside a preset look-back window.
 *
 * A review is kept when its uncertainty window [minDate, maxDate] overlaps
 * the filter window [filterStart, now].
 *
 * @param reviews Reviews to filter.
 * @param range Preset window; `'all'` returns the input array unchanged.
 * @returns The matching reviews (the same array instance for `'all'`).
 */
export function filterReviewsByDateRange(reviews: Review[], range: DateRange): Review[] {
  if (range === 'all') return reviews;

  const filterEnd = new Date();
  const filterStart = new Date(filterEnd);

  // Move `date` by whole months while keeping it inside the target month.
  // A bare setMonth()/setFullYear() overflows when the target month is shorter
  // (e.g. Mar 31 minus one month would land in early March again).
  const shiftMonthsClamped = (date: Date, months: number): void => {
    const day = date.getDate();
    date.setDate(1);
    date.setMonth(date.getMonth() + months);
    const lastDayOfMonth = new Date(date.getFullYear(), date.getMonth() + 1, 0).getDate();
    date.setDate(Math.min(day, lastDayOfMonth));
  };

  switch (range) {
    case 'week':
      filterStart.setDate(filterStart.getDate() - 7);
      break;
    case 'month':
      shiftMonthsClamped(filterStart, -1);
      break;
    case 'year':
      shiftMonthsClamped(filterStart, -12);
      break;
  }

  // Use range overlap logic: include review if its time range overlaps with filter range
  // Review range: [minDate, maxDate]
  // Filter range: [filterStart, filterEnd]
  // Overlap occurs when: minDate <= filterEnd AND maxDate >= filterStart
  return reviews.filter(r => {
    // Handle both date_text and timestamp field names
    const dateText = r.date_text || (r as any).timestamp || '';
    const { minDate, maxDate } = parseDateTextToRange(dateText);
    return minDate <= filterEnd && maxDate >= filterStart;
  });
}
|
|
|
|
/**
 * Keep the reviews whose representative date lies inside a custom range.
 *
 * Each review is placed at the midpoint of its uncertainty window (via
 * `parseDateText`). `fromDate` is compared as given; `toDate` is extended to
 * the end of that day (23:59:59.999 local time). Neither argument is mutated.
 *
 * @param reviews Reviews to filter.
 * @param fromDate Inclusive lower bound, or null for no lower bound.
 * @param toDate Inclusive upper bound (whole day), or null for no upper bound.
 * @returns The matching reviews; the input array itself when both bounds are null.
 */
export function filterReviewsByCustomDateRange(reviews: Review[], fromDate: Date | null, toDate: Date | null): Review[] {
  if (!fromDate && !toDate) return reviews;

  // Computed once rather than per review; copied so the caller's Date is untouched.
  let endOfDay: Date | null = null;
  if (toDate) {
    endOfDay = new Date(toDate);
    endOfDay.setHours(23, 59, 59, 999);
  }

  return reviews.filter(r => {
    // Handle both date_text and timestamp field names
    const dateText = r.date_text || (r as any).timestamp || '';
    const reviewDate = parseDateText(dateText);

    const onOrAfterStart = !fromDate || reviewDate >= fromDate;
    const onOrBeforeEnd = !endOfDay || reviewDate <= endOfDay;
    return onOrAfterStart && onOrBeforeEnd;
  });
}
|
|
|
|
/**
 * Build a month-by-month rating timeline.
 *
 * Each review is placed at the midpoint of its uncertainty window (via
 * `parseDateText`) and grouped by calendar month.
 *
 * @param reviews Reviews to chart; may be empty.
 * @returns One point per month that has at least one review, oldest first.
 *          `rollingAvg` averages the current point with up to two preceding
 *          points; months without reviews produce no point, so the window can
 *          span more than three calendar months.
 */
export function calculateTimelineData(reviews: Review[]): TimelineDataPoint[] {
  const monthNames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

  // Group ratings by month. The key is year * 12 + monthIndex, so it sorts
  // chronologically without re-parsing a label.
  const ratingsByMonth = new Map<number, number[]>();
  for (const review of reviews) {
    // Handle both date_text and timestamp field names
    const parsedDate = parseDateText(review.date_text || (review as any).timestamp || '');
    const monthKey = parsedDate.getFullYear() * 12 + parsedDate.getMonth();
    const ratings = ratingsByMonth.get(monthKey);
    if (ratings) {
      ratings.push(review.rating);
    } else {
      ratingsByMonth.set(monthKey, [review.rating]);
    }
  }

  // Calculate per-month averages, oldest month first
  const dataPoints: TimelineDataPoint[] = Array.from(ratingsByMonth.entries())
    .sort(([a], [b]) => a - b)
    .map(([monthKey, ratings]) => ({
      date: `${monthNames[monthKey % 12]} ${Math.floor(monthKey / 12)}`,
      rating: ratings.reduce((a, b) => a + b, 0) / ratings.length,
      rollingAvg: 0, // Will calculate below
      count: ratings.length, // Number of reviews this month
    }));

  // Calculate rolling average over the current point and up to two before it
  dataPoints.forEach((point, idx) => {
    const window = dataPoints.slice(Math.max(0, idx - 2), idx + 1);
    point.rollingAvg = window.reduce((sum, p) => sum + p.rating, 0) / window.length;
  });

  return dataPoints;
}
|