Clean up project root - remove 51 obsolete files

Deleted:
- 26 old markdown summary/documentation files
- 16 debug/test Python scripts (debug_*, test_*, diagnose_*)
- 10 untracked JSON files from api_response_samples
- terms-of-usage.md, pane_not_found.png

Also includes pending web app changes:
- Jobs management UI (JobsView, Sidebar components)
- API routes for job streaming and comparison
- Enhanced ReviewAnalytics and ScraperTest components

Final clean structure:
├── api_server_production.py  (main entry)
├── modules/                  (core Python)
├── web/                      (Next.js frontend)
├── tests/                    (test suite)
├── docs/                     (documentation)
└── examples/                 (usage examples)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-23 17:31:53 +00:00
parent 8ccf72a489
commit 47bb032011
69 changed files with 3417 additions and 11347 deletions

View File

@@ -0,0 +1,86 @@
import { NextRequest, NextResponse } from 'next/server';
const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
/**
 * Builds the response used whenever there is nothing to compare against:
 * every review of the current job is flagged as new.
 */
function respondAllNew(
  reviews: Record<string, unknown>[],
  previousJobId: string | null
) {
  const reviewsWithStatus = reviews.map((review) => ({
    ...review,
    is_new: true,
  }));
  return NextResponse.json({
    reviews: reviewsWithStatus,
    total_count: reviewsWithStatus.length,
    new_count: reviewsWithStatus.length,
    previous_job_id: previousJobId,
  });
}

/**
 * GET /api/jobs/[jobId]/compare?previous=<previousJobId>
 *
 * Returns the reviews of the current job, each annotated with an `is_new`
 * flag that is true when its `review_id` was not present in the previous job.
 *
 * - No `previous` parameter: every review is reported as new and
 *   `previous_job_id` is null.
 * - Previous job cannot be fetched: every review is reported as new.
 * - Current job cannot be fetched: the backend status is passed through.
 * - Unexpected failure: 500.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ jobId: string }> }
) {
  try {
    const { jobId } = await params;
    const previousJobId = new URL(request.url).searchParams.get('previous');

    // Both ids are caller-controlled; encode them so they stay a single path
    // segment and cannot redirect the request to another backend route.
    const currentResponse = await fetch(
      `${API_BASE_URL}/jobs/${encodeURIComponent(jobId)}/reviews?limit=10000`
    );
    if (!currentResponse.ok) {
      return NextResponse.json(
        { error: 'Failed to get current job reviews' },
        { status: currentResponse.status }
      );
    }
    const currentData = await currentResponse.json();
    const currentReviews: Record<string, unknown>[] = currentData.reviews || [];

    // If no previous job to compare, all reviews are "new"
    if (!previousJobId) {
      return respondAllNew(currentReviews, null);
    }

    const previousResponse = await fetch(
      `${API_BASE_URL}/jobs/${encodeURIComponent(previousJobId)}/reviews?limit=10000`
    );
    if (!previousResponse.ok) {
      // Previous job not found, treat all as new
      return respondAllNew(currentReviews, previousJobId);
    }
    const previousData = await previousResponse.json();
    const previousReviews: Record<string, unknown>[] = previousData.reviews || [];

    // Set of previous review ids for O(1) membership checks
    const previousReviewIds = new Set(previousReviews.map((r) => r.review_id));

    // Mark reviews as new if they weren't in the previous job
    const reviewsWithStatus = currentReviews.map((review) => ({
      ...review,
      is_new: !previousReviewIds.has(review.review_id),
    }));
    const newCount = reviewsWithStatus.filter((r) => r.is_new).length;

    return NextResponse.json({
      reviews: reviewsWithStatus,
      total_count: reviewsWithStatus.length,
      new_count: newCount,
      previous_job_id: previousJobId,
    });
  } catch (error) {
    console.error('Compare API error:', error);
    return NextResponse.json(
      { error: 'Failed to compare reviews' },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,30 @@
import { NextRequest, NextResponse } from 'next/server';
const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
/**
 * Proxies the backend's `/jobs/{jobId}/logs` endpoint.
 *
 * On success the backend JSON is returned unchanged. When the backend answers
 * with a non-2xx status, that status is passed through with a generic error
 * body; any unexpected failure (network error, invalid JSON) yields a 500.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ jobId: string }> }
) {
  try {
    const { jobId } = await params;
    // Encode the id so it remains one path segment of the backend URL.
    const response = await fetch(
      `${API_BASE_URL}/jobs/${encodeURIComponent(jobId)}/logs`
    );
    if (!response.ok) {
      return NextResponse.json(
        { error: 'Failed to get logs' },
        { status: response.status }
      );
    }
    const data = await response.json();
    return NextResponse.json(data);
  } catch (error) {
    console.error('Logs API error:', error);
    return NextResponse.json(
      { error: 'Failed to get logs' },
      { status: 500 }
    );
  }
}

View File

@@ -28,3 +28,32 @@ export async function GET(
);
}
}
/**
 * Deletes a job by forwarding a DELETE to the backend's `/jobs/{jobId}`.
 *
 * On success returns `{ success: true, message }`. When the backend rejects
 * the request, its status code is passed through together with the backend's
 * `detail` message when one is available. Unexpected failures yield a 500.
 */
export async function DELETE(
  request: NextRequest,
  { params }: { params: Promise<{ jobId: string }> }
) {
  try {
    const { jobId } = await params;
    // Encode the id so it remains one path segment of the backend URL.
    const response = await fetch(
      `${API_BASE_URL}/jobs/${encodeURIComponent(jobId)}`,
      { method: 'DELETE' }
    );
    if (!response.ok) {
      // The error body is not guaranteed to be JSON (or present at all);
      // a parse failure must not replace the backend status with a 500.
      const data = await response.json().catch(() => null);
      return NextResponse.json(
        { error: data?.detail || 'Failed to delete job' },
        { status: response.status }
      );
    }
    return NextResponse.json({ success: true, message: 'Job deleted successfully' });
  } catch (error) {
    console.error('Delete job API error:', error);
    return NextResponse.json(
      { error: 'Failed to delete job' },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,57 @@
import { NextRequest } from 'next/server';
const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
export const dynamic = 'force-dynamic';
/**
 * Relays the backend's server-sent event stream for a single job
 * (`/jobs/{jobId}/stream`) to the caller.
 *
 * Bytes from the backend are forwarded unchanged. If the backend cannot be
 * reached or the connection fails mid-stream, a single `event: error` message
 * is emitted before the stream is closed. When the caller disconnects, the
 * upstream request is aborted so the backend connection is not left open.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ jobId: string }> }
) {
  const { jobId } = await params;
  const encoder = new TextEncoder();

  // Aborting this controller tears down the backend fetch and any pending read.
  const upstream = new AbortController();
  request.signal.addEventListener('abort', () => upstream.abort(), { once: true });

  const stream = new ReadableStream({
    async start(controller) {
      // Best-effort: the controller may already be closed or cancelled.
      const emit = (chunk: string) => {
        try {
          controller.enqueue(encoder.encode(chunk));
        } catch {
          /* consumer is gone; nothing left to notify */
        }
      };

      try {
        const response = await fetch(
          `${API_BASE_URL}/jobs/${encodeURIComponent(jobId)}/stream`,
          {
            headers: {
              'Accept': 'text/event-stream',
              'Cache-Control': 'no-cache',
            },
            signal: upstream.signal,
          }
        );
        if (!response.ok || !response.body) {
          emit(`event: error\ndata: {"error": "Failed to connect to backend"}\n\n`);
          return; // the finally block performs the single close
        }
        const reader = response.body.getReader();
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            break;
          }
          // Forward the SSE data as-is
          controller.enqueue(value);
        }
      } catch (error) {
        // An abort triggered by the caller leaving is expected, not an error.
        if (!upstream.signal.aborted) {
          console.error('SSE stream error:', error);
          emit(`event: error\ndata: {"error": "Stream connection failed"}\n\n`);
        }
      } finally {
        try {
          controller.close();
        } catch {
          /* already closed or cancelled */
        }
      }
    },
    cancel() {
      upstream.abort();
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache, no-transform',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
    },
  });
}

30
web/app/api/jobs/route.ts Normal file
View File

@@ -0,0 +1,30 @@
import { NextRequest, NextResponse } from 'next/server';
const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
/**
 * Lists jobs by proxying the backend's `/jobs` endpoint.
 *
 * Accepts an optional `limit` query parameter (default `'100'`) and always
 * responds with `{ jobs: [...] }`, whether the backend returned a bare array
 * or an object with a `jobs` property. A non-2xx backend status is passed
 * through with a generic error body; unexpected failures yield a 500.
 */
export async function GET(request: NextRequest) {
  try {
    const { searchParams } = new URL(request.url);
    const limit = searchParams.get('limit') || '100';
    // `limit` is caller-supplied: encode it so it cannot append extra
    // query parameters to the backend request.
    const response = await fetch(
      `${API_BASE_URL}/jobs?limit=${encodeURIComponent(limit)}`
    );
    if (!response.ok) {
      return NextResponse.json(
        { error: 'Failed to get jobs' },
        { status: response.status }
      );
    }
    const data = await response.json();
    // Backend returns array directly, not { jobs: [...] }
    const jobs = Array.isArray(data) ? data : (data?.jobs || []);
    return NextResponse.json({ jobs });
  } catch (error) {
    console.error('Jobs API error:', error);
    return NextResponse.json(
      { error: 'Failed to get jobs' },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,54 @@
import { NextRequest } from 'next/server';
const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
export const dynamic = 'force-dynamic';
/**
 * Relays the backend's server-sent event stream for all jobs
 * (`/jobs/stream`) to the caller.
 *
 * Bytes from the backend are forwarded unchanged. If the backend cannot be
 * reached or the connection fails mid-stream, a single `event: error` message
 * is emitted before the stream is closed. When the caller disconnects, the
 * upstream request is aborted so the backend connection is not left open.
 */
export async function GET(request: NextRequest) {
  const encoder = new TextEncoder();

  // Aborting this controller tears down the backend fetch and any pending read.
  const upstream = new AbortController();
  request.signal.addEventListener('abort', () => upstream.abort(), { once: true });

  const stream = new ReadableStream({
    async start(controller) {
      // Best-effort: the controller may already be closed or cancelled.
      const emit = (chunk: string) => {
        try {
          controller.enqueue(encoder.encode(chunk));
        } catch {
          /* consumer is gone; nothing left to notify */
        }
      };

      try {
        const response = await fetch(`${API_BASE_URL}/jobs/stream`, {
          headers: {
            'Accept': 'text/event-stream',
            'Cache-Control': 'no-cache',
          },
          signal: upstream.signal,
        });
        if (!response.ok || !response.body) {
          emit(`event: error\ndata: {"error": "Failed to connect to backend"}\n\n`);
          return; // the finally block performs the single close
        }
        const reader = response.body.getReader();
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            break;
          }
          // Forward the SSE data as-is
          controller.enqueue(value);
        }
      } catch (error) {
        // An abort triggered by the caller leaving is expected, not an error.
        if (!upstream.signal.aborted) {
          console.error('SSE stream error:', error);
          emit(`event: error\ndata: {"error": "Stream connection failed"}\n\n`);
        }
      } finally {
        try {
          controller.close();
        } catch {
          /* already closed or cancelled */
        }
      }
    },
    cancel() {
      upstream.abort();
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache, no-transform',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
    },
  });
}

View File

@@ -4,17 +4,26 @@ const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
export async function POST(request: NextRequest) {
try {
const { url } = await request.json();
const body = await request.json();
const { url, business_name, business_address, rating_snapshot, total_reviews_snapshot } = body;
if (!url) {
return NextResponse.json({ error: 'URL is required' }, { status: 400 });
}
// Call the containerized scraper API
// Call the containerized scraper API with business metadata
const response = await fetch(`${API_BASE_URL}/scrape`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url }),
body: JSON.stringify({
url,
metadata: {
business_name,
business_address,
rating_snapshot,
total_reviews_snapshot,
},
}),
});
const data = await response.json();

View File

@@ -1,38 +1,259 @@
import ScraperTest from '@/components/ScraperTest';
'use client';
import ScraperTest, { JobStatus } from '@/components/ScraperTest';
import ReviewAnalytics from '@/components/ReviewAnalytics';
import Sidebar from '@/components/Sidebar';
import JobsView from '@/components/JobsView';
import { useState, useCallback, useEffect } from 'react';
/** A single scraped review as handled by this page. */
interface Review {
author: string;
rating: number;
// Nullable: a review may carry no text.
text: string | null;
date_text: string;
avatar_url: string | null;
profile_url: string | null;
// Identifier used by the compare route to match reviews between two jobs.
review_id: string;
}
/** A review optionally annotated with the `is_new` flag added by the compare route. */
interface ReviewWithNew extends Review {
is_new?: boolean;
}
/** The job whose reviews are currently shown in the Reports analytics view. */
interface SelectedJob {
reviews: ReviewWithNew[];
businessName: string;
businessUrl: string;
jobId: string;
// Populated from the `new_count` field of the fetched response, when present.
newCount?: number;
// Id of the job the reviews were compared against, if any.
previousJobId?: string;
}
/** Identifiers of the three main views selectable from the sidebar. */
type ViewType = 'newScrape' | 'jobs' | 'reports';
export default function Home() {
const [activeView, setActiveView] = useState<ViewType>('newScrape');
const [jobs, setJobs] = useState<JobStatus[]>([]);
const [selectedJob, setSelectedJob] = useState<SelectedJob | null>(null);
const [isLoadingJob, setIsLoadingJob] = useState<string | null>(null);
// Fetches the most recent jobs from the API and stores them in state.
// Failures are logged and otherwise ignored, leaving the current list intact.
const refreshJobs = useCallback(async () => {
  try {
    const res = await fetch('/api/jobs?limit=100');
    if (!res.ok) {
      return;
    }
    const payload = await res.json();
    if (payload.jobs) {
      setJobs(payload.jobs);
    }
  } catch (err) {
    console.error('Failed to load jobs:', err);
  }
}, []);

// Populate the job list once the component has mounted.
useEffect(() => {
  refreshJobs();
}, [refreshJobs]);
// Merges jobs reported by ScraperTest into the current list, keyed by job_id:
// an incoming job replaces the stored one with the same id, unseen ids are appended.
const handleJobsChange = useCallback((newJobs: JobStatus[]) => {
  setJobs((current) => {
    const byId = new Map<string, JobStatus>();
    for (const existing of current) {
      byId.set(existing.job_id, existing);
    }
    for (const incoming of newJobs) {
      byId.set(incoming.job_id, incoming);
    }
    return [...byId.values()];
  });
}, []);
// Shows an already-loaded set of reviews in the Reports view.
// A missing business URL is stored as an empty string.
const handleSelectReviews = useCallback(
  (reviews: Review[], businessName: string, jobId: string, businessUrl?: string) => {
    const selection: SelectedJob = {
      reviews,
      businessName,
      businessUrl: businessUrl ? businessUrl : '',
      jobId,
    };
    setSelectedJob(selection);
    setActiveView('reports');
  },
  []
);
/**
 * Fetches the reviews of a completed job and opens them in the Reports view.
 *
 * Does nothing for jobs that are not completed or have no reviews. When
 * `previousJob` is given, the compare endpoint is used so the response also
 * carries the new-review count; otherwise the plain reviews endpoint is used.
 * While the request is in flight the job id is exposed through
 * `isLoadingJob`. Failures are logged and leave the current view unchanged.
 */
const loadJobReviews = async (job: JobStatus, previousJob?: JobStatus) => {
  if (job.status !== 'completed' || !job.reviews_count) return;
  setIsLoadingJob(job.job_id);
  try {
    // Encode ids so they remain a single path segment / query value.
    const jobPath = `/api/jobs/${encodeURIComponent(job.job_id)}`;
    const url = previousJob
      ? `${jobPath}/compare?previous=${encodeURIComponent(previousJob.job_id)}`
      : `${jobPath}/reviews?limit=10000`;
    const response = await fetch(url);
    if (!response.ok) throw new Error('Failed to fetch reviews');
    const data = await response.json();
    const reviews = data.reviews || [];
    if (reviews.length > 0) {
      // Fall back to the `query` parameter of the job URL for the name.
      let businessName = job.business_name;
      if (!businessName) {
        try {
          // searchParams.get() already returns the decoded value; decoding it
          // again would throw on names that contain a literal '%'.
          businessName =
            new URL(job.url).searchParams.get('query') || 'Unknown Business';
        } catch {
          businessName = 'Unknown Business';
        }
      }
      setSelectedJob({
        reviews,
        businessName,
        businessUrl: job.url,
        jobId: job.job_id,
        newCount: data.new_count,
        previousJobId: previousJob?.job_id,
      });
      setActiveView('reports');
    }
  } catch (err) {
    console.error('Failed to load job reviews:', err);
  } finally {
    setIsLoadingJob(null);
  }
};
/**
 * Renders the main panel for the currently active view:
 * - `newScrape`: the ScraperTest form,
 * - `jobs`: the JobsView list,
 * - `reports`: analytics for the selected job, or a grid of completed jobs
 *   (newest first) to pick from when nothing is selected.
 */
const renderMainContent = () => {
  switch (activeView) {
    case 'newScrape':
      return (
        <div className="h-full overflow-y-auto p-6">
          <ScraperTest onJobsChange={handleJobsChange} onSelectReviews={handleSelectReviews} />
        </div>
      );
    case 'jobs':
      return (
        <JobsView
          jobs={jobs}
          onSelectJob={loadJobReviews}
          isLoadingJob={isLoadingJob}
          onRefresh={refreshJobs}
        />
      );
    case 'reports': {
      // Completed jobs that produced at least one review, newest first.
      const completedJobs = jobs
        .filter(j => j.status === 'completed' && j.reviews_count && j.reviews_count > 0)
        .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime());
      return selectedJob ? (
        <div className="h-full overflow-y-auto p-6">
          <div className="mb-4 flex items-center justify-between">
            <h2 className="text-xl font-bold text-gray-900">Analytics</h2>
            <button
              onClick={() => setSelectedJob(null)}
              className="px-4 py-2 bg-gray-200 hover:bg-gray-300 text-gray-700 rounded-lg font-medium transition-colors flex items-center gap-2"
            >
              <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M10 19l-7-7m0 0l7-7m-7 7h18" />
              </svg>
              Back to Reports
            </button>
          </div>
          <ReviewAnalytics reviews={selectedJob.reviews} businessName={selectedJob.businessName} businessUrl={selectedJob.businessUrl} newCount={selectedJob.newCount} />
        </div>
      ) : (
        <div className="h-full overflow-y-auto p-6">
          <div className="mb-6">
            <h2 className="text-2xl font-bold text-gray-900">Reports</h2>
            <p className="text-sm text-gray-600 mt-1">
              {completedJobs.length} completed {completedJobs.length === 1 ? 'scrape' : 'scrapes'} with reviews
            </p>
          </div>
          {completedJobs.length === 0 ? (
            <div className="flex flex-col items-center justify-center py-16 text-gray-500">
              <svg className="w-20 h-20 mb-4 opacity-30" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
              </svg>
              <h3 className="text-xl font-semibold text-gray-700 mb-2">No Reports Yet</h3>
              <p className="text-sm text-gray-500 mb-4">Complete a scrape job to see analytics reports</p>
              <button
                onClick={() => setActiveView('newScrape')}
                className="px-4 py-2 bg-blue-600 text-white rounded-lg font-medium hover:bg-blue-700 transition-colors"
              >
                Start New Scrape
              </button>
            </div>
          ) : (
            <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-3">
              {completedJobs.map(job => {
                // Fall back to the `query` parameter of the job URL for the name.
                let businessName = job.business_name;
                if (!businessName) {
                  try {
                    // searchParams.get() already returns the decoded value; decoding
                    // it again would throw on names that contain a literal '%'.
                    businessName =
                      new URL(job.url).searchParams.get('query') || 'Unknown Business';
                  } catch {
                    businessName = 'Unknown Business';
                  }
                }
                return (
                  <div
                    key={job.job_id}
                    onClick={() => loadJobReviews(job)}
                    className="bg-white rounded-xl border-2 border-gray-200 p-5 cursor-pointer hover:border-blue-400 hover:shadow-lg transition-all"
                  >
                    <div className="flex items-start justify-between mb-3">
                      <h3 className="font-bold text-gray-900 truncate flex-1" title={businessName}>
                        {businessName}
                      </h3>
                      {/* Ternary instead of `&&` so a value of 0 does not render a stray "0". */}
                      {job.rating_snapshot ? (
                        <span className="flex items-center gap-1 text-yellow-600 font-semibold ml-2">
                          <svg className="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
                            <path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z" />
                          </svg>
                          {job.rating_snapshot.toFixed(1)}
                        </span>
                      ) : null}
                    </div>
                    <div className="flex items-center gap-4 text-sm text-gray-600 mb-3">
                      <span className="font-semibold text-blue-700">{job.reviews_count} reviews</span>
                      {job.scrape_time ? <span>{job.scrape_time.toFixed(1)}s</span> : null}
                    </div>
                    <div className="text-xs text-gray-500">
                      {new Date(job.created_at).toLocaleDateString()} at {new Date(job.created_at).toLocaleTimeString()}
                    </div>
                    {isLoadingJob === job.job_id && (
                      <div className="mt-3 flex items-center gap-2 text-blue-600">
                        <div className="w-4 h-4 border-2 border-blue-500 border-t-transparent rounded-full animate-spin" />
                        <span className="text-sm font-medium">Loading...</span>
                      </div>
                    )}
                  </div>
                );
              })}
            </div>
          )}
        </div>
      );
    }
  }
};
return (
<div className="min-h-screen bg-gradient-to-br from-blue-600 to-indigo-700 py-12 px-4">
<main className="max-w-5xl mx-auto">
<div className="text-center mb-10">
<h1 className="text-4xl md:text-5xl font-bold text-white mb-3">
Google Reviews Scraper
</h1>
<p className="text-blue-100 text-lg">
Test the containerized scraper API
</p>
<div className="mt-4 inline-flex items-center gap-2 px-4 py-2 bg-blue-500/30 rounded-lg text-blue-100 text-sm">
<div className="w-2 h-2 bg-green-400 rounded-full animate-pulse"></div>
Powered by SeleniumBase UC Mode
</div>
</div>
<div className="h-screen w-screen overflow-hidden flex">
{/* Sidebar */}
<Sidebar
activeView={activeView}
onViewChange={setActiveView}
jobCount={jobs.length}
/>
<div className="bg-white rounded-2xl shadow-2xl p-6 md:p-8">
<ScraperTest />
</div>
<div className="mt-8 text-center text-blue-100 text-sm space-y-2">
<p className="font-medium">💡 Example URLs to test:</p>
<div className="space-y-1 text-xs">
<p className="font-mono bg-blue-500/20 rounded px-3 py-1 inline-block">
https://www.google.com/maps/place/Soho+Club/...
</p>
</div>
<p className="mt-4 text-blue-200">
API running at: <span className="font-mono">localhost:8000</span>
</p>
</div>
</main>
{/* Main Content */}
<div className="flex-1 bg-gray-50 overflow-hidden">
{renderMainContent()}
</div>
</div>
);
}

1586
web/components/JobsView.tsx Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,7 @@ interface Review {
review_id: string;
}
interface JobStatus {
export interface JobStatus {
job_id: string;
status: 'pending' | 'running' | 'completed' | 'failed';
url: string;
@@ -25,9 +25,19 @@ interface JobStatus {
total_reviews: number | null;
scrape_time: number | null;
error_message: string | null;
// Business metadata for tracking and comparison
business_name: string | null;
business_address: string | null;
rating_snapshot: number | null;
total_reviews_snapshot: number | null;
}
export default function ScraperTest() {
interface ScraperTestProps {
onJobsChange?: (jobs: JobStatus[]) => void;
onSelectReviews?: (reviews: Review[], businessName: string, jobId: string) => void;
}
export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTestProps = {}) {
const [searchQuery, setSearchQuery] = useState('');
const [searchedQuery, setSearchedQuery] = useState('');
const [jobs, setJobs] = useState<Map<string, JobStatus>>(new Map());
@@ -44,6 +54,8 @@ export default function ScraperTest() {
const [businessName, setBusinessName] = useState<string | null>(null);
const [businessAddress, setBusinessAddress] = useState<string | null>(null);
const [businessRating, setBusinessRating] = useState<number | null>(null);
const [businessImage, setBusinessImage] = useState<string | null>(null);
const [businessCategory, setBusinessCategory] = useState<string | null>(null);
const debounceRef = useRef<NodeJS.Timeout | null>(null);
const pollingIntervals = useRef<Map<string, NodeJS.Timeout>>(new Map());
const abortControllerRef = useRef<AbortController | null>(null);
@@ -80,9 +92,18 @@ export default function ScraperTest() {
setBusinessName(null);
setBusinessAddress(null);
setBusinessRating(null);
setBusinessImage(null);
setBusinessCategory(null);
}
}, [searchQuery, searchedQuery]);
// Notify parent when jobs change
useEffect(() => {
if (onJobsChange) {
onJobsChange(Array.from(jobs.values()));
}
}, [jobs, onJobsChange]);
// Check for reviews function (called manually when user clicks Validate)
const checkReviews = async (query: string) => {
// Abort any previous validation request
@@ -96,6 +117,8 @@ export default function ScraperTest() {
setBusinessName(null);
setBusinessAddress(null);
setBusinessRating(null);
setBusinessImage(null);
setBusinessCategory(null);
setError('');
// Create new abort controller with 30 second timeout
@@ -123,6 +146,8 @@ export default function ScraperTest() {
setBusinessName(data.name);
setBusinessAddress(data.address);
setBusinessRating(data.rating);
setBusinessImage(data.image_url);
setBusinessCategory(data.category);
} else {
console.error('Failed to get business info:', data.error);
// Business not found
@@ -226,7 +251,13 @@ export default function ScraperTest() {
const response = await fetch('/api/scrape', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url }),
body: JSON.stringify({
url,
business_name: businessName,
business_address: businessAddress,
rating_snapshot: businessRating,
total_reviews_snapshot: availableReviewCount,
}),
});
const data = await response.json();
@@ -245,10 +276,15 @@ export default function ScraperTest() {
created_at: new Date().toISOString(),
started_at: null,
completed_at: null,
updated_at: new Date().toISOString(),
reviews_count: null,
total_reviews: null,
scrape_time: null,
error_message: null,
business_name: businessName,
business_address: businessAddress,
rating_snapshot: businessRating,
total_reviews_snapshot: availableReviewCount,
});
return newMap;
});
@@ -323,6 +359,7 @@ export default function ScraperTest() {
{ name: '🏪 Small (~79)', query: 'R. Fleitas Peluqueros Gran Canaria' },
{ name: '🚗 Medium (~589)', query: 'ClickRent Gran Canaria' },
{ name: '🏥 Large (~2000+)', query: 'Hospital Universitario Doctor Negrín Las Palmas' },
{ name: '🛒 Alcampo', query: 'Alcampo Hipermarket Las Palmas' },
];
return (
@@ -376,13 +413,33 @@ export default function ScraperTest() {
<button
onClick={handleSearch}
disabled={searchQuery.trim().length < 2 || isCheckingReviews}
className="px-6 py-3 bg-blue-600 text-white font-semibold rounded-xl hover:bg-blue-700 disabled:bg-gray-300 disabled:cursor-not-allowed transition-colors flex items-center gap-2"
className={`px-6 py-3 font-semibold rounded-xl transition-all flex items-center gap-2 ${
hasReviews === true && searchQuery.trim() === searchedQuery
? 'bg-green-600 text-white hover:bg-green-700'
: hasReviews === false && searchQuery.trim() === searchedQuery
? 'bg-yellow-500 text-white hover:bg-yellow-600'
: 'bg-blue-600 text-white hover:bg-blue-700'
} disabled:bg-gray-300 disabled:cursor-not-allowed`}
>
{isCheckingReviews ? (
<>
<div className="w-4 h-4 border-2 border-white border-t-transparent rounded-full animate-spin" />
Validating...
</>
) : hasReviews === true && searchQuery.trim() === searchedQuery ? (
<>
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
{availableReviewCount?.toLocaleString()} reviews
</>
) : hasReviews === false && searchQuery.trim() === searchedQuery ? (
<>
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
</svg>
No reviews
</>
) : (
<>
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -477,49 +534,84 @@ export default function ScraperTest() {
{hasReviews ? (
// Success - Show Business Card
<div className="bg-white border-2 border-green-500 rounded-2xl shadow-lg overflow-hidden mb-4">
{/* Header */}
<div className="bg-gradient-to-r from-green-500 to-emerald-500 px-6 py-4">
<div className="flex items-center gap-2 text-white">
<svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
<span className="font-bold text-lg">Business Found</span>
{/* Business Card Layout */}
<div className="flex">
{/* Business Image */}
{businessImage && (
<div className="w-40 h-40 flex-shrink-0 bg-gray-200">
<img
src={businessImage}
alt={businessName || 'Business'}
className="w-full h-full object-cover"
onError={(e) => {
// Hide image on error
(e.target as HTMLImageElement).style.display = 'none';
}}
/>
</div>
)}
{/* Business Info */}
<div className="flex-1 p-5">
{/* Category Badge + Verified */}
<div className="flex items-center gap-2 mb-2">
<span className="inline-flex items-center gap-1 px-2 py-0.5 bg-green-100 text-green-700 text-xs font-semibold rounded-full">
<svg className="w-3 h-3" fill="currentColor" viewBox="0 0 20 20">
<path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-9.293a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" clipRule="evenodd" />
</svg>
Verified
</span>
{businessCategory && (
<span className="px-2 py-0.5 bg-gray-100 text-gray-600 text-xs font-medium rounded-full">
{businessCategory}
</span>
)}
</div>
{/* Business Name */}
<h3 className="text-xl font-bold text-gray-900 mb-2 leading-tight">{businessName}</h3>
{/* Rating + Reviews Row */}
<div className="flex items-center gap-3 mb-2">
{businessRating && (
<div className="flex items-center gap-1">
<span className="text-lg font-bold text-gray-900">{businessRating.toFixed(1)}</span>
<div className="flex items-center">
{[...Array(5)].map((_, i) => (
<svg
key={i}
className={`w-4 h-4 ${i < Math.floor(businessRating) ? 'text-yellow-400' : 'text-gray-300'}`}
fill="currentColor"
viewBox="0 0 20 20"
>
<path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z" />
</svg>
))}
</div>
</div>
)}
{availableReviewCount !== null && availableReviewCount > 0 && (
<span className="text-sm text-gray-600 font-medium">
({availableReviewCount.toLocaleString()} reviews)
</span>
)}
</div>
{/* Address */}
{businessAddress && (
<div className="flex items-start gap-1.5 text-gray-500 text-sm">
<svg className="w-4 h-4 mt-0.5 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M17.657 16.657L13.414 20.9a1.998 1.998 0 01-2.827 0l-4.244-4.243a8 8 0 1111.314 0z" />
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 11a3 3 0 11-6 0 3 3 0 016 0z" />
</svg>
<span className="line-clamp-2">{businessAddress}</span>
</div>
)}
</div>
</div>
{/* Business Info */}
<div className="p-6">
{/* Business Name */}
<h3 className="text-2xl font-bold text-gray-900 mb-3">{businessName}</h3>
{/* Rating */}
{businessRating && (
<div className="flex items-center gap-1 mb-3">
<span className="text-2xl font-bold text-gray-900">{businessRating.toFixed(1)}</span>
<div className="flex items-center ml-1">
{[...Array(5)].map((_, i) => (
<svg
key={i}
className={`w-5 h-5 ${i < Math.floor(businessRating) ? 'text-yellow-400' : 'text-gray-300'}`}
fill="currentColor"
viewBox="0 0 20 20"
>
<path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z" />
</svg>
))}
</div>
</div>
)}
{/* Address */}
{businessAddress && (
<div className="flex items-start gap-2 text-gray-600 mb-4">
<span className="text-lg">📍</span>
<span className="text-sm">{businessAddress}</span>
</div>
)}
{/* Start Scraping Button */}
{/* Start Scraping Button */}
<div className="px-5 pb-5">
<form onSubmit={handlePreviewBusiness}>
<button
type="submit"
@@ -536,7 +628,7 @@ export default function ScraperTest() {
<svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 10V3L4 14h7v7l9-11h-7z" />
</svg>
Start Scraping Reviews
Scrape {availableReviewCount?.toLocaleString()} Reviews
</>
)}
</button>
@@ -711,7 +803,13 @@ export default function ScraperTest() {
setReviews(reviewsData.reviews);
setActiveJobId(job.job_id);
setShowAnalytics(true);
// Call parent callback if provided (for right panel display)
if (onSelectReviews) {
onSelectReviews(reviewsData.reviews, searchedQuery || 'Business', job.job_id);
} else {
setShowAnalytics(true);
}
} catch (err) {
console.error('Failed to fetch reviews:', err);
setError(err instanceof Error ? err.message : 'Failed to load reviews for analysis');

View File

@@ -0,0 +1,65 @@
'use client';
/** Props accepted by the Sidebar navigation component. */
interface SidebarProps {
// View whose button is rendered in the highlighted (active) style.
activeView: 'newScrape' | 'jobs' | 'reports';
// Called with the id of the clicked navigation button.
onViewChange: (view: 'newScrape' | 'jobs' | 'reports') => void;
// Shown as a badge on the Jobs button when greater than zero.
jobCount: number;
}
/**
 * Vertical icon navigation with one button per view.
 *
 * The button matching `activeView` is highlighted; clicking any button calls
 * `onViewChange` with that view's id. The Jobs button carries a count badge
 * when `jobCount` is positive, capped visually at "99+".
 */
export default function Sidebar({ activeView, onViewChange, jobCount }: SidebarProps) {
  // All three icons share the same outline styling and differ only in path data.
  const outlineIcon = (pathData: string) => (
    <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d={pathData} />
    </svg>
  );

  const navItems = [
    {
      id: 'newScrape' as const,
      label: 'New Scrape',
      icon: outlineIcon('M12 4v16m8-8H4'),
    },
    {
      id: 'jobs' as const,
      label: 'Jobs',
      icon: outlineIcon(
        'M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2'
      ),
      badge: jobCount > 0 ? jobCount : undefined,
    },
    {
      id: 'reports' as const,
      label: 'Reports',
      icon: outlineIcon(
        'M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z'
      ),
    },
  ];

  return (
    <div className="w-20 bg-gray-900 flex flex-col items-center py-6 gap-2">
      {navItems.map((item) => {
        const stateClasses =
          activeView === item.id
            ? 'bg-blue-600 text-white shadow-lg'
            : 'text-gray-400 hover:bg-gray-800 hover:text-white';
        // Only the first word of the label fits under the icon.
        const shortLabel = item.label.split(' ')[0];
        return (
          <button
            key={item.id}
            onClick={() => onViewChange(item.id)}
            className={`relative w-14 h-14 rounded-xl flex flex-col items-center justify-center gap-1 transition-all ${stateClasses}`}
            title={item.label}
          >
            {item.icon}
            <span className="text-[10px] font-medium">{shortLabel}</span>
            {item.badge !== undefined && (
              <span className="absolute -top-1 -right-1 w-5 h-5 bg-red-500 text-white text-xs font-bold rounded-full flex items-center justify-center">
                {item.badge > 99 ? '99+' : item.badge}
              </span>
            )}
          </button>
        );
      })}
    </div>
  );
}

View File

@@ -1,5 +1,10 @@
// Analytics utility functions
/** Reply attached to a review; referenced by `Review.owner_response`. */
export interface OwnerResponse {
text: string;
// Optional; format is not established here. TODO confirm against the backend payload.
timestamp?: string;
}
export interface Review {
author: string;
rating: number;
@@ -8,6 +13,8 @@ export interface Review {
avatar_url: string | null;
profile_url: string | null;
review_id: string;
owner_response?: OwnerResponse | null;
photo_urls?: string[] | null;
// Derived fields (computed on load)
parsedDate?: Date;
dateCategory?: 'recent' | 'month' | 'year' | 'older'; // Time range category
@@ -22,6 +29,7 @@ export interface TimelineDataPoint {
date: string;
rating: number;
rollingAvg: number;
count: number; // Number of reviews in this period
}
export interface ReviewStats {
@@ -37,6 +45,21 @@ export interface ReviewStats {
negativeReviews: number;
responseRate: number;
averageResponseTime: string;
// Response breakdown
responseBreakdown: { answered: number; notAnswered: number };
// New trend metrics
ratingTrend: {
recentAvg: number;
olderAvg: number;
change: number; // positive = improvement, negative = decline
periodLabel: string;
};
reviewVelocity: {
recentCount: number;
olderCount: number;
changePercent: number; // positive = more reviews, negative = fewer
periodLabel: string;
};
}
export function calculateReviewStats(reviews: Review[]): ReviewStats {
@@ -55,19 +78,21 @@ export function calculateReviewStats(reviews: Review[]): ReviewStats {
const totalReviews = reviews.length;
// Average rating
const averageRating = reviews.reduce((sum, r) => sum + r.rating, 0) / totalReviews;
const averageRating = totalReviews > 0
? reviews.reduce((sum, r) => sum + r.rating, 0) / totalReviews
: 0;
// Sentiment score (% of 4-5 star reviews)
const positiveReviews = reviews.filter(r => r.rating >= 4).length;
const sentimentScore = (positiveReviews / totalReviews) * 100;
const sentimentScore = totalReviews > 0 ? (positiveReviews / totalReviews) * 100 : 0;
// Photo count (reviews with avatars as proxy)
const photoCount = reviews.filter(r => r.avatar_url).length;
// Photo count (reviews with actual photos attached)
const photoCount = reviews.filter(r => r.photo_urls && r.photo_urls.length > 0).length;
// Average review length
const avgReviewLength = Math.round(
reviews.reduce((sum, r) => sum + (r.text?.split(' ').length || 0), 0) / totalReviews
);
const avgReviewLength = totalReviews > 0
? Math.round(reviews.reduce((sum, r) => sum + (r.text?.split(' ').length || 0), 0) / totalReviews)
: 0;
// Recent reviews (last 30 days - simplified check)
const recentReviews = reviews.filter(r => {
@@ -122,11 +147,50 @@ export function calculateReviewStats(reviews: Review[]): ReviewStats {
// Negative reviews count
const negativeReviews = reviews.filter(r => r.rating <= 2).length;
// Response rate (placeholder - would need owner_response field)
const responseRate = 0; // TODO: Calculate when owner responses are available
// Response breakdown - count answered vs not answered reviews
const answeredReviews = reviews.filter(r => r.owner_response?.text).length;
const responseBreakdown = {
answered: answeredReviews,
notAnswered: totalReviews - answeredReviews,
};
// Average response time (placeholder)
const averageResponseTime = 'N/A'; // TODO: Calculate when response data is available
// Response rate calculated from actual data
const responseRate = totalReviews > 0 ? (answeredReviews / totalReviews) * 100 : 0;
// Average response time (placeholder - would need response timestamps)
const averageResponseTime = 'N/A'; // TODO: Calculate when response timestamps are available
// Rating Trend - compare recent 3 months vs previous 3 months
const now = new Date();
const threeMonthsAgo = new Date(now.getTime() - 90 * 24 * 60 * 60 * 1000);
const sixMonthsAgo = new Date(now.getTime() - 180 * 24 * 60 * 60 * 1000);
const recentReviewsForTrend = reviews.filter(r => r.centerDate && r.centerDate >= threeMonthsAgo);
const olderReviewsForTrend = reviews.filter(r => r.centerDate && r.centerDate < threeMonthsAgo && r.centerDate >= sixMonthsAgo);
const recentAvg = recentReviewsForTrend.length > 0
? recentReviewsForTrend.reduce((sum, r) => sum + r.rating, 0) / recentReviewsForTrend.length
: 0;
const olderAvg = olderReviewsForTrend.length > 0
? olderReviewsForTrend.reduce((sum, r) => sum + r.rating, 0) / olderReviewsForTrend.length
: 0;
const ratingTrend = {
recentAvg: Math.round(recentAvg * 10) / 10,
olderAvg: Math.round(olderAvg * 10) / 10,
change: Math.round((recentAvg - olderAvg) * 10) / 10,
periodLabel: 'last 3 months vs previous 3 months',
};
// Review Velocity - compare recent 3 months vs previous 3 months
const reviewVelocity = {
recentCount: recentReviewsForTrend.length,
olderCount: olderReviewsForTrend.length,
changePercent: olderReviewsForTrend.length > 0
? Math.round(((recentReviewsForTrend.length - olderReviewsForTrend.length) / olderReviewsForTrend.length) * 100)
: (recentReviewsForTrend.length > 0 ? 100 : 0),
periodLabel: 'last 3 months vs previous 3 months',
};
return {
totalReviews,
@@ -141,6 +205,9 @@ export function calculateReviewStats(reviews: Review[]): ReviewStats {
negativeReviews,
responseRate,
averageResponseTime,
responseBreakdown,
ratingTrend,
reviewVelocity,
};
}
@@ -367,9 +434,10 @@ export function calculateTimelineData(reviews: Review[]): TimelineDataPoint[] {
// Group by month
const monthlyData: Record<string, { ratings: number[]; date: Date }> = {};
const monthNames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
sortedReviews.forEach(review => {
const monthKey = `${review.parsedDate.getFullYear()}-${String(review.parsedDate.getMonth() + 1).padStart(2, '0')}`;
const monthKey = `${monthNames[review.parsedDate.getMonth()]} ${review.parsedDate.getFullYear()}`;
if (!monthlyData[monthKey]) {
monthlyData[monthKey] = { ratings: [], date: review.parsedDate };
@@ -383,8 +451,17 @@ export function calculateTimelineData(reviews: Review[]): TimelineDataPoint[] {
date: monthKey,
rating: data.ratings.reduce((a, b) => a + b, 0) / data.ratings.length,
rollingAvg: 0, // Will calculate below
count: data.ratings.length, // Number of reviews this month
}))
.sort((a, b) => a.date.localeCompare(b.date));
.sort((a, b) => {
// Parse "Mon YYYY" format for sorting
const parseMonthYear = (d: string) => {
const [month, year] = d.split(' ');
const monthIndex = monthNames.indexOf(month);
return new Date(parseInt(year), monthIndex, 1).getTime();
};
return parseMonthYear(a.date) - parseMonthYear(b.date);
});
// Calculate 3-month rolling average
dataPoints.forEach((point, idx) => {