Clean up project root - remove 51 obsolete files
Deleted:
- 26 old markdown summary/documentation files
- 16 debug/test Python scripts (debug_*, test_*, diagnose_*)
- 10 untracked JSON files from api_response_samples
- terms-of-usage.md, pane_not_found.png

Also includes pending web app changes:
- Jobs management UI (JobsView, Sidebar components)
- API routes for job streaming and comparison
- Enhanced ReviewAnalytics and ScraperTest components

Final clean structure:
├── api_server_production.py (main entry)
├── modules/ (core Python)
├── web/ (Next.js frontend)
├── tests/ (test suite)
├── docs/ (documentation)
└── examples/ (usage examples)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
283
web/app/page.tsx
283
web/app/page.tsx
@@ -1,38 +1,259 @@
|
||||
import ScraperTest from '@/components/ScraperTest';
|
||||
'use client';
|
||||
|
||||
import ScraperTest, { JobStatus } from '@/components/ScraperTest';
|
||||
import ReviewAnalytics from '@/components/ReviewAnalytics';
|
||||
import Sidebar from '@/components/Sidebar';
|
||||
import JobsView from '@/components/JobsView';
|
||||
import { useState, useCallback, useEffect } from 'react';
|
||||
|
||||
/**
 * One review record as handled by this page.
 *
 * NOTE(review): field meanings below are inferred from the names only —
 * confirm against the backend response schema.
 */
interface Review {
  /** Display name of the reviewer (presumably). */
  author: string;
  /** Numeric rating given by the reviewer; scale not visible here. */
  rating: number;
  /** Review body; `null` when there is no text. */
  text: string | null;
  /** Date as a string; exact format not visible here. */
  date_text: string;
  /** Reviewer avatar URL, or `null` when absent. */
  avatar_url: string | null;
  /** Reviewer profile URL, or `null` when absent. */
  profile_url: string | null;
  /** Identifier of the review. */
  review_id: string;
}
|
||||
|
||||
/**
 * A {@link Review} that may additionally carry an `is_new` flag.
 *
 * NOTE(review): presumably set by the job-comparison endpoint to mark reviews
 * absent from the previous job — confirm against the API.
 */
interface ReviewWithNew extends Review {
  /** Optional marker; omitted when no comparison information is available. */
  is_new?: boolean;
}
|
||||
|
||||
/**
 * The job currently opened in the analytics ("reports") view, together with
 * the reviews that were loaded for it.
 */
interface SelectedJob {
  /** Reviews to hand to the analytics component. */
  reviews: ReviewWithNew[];
  /** Name shown for the business. */
  businessName: string;
  /** URL associated with the job; empty string when the caller supplied none. */
  businessUrl: string;
  /** Identifier of the job the reviews belong to. */
  jobId: string;
  /** Value of `new_count` from the fetched payload, when present. */
  newCount?: number;
  /** Identifier of the job used as the comparison baseline, if any. */
  previousJobId?: string;
}
|
||||
|
||||
/** Identifies which main panel the page is currently showing. */
type ViewType =
  | 'newScrape'
  | 'jobs'
  | 'reports';
|
||||
|
||||
/**
 * Picks the name to display for a job.
 *
 * Uses `job.business_name` when it is set; otherwise falls back to the `query`
 * search parameter of `job.url`, and finally to `'Unknown Business'` when the
 * URL cannot be parsed or has no usable `query` value.
 */
function resolveBusinessName(job: Pick<JobStatus, 'business_name' | 'url'>): string {
  if (job.business_name) {
    return job.business_name;
  }
  try {
    // URLSearchParams.get() already returns the percent-decoded value; decoding
    // it a second time would throw on names that contain a literal '%'.
    // `||` (not `??`) is intentional: an empty query must also fall back.
    return new URL(job.url).searchParams.get('query') || 'Unknown Business';
  } catch {
    return 'Unknown Business';
  }
}

/**
 * Top-level page component.
 *
 * Renders a sidebar plus one of three main panels ('newScrape', 'jobs',
 * 'reports'), keeps the list of known jobs in state, and loads the reviews of
 * a job on demand before switching to the analytics view.
 */
export default function Home() {
  const [activeView, setActiveView] = useState<ViewType>('newScrape');
  const [jobs, setJobs] = useState<JobStatus[]>([]);
  const [selectedJob, setSelectedJob] = useState<SelectedJob | null>(null);
  // job_id of the job whose reviews are being fetched, or null when idle.
  const [isLoadingJob, setIsLoadingJob] = useState<string | null>(null);

  // Load jobs from API
  const refreshJobs = useCallback(async () => {
    try {
      const response = await fetch('/api/jobs?limit=100');
      if (!response.ok) {
        // Surface HTTP failures instead of silently keeping the old list.
        console.error('Failed to load jobs:', response.status, response.statusText);
        return;
      }
      const data = await response.json();
      // Only accept an actual array; anything else would break later .filter()/.map().
      if (Array.isArray(data?.jobs)) {
        setJobs(data.jobs);
      }
    } catch (err) {
      console.error('Failed to load jobs:', err);
    }
  }, []);

  // Load jobs from API on mount
  useEffect(() => {
    void refreshJobs();
  }, [refreshJobs]);

  const handleJobsChange = useCallback((newJobs: JobStatus[]) => {
    setJobs(prev => {
      // Merge new jobs with existing, updating duplicates
      const jobMap = new Map(prev.map(j => [j.job_id, j]));
      newJobs.forEach(job => jobMap.set(job.job_id, job));
      return Array.from(jobMap.values());
    });
  }, []);

  const handleSelectReviews = useCallback(
    (reviews: Review[], businessName: string, jobId: string, businessUrl?: string) => {
      setSelectedJob({ reviews, businessName, businessUrl: businessUrl ?? '', jobId });
      setActiveView('reports');
    },
    [],
  );

  /**
   * Fetches the reviews of a completed job and opens them in the reports view.
   * When `previousJob` is given, the compare endpoint is used instead of the
   * plain reviews endpoint. Does nothing for jobs that are not completed or
   * have no reviews, and leaves the current view untouched when the response
   * contains no reviews.
   */
  const loadJobReviews = async (job: JobStatus, previousJob?: JobStatus) => {
    if (job.status !== 'completed' || !job.reviews_count) return;

    setIsLoadingJob(job.job_id);
    try {
      // Use compare API if we have a previous job
      const jobPath = `/api/jobs/${encodeURIComponent(job.job_id)}`;
      const url = previousJob
        ? `${jobPath}/compare?previous=${encodeURIComponent(previousJob.job_id)}`
        : `${jobPath}/reviews?limit=10000`;

      const response = await fetch(url);
      if (!response.ok) throw new Error('Failed to fetch reviews');
      const data = await response.json();

      const reviews: ReviewWithNew[] = Array.isArray(data?.reviews) ? data.reviews : [];
      if (reviews.length > 0) {
        setSelectedJob({
          reviews,
          businessName: resolveBusinessName(job),
          businessUrl: job.url,
          jobId: job.job_id,
          newCount: data.new_count,
          previousJobId: previousJob?.job_id,
        });
        setActiveView('reports');
      }
    } catch (err) {
      console.error('Failed to load job reviews:', err);
    } finally {
      setIsLoadingJob(null);
    }
  };

  /** Returns the JSX for the panel selected by `activeView`. */
  const renderMainContent = () => {
    switch (activeView) {
      case 'newScrape':
        return (
          <div className="h-full overflow-y-auto p-6">
            <ScraperTest onJobsChange={handleJobsChange} onSelectReviews={handleSelectReviews} />
          </div>
        );

      case 'jobs':
        return (
          <JobsView
            jobs={jobs}
            onSelectJob={loadJobReviews}
            isLoadingJob={isLoadingJob}
            onRefresh={refreshJobs}
          />
        );

      case 'reports': {
        // Get completed jobs with reviews, newest first.
        // filter() returns a fresh array, so the in-place sort does not touch state.
        const completedJobs = jobs
          .filter(j => j.status === 'completed' && (j.reviews_count ?? 0) > 0)
          .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime());

        return selectedJob ? (
          <div className="h-full overflow-y-auto p-6">
            <div className="mb-4 flex items-center justify-between">
              <h2 className="text-xl font-bold text-gray-900">Analytics</h2>
              <button
                onClick={() => setSelectedJob(null)}
                className="px-4 py-2 bg-gray-200 hover:bg-gray-300 text-gray-700 rounded-lg font-medium transition-colors flex items-center gap-2"
              >
                <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M10 19l-7-7m0 0l7-7m-7 7h18" />
                </svg>
                Back to Reports
              </button>
            </div>
            <ReviewAnalytics reviews={selectedJob.reviews} businessName={selectedJob.businessName} businessUrl={selectedJob.businessUrl} newCount={selectedJob.newCount} />
          </div>
        ) : (
          <div className="h-full overflow-y-auto p-6">
            <div className="mb-6">
              <h2 className="text-2xl font-bold text-gray-900">Reports</h2>
              <p className="text-sm text-gray-600 mt-1">
                {completedJobs.length} completed {completedJobs.length === 1 ? 'scrape' : 'scrapes'} with reviews
              </p>
            </div>

            {completedJobs.length === 0 ? (
              <div className="flex flex-col items-center justify-center py-16 text-gray-500">
                <svg className="w-20 h-20 mb-4 opacity-30" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
                </svg>
                <h3 className="text-xl font-semibold text-gray-700 mb-2">No Reports Yet</h3>
                <p className="text-sm text-gray-500 mb-4">Complete a scrape job to see analytics reports</p>
                <button
                  onClick={() => setActiveView('newScrape')}
                  className="px-4 py-2 bg-blue-600 text-white rounded-lg font-medium hover:bg-blue-700 transition-colors"
                >
                  Start New Scrape
                </button>
              </div>
            ) : (
              <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-3">
                {completedJobs.map(job => {
                  const businessName = resolveBusinessName(job);

                  return (
                    <div
                      key={job.job_id}
                      onClick={() => void loadJobReviews(job)}
                      className="bg-white rounded-xl border-2 border-gray-200 p-5 cursor-pointer hover:border-blue-400 hover:shadow-lg transition-all"
                    >
                      <div className="flex items-start justify-between mb-3">
                        <h3 className="font-bold text-gray-900 truncate flex-1" title={businessName}>
                          {businessName}
                        </h3>
                        {/* Ternary instead of `&&` so a value of 0 does not render a stray "0". */}
                        {job.rating_snapshot ? (
                          <span className="flex items-center gap-1 text-yellow-600 font-semibold ml-2">
                            <svg className="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
                              <path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z" />
                            </svg>
                            {job.rating_snapshot.toFixed(1)}
                          </span>
                        ) : null}
                      </div>

                      <div className="flex items-center gap-4 text-sm text-gray-600 mb-3">
                        <span className="font-semibold text-blue-700">{job.reviews_count} reviews</span>
                        {job.scrape_time ? <span>{job.scrape_time.toFixed(1)}s</span> : null}
                      </div>

                      <div className="text-xs text-gray-500">
                        {new Date(job.created_at).toLocaleDateString()} at {new Date(job.created_at).toLocaleTimeString()}
                      </div>

                      {isLoadingJob === job.job_id && (
                        <div className="mt-3 flex items-center gap-2 text-blue-600">
                          <div className="w-4 h-4 border-2 border-blue-500 border-t-transparent rounded-full animate-spin" />
                          <span className="text-sm font-medium">Loading...</span>
                        </div>
                      )}
                    </div>
                  );
                })}
              </div>
            )}
          </div>
        );
      }
    }
  };

  return (
    <div className="h-screen w-screen overflow-hidden flex">
      {/* Sidebar */}
      <Sidebar
        activeView={activeView}
        onViewChange={setActiveView}
        jobCount={jobs.length}
      />

      {/* Main Content */}
      <div className="flex-1 bg-gray-50 overflow-hidden">
        {renderMainContent()}
      </div>
    </div>
  );
}
|
||||
|
||||
Reference in New Issue
Block a user