Clean up project root - remove 51 obsolete files

Deleted:
- 26 old markdown summary/documentation files
- 16 debug/test Python scripts (debug_*, test_*, diagnose_*)
- 10 untracked JSON files from api_response_samples
- terms-of-usage.md, pane_not_found.png

Also includes pending web app changes:
- Jobs management UI (JobsView, Sidebar components)
- API routes for job streaming and comparison
- Enhanced ReviewAnalytics and ScraperTest components

Final clean structure:
├── api_server_production.py  (main entry)
├── modules/                  (core Python)
├── web/                      (Next.js frontend)
├── tests/                    (test suite)
├── docs/                     (documentation)
└── examples/                 (usage examples)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-23 17:31:53 +00:00
parent 8ccf72a489
commit 47bb032011
69 changed files with 3417 additions and 11347 deletions

View File

@@ -1,38 +1,259 @@
import ScraperTest from '@/components/ScraperTest';
'use client';
import ScraperTest, { JobStatus } from '@/components/ScraperTest';
import ReviewAnalytics from '@/components/ReviewAnalytics';
import Sidebar from '@/components/Sidebar';
import JobsView from '@/components/JobsView';
import { useState, useCallback, useEffect } from 'react';
/**
 * A single review record as handled by this page.
 *
 * `text`, `avatar_url` and `profile_url` are explicitly nullable; the other
 * fields are always present according to this declaration.
 * NOTE(review): the snake_case names suggest this mirrors the backend JSON
 * payload — confirm against the API schema.
 */
interface Review {
author: string;
rating: number;
text: string | null;
date_text: string;
avatar_url: string | null;
profile_url: string | null;
review_id: string;
}
/**
 * A {@link Review} that may additionally carry an `is_new` flag.
 *
 * NOTE(review): presumably set by the compare endpoint to mark reviews that
 * were not present in the previous job — confirm against the API.
 */
interface ReviewWithNew extends Review {
is_new?: boolean;
}
/**
 * The job currently opened in the reports view, together with its reviews.
 *
 * `newCount` and `previousJobId` are optional: `loadJobReviews` fills them
 * from the response's `new_count` field and from its `previousJob` argument,
 * while `handleSelectReviews` leaves them unset.
 */
interface SelectedJob {
reviews: ReviewWithNew[];
businessName: string;
businessUrl: string;
jobId: string;
newCount?: number;
previousJobId?: string;
}
/** Identifier of the main-area view; `renderMainContent` switches on this value. */
type ViewType = 'newScrape' | 'jobs' | 'reports';
/**
 * Page component that owns the view state shared by the sidebar, the jobs
 * list and the reports/analytics area.
 */
export default function Home() {
// Which of the three main views is currently shown.
const [activeView, setActiveView] = useState<ViewType>('newScrape');
// Jobs known to the page (loaded from the API and merged with updates from ScraperTest).
const [jobs, setJobs] = useState<JobStatus[]>([]);
// Job whose reviews are displayed in the reports view; null shows the reports list instead.
const [selectedJob, setSelectedJob] = useState<SelectedJob | null>(null);
// job_id of the job whose reviews are being fetched, or null when nothing is loading.
const [isLoadingJob, setIsLoadingJob] = useState<string | null>(null);
/**
 * Fetches up to 100 jobs from `/api/jobs` and replaces the local job list.
 *
 * Every failure mode (network error, non-2xx status, invalid JSON, payload
 * without a `jobs` array) is logged and otherwise ignored, so the page keeps
 * whatever list it already had.
 */
const refreshJobs = useCallback(async (): Promise<void> => {
  try {
    const response = await fetch('/api/jobs?limit=100');
    if (!response.ok) {
      // A non-2xx answer used to be dropped without any trace.
      console.error(`Failed to load jobs: HTTP ${response.status}`);
      return;
    }

    const payload = await response.json();
    const jobList = payload?.jobs;
    // Only accept a real array: the rest of the component calls
    // .map/.filter on `jobs` and would crash on any other truthy value.
    if (Array.isArray(jobList)) {
      setJobs(jobList);
    }
  } catch (err) {
    console.error('Failed to load jobs:', err);
  }
}, []);
// Trigger the initial job fetch; `refreshJobs` has a stable identity, so
// this effect runs when the component mounts.
useEffect(() => {
  // The promise is intentionally not awaited: refreshJobs handles its own errors.
  void refreshJobs();
}, [refreshJobs]);
/**
 * Merges a batch of jobs into the current list, keyed by `job_id`.
 *
 * Jobs already in the list keep their position and are overwritten by the
 * incoming version; previously unknown jobs are appended in arrival order.
 */
const handleJobsChange = useCallback((newJobs: JobStatus[]) => {
  setJobs((currentJobs) => {
    const byId = new Map<JobStatus['job_id'], JobStatus>();
    for (const existing of currentJobs) {
      byId.set(existing.job_id, existing);
    }
    for (const incoming of newJobs) {
      byId.set(incoming.job_id, incoming);
    }
    return [...byId.values()];
  });
}, []);
/**
 * Opens an already-loaded set of reviews in the reports view.
 *
 * A missing or empty `businessUrl` is stored as an empty string.
 */
const handleSelectReviews = useCallback(
  (reviews: Review[], businessName: string, jobId: string, businessUrl?: string) => {
    const selection: SelectedJob = {
      reviews,
      businessName,
      businessUrl: businessUrl || '',
      jobId,
    };
    setSelectedJob(selection);
    setActiveView('reports');
  },
  [],
);
/**
 * Fetches the reviews of a completed job and, if any came back, opens them
 * in the reports view.
 *
 * @param job - Job to load; ignored unless it is `completed` and has a
 *   non-zero `reviews_count`.
 * @param previousJob - When given, the compare endpoint is queried with this
 *   job as the baseline instead of the plain reviews endpoint.
 *
 * Errors are logged, never thrown; the loading marker is always cleared.
 */
const loadJobReviews = async (job: JobStatus, previousJob?: JobStatus): Promise<void> => {
  if (job.status !== 'completed' || !job.reviews_count) return;

  setIsLoadingJob(job.job_id);
  try {
    // Escape the ids so an unexpected character cannot alter the request path or query.
    const jobId = encodeURIComponent(job.job_id);
    const url = previousJob
      ? `/api/jobs/${jobId}/compare?previous=${encodeURIComponent(previousJob.job_id)}`
      : `/api/jobs/${jobId}/reviews?limit=10000`;

    const response = await fetch(url);
    if (!response.ok) throw new Error('Failed to fetch reviews');

    const data = await response.json();
    // Anything other than an array is treated as "no reviews".
    const reviews = Array.isArray(data?.reviews) ? data.reviews : [];

    if (reviews.length > 0) {
      // Fall back to the `query` parameter of the job URL when the job has no name.
      let businessName = job.business_name;
      if (!businessName) {
        try {
          // searchParams.get() already returns the decoded value. Decoding it a
          // second time threw on names containing a literal '%' and
          // over-decoded everything else.
          businessName = new URL(job.url).searchParams.get('query') || 'Unknown Business';
        } catch {
          // job.url was not a parseable absolute URL.
          businessName = 'Unknown Business';
        }
      }

      setSelectedJob({
        reviews,
        businessName,
        businessUrl: job.url,
        jobId: job.job_id,
        newCount: data?.new_count,
        previousJobId: previousJob?.job_id,
      });
      setActiveView('reports');
    }
  } catch (err) {
    console.error('Failed to load job reviews:', err);
  } finally {
    setIsLoadingJob(null);
  }
};
/**
 * Renders the main area for the current `activeView`:
 * - `newScrape`: the ScraperTest form;
 * - `jobs`: the JobsView list;
 * - `reports`: analytics for `selectedJob`, or, when nothing is selected,
 *   a card grid of completed jobs that have reviews (newest first).
 */
const renderMainContent = () => {
  switch (activeView) {
    case 'newScrape':
      return (
        <div className="h-full overflow-y-auto p-6">
          <ScraperTest onJobsChange={handleJobsChange} onSelectReviews={handleSelectReviews} />
        </div>
      );
    case 'jobs':
      return (
        <JobsView
          jobs={jobs}
          onSelectJob={loadJobReviews}
          isLoadingJob={isLoadingJob}
          onRefresh={refreshJobs}
        />
      );
    case 'reports': {
      // Completed jobs with at least one review, newest first.
      // filter() returns a fresh array, so the in-place sort never touches state.
      const completedJobs = jobs
        .filter(j => j.status === 'completed' && j.reviews_count && j.reviews_count > 0)
        .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime());
      return selectedJob ? (
        <div className="h-full overflow-y-auto p-6">
          <div className="mb-4 flex items-center justify-between">
            <h2 className="text-xl font-bold text-gray-900">Analytics</h2>
            <button
              onClick={() => setSelectedJob(null)}
              className="px-4 py-2 bg-gray-200 hover:bg-gray-300 text-gray-700 rounded-lg font-medium transition-colors flex items-center gap-2"
            >
              <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M10 19l-7-7m0 0l7-7m-7 7h18" />
              </svg>
              Back to Reports
            </button>
          </div>
          <ReviewAnalytics reviews={selectedJob.reviews} businessName={selectedJob.businessName} businessUrl={selectedJob.businessUrl} newCount={selectedJob.newCount} />
        </div>
      ) : (
        <div className="h-full overflow-y-auto p-6">
          <div className="mb-6">
            <h2 className="text-2xl font-bold text-gray-900">Reports</h2>
            <p className="text-sm text-gray-600 mt-1">
              {completedJobs.length} completed {completedJobs.length === 1 ? 'scrape' : 'scrapes'} with reviews
            </p>
          </div>
          {completedJobs.length === 0 ? (
            <div className="flex flex-col items-center justify-center py-16 text-gray-500">
              <svg className="w-20 h-20 mb-4 opacity-30" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
              </svg>
              <h3 className="text-xl font-semibold text-gray-700 mb-2">No Reports Yet</h3>
              <p className="text-sm text-gray-500 mb-4">Complete a scrape job to see analytics reports</p>
              <button
                onClick={() => setActiveView('newScrape')}
                className="px-4 py-2 bg-blue-600 text-white rounded-lg font-medium hover:bg-blue-700 transition-colors"
              >
                Start New Scrape
              </button>
            </div>
          ) : (
            <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-3">
              {completedJobs.map(job => {
                // Fall back to the `query` parameter of the job URL when the job has no name.
                let businessName = job.business_name;
                if (!businessName) {
                  try {
                    // searchParams.get() already returns the decoded value; a second
                    // decodeURIComponent() threw on names containing a literal '%'.
                    businessName = new URL(job.url).searchParams.get('query') || 'Unknown Business';
                  } catch {
                    // job.url was not a parseable absolute URL.
                    businessName = 'Unknown Business';
                  }
                }
                return (
                  <div
                    key={job.job_id}
                    onClick={() => loadJobReviews(job)}
                    className="bg-white rounded-xl border-2 border-gray-200 p-5 cursor-pointer hover:border-blue-400 hover:shadow-lg transition-all"
                  >
                    <div className="flex items-start justify-between mb-3">
                      <h3 className="font-bold text-gray-900 truncate flex-1" title={businessName}>
                        {businessName}
                      </h3>
                      {/* Ternary instead of `&&`: `0 && <span/>` would render a stray "0". */}
                      {job.rating_snapshot ? (
                        <span className="flex items-center gap-1 text-yellow-600 font-semibold ml-2">
                          <svg className="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
                            <path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z" />
                          </svg>
                          {job.rating_snapshot.toFixed(1)}
                        </span>
                      ) : null}
                    </div>
                    <div className="flex items-center gap-4 text-sm text-gray-600 mb-3">
                      <span className="font-semibold text-blue-700">{job.reviews_count} reviews</span>
                      {job.scrape_time ? <span>{job.scrape_time.toFixed(1)}s</span> : null}
                    </div>
                    <div className="text-xs text-gray-500">
                      {new Date(job.created_at).toLocaleDateString()} at {new Date(job.created_at).toLocaleTimeString()}
                    </div>
                    {isLoadingJob === job.job_id && (
                      <div className="mt-3 flex items-center gap-2 text-blue-600">
                        <div className="w-4 h-4 border-2 border-blue-500 border-t-transparent rounded-full animate-spin" />
                        <span className="text-sm font-medium">Loading...</span>
                      </div>
                    )}
                  </div>
                );
              })}
            </div>
          )}
        </div>
      );
    }
  }
};
// Page shell. The sidebar layout renders <Sidebar> (driven by activeView and
// jobs.length) next to the main area produced by renderMainContent().
// NOTE(review): as shown, this span interleaves markup from the previous
// single-page layout (gradient header, standalone <ScraperTest />,
// example-URL footer, closing </main>) with the new sidebar layout, so the
// JSX tags do not balance — confirm against the actual file before editing.
return (
<div className="min-h-screen bg-gradient-to-br from-blue-600 to-indigo-700 py-12 px-4">
<main className="max-w-5xl mx-auto">
<div className="text-center mb-10">
<h1 className="text-4xl md:text-5xl font-bold text-white mb-3">
Google Reviews Scraper
</h1>
<p className="text-blue-100 text-lg">
Test the containerized scraper API
</p>
<div className="mt-4 inline-flex items-center gap-2 px-4 py-2 bg-blue-500/30 rounded-lg text-blue-100 text-sm">
<div className="w-2 h-2 bg-green-400 rounded-full animate-pulse"></div>
Powered by SeleniumBase UC Mode
</div>
</div>
<div className="h-screen w-screen overflow-hidden flex">
{/* Sidebar */}
<Sidebar
activeView={activeView}
onViewChange={setActiveView}
jobCount={jobs.length}
/>
<div className="bg-white rounded-2xl shadow-2xl p-6 md:p-8">
<ScraperTest />
</div>
<div className="mt-8 text-center text-blue-100 text-sm space-y-2">
<p className="font-medium">💡 Example URLs to test:</p>
<div className="space-y-1 text-xs">
<p className="font-mono bg-blue-500/20 rounded px-3 py-1 inline-block">
https://www.google.com/maps/place/Soho+Club/...
</p>
</div>
<p className="mt-4 text-blue-200">
API running at: <span className="font-mono">localhost:8000</span>
</p>
</div>
</main>
{/* Main Content */}
<div className="flex-1 bg-gray-50 overflow-hidden">
{renderMainContent()}
</div>
</div>
);
}