Align artifacts with canonical URT v5.1 specification

Fixes inconsistencies discovered during audit against urt-taxonomy/:

- urt_profile ENUM: Add 'lite' and 'core' profiles (were missing)
- USN format: Use canonical regex from spec (was non-compliant)
- USN valence encoding: Add V0 (0) and V± (±) support
- USN grammar: Add Lite (URT:L:) and Core (URT:C:) formats
- Dimension codes: Fix temporal (TC/TR/TH/TF), evidence (ES/EI/EC),
  comparative (CR-N/CR-B/CR-W/CR-S) in decisions doc
- LLM contract: Full USN regex validation pattern

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 16:21:21 +00:00
parent 7666b7aea2
commit 43fd1515d2
7 changed files with 389 additions and 163 deletions

View File

@@ -0,0 +1,60 @@
'use client';
import { useRouter } from 'next/navigation';
import { useCallback } from 'react';
import ScraperTest from '@/components/ScraperTest';
import { useJobs } from '@/contexts/JobsContext';
import { JobStatus } from '@/components/ScraperTest';
import Link from 'next/link';
/**
 * Page for starting a Google Reviews scrape.
 *
 * Shows a breadcrumb linking back to `/new`, a title header, and the
 * `ScraperTest` component. Jobs reported by `ScraperTest` are pushed into the
 * jobs context; picking a job's reviews navigates to `/analytics/<jobId>`.
 */
export default function GoogleReviewsScraperPage() {
  const router = useRouter();
  const { addJob } = useJobs();

  // Forward every reported job to the context (addJob handles deduplication).
  const handleJobsChange = useCallback(
    (reportedJobs: JobStatus[]) => {
      reportedJobs.forEach((reportedJob) => {
        addJob(reportedJob);
      });
    },
    [addJob],
  );

  // Only the job id is needed to build the analytics route; the other two
  // arguments are accepted to match the callback shape and otherwise ignored.
  const handleSelectReviews = (
    _reviews: unknown[],
    _businessName: string,
    jobId: string,
  ) => {
    router.push(`/analytics/${jobId}`);
  };

  const breadcrumb = (
    <div className="max-w-4xl mx-auto mb-6">
      <nav className="flex items-center gap-2 text-sm text-gray-500">
        <Link href="/new" className="hover:text-blue-600 transition-colors">
          New Scrape
        </Link>
        <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
        </svg>
        <span className="text-gray-900 font-medium">Google Reviews</span>
      </nav>
    </div>
  );

  const header = (
    <div className="max-w-4xl mx-auto mb-6">
      <div className="flex items-center gap-4">
        <div className="w-14 h-14 bg-gradient-to-br from-blue-500 to-indigo-600 rounded-xl flex items-center justify-center shadow-lg">
          <svg className="w-7 h-7 text-white" viewBox="0 0 24 24" fill="currentColor">
            <path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/>
          </svg>
        </div>
        <div>
          <h1 className="text-2xl font-bold text-gray-900">Google Reviews Scraper</h1>
          <p className="text-gray-500">Extract reviews from any Google Maps business listing</p>
        </div>
      </div>
    </div>
  );

  return (
    <div className="h-full overflow-y-auto p-6">
      {/* Breadcrumb */}
      {breadcrumb}
      {/* Header */}
      {header}
      <ScraperTest
        onJobsChange={handleJobsChange}
        onSelectReviews={handleSelectReviews}
      />
    </div>
  );
}

View File

@@ -1,31 +1,312 @@
'use client';
import { useRouter } from 'next/navigation';
import { useCallback } from 'react';
import ScraperTest from '@/components/ScraperTest';
import { useJobs } from '@/contexts/JobsContext';
import { JobStatus } from '@/components/ScraperTest';
import { useState, useEffect, useCallback } from 'react';
import Link from 'next/link';
/**
 * Base URL of the backend API. Uses NEXT_PUBLIC_API_URL when it is set to a
 * non-empty value; otherwise falls back to http://localhost:8000.
 */
const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
/**
 * One element of the JSON array this page reads from
 * `${API_BASE}/api/admin/scrapers`.
 */
interface ScraperInfo {
/** Scraper type identifier; entries sharing a job_type are grouped into one card. */
job_type: string;
/** Version string; shown to the user with a `v` prefix. */
version: string;
/** Variant label; appended to the version label in parentheses unless it is 'stable'. */
variant: string;
/**
 * An entry only counts as active when this is greater than 0.
 * NOTE(review): presumably a percentage of traffic — confirm units with the API.
 */
traffic_pct: number;
/**
 * An entry only counts as active when this is null/empty.
 * NOTE(review): presumably a timestamp string when set — confirm format with the API.
 */
deprecated_at: string | null;
}
/** Everything needed to render one scraper card on the "New Scrape Job" page. */
interface ScraperTypeCard {
/** Scraper type identifier; also used as the React key of the card. */
job_type: string;
/** Card title. */
name: string;
/** Short explanatory text shown under the title. */
description: string;
/** Icon element rendered inside the card's gradient tile. */
icon: React.ReactNode;
/** Link target used when the card is rendered as an available scraper. */
route: string;
/** Class names appended after `bg-gradient-to-br` on the icon tile (e.g. 'from-blue-500 to-indigo-600'). */
color: string;
/** True when the card is listed under "Available Scrapers". */
available: boolean;
/** Version labels; the first two are shown as chips, the rest as a "+N more" count. */
versions: string[];
}
/** Static presentation fields of a card: everything that is not derived from the registry response. */
type ScraperCardMetadata = Omit<ScraperTypeCard, 'job_type' | 'available' | 'versions'>;

/** Presentation data for the Google Reviews scraper, shared by both spellings of its job type. */
const GOOGLE_REVIEWS_METADATA: ScraperCardMetadata = {
  name: 'Google Reviews',
  description: 'Extract reviews from Google Maps business listings. Supports any business with a Google Maps presence.',
  icon: (
    <svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
      <path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/>
    </svg>
  ),
  route: '/new/google-reviews',
  color: 'from-blue-500 to-indigo-600',
};

/**
 * Per-scraper presentation data (name, description, icon, route, gradient),
 * keyed by job type. The Google entry is registered under both the hyphenated
 * and the underscored key so either spelling resolves to the same card.
 */
const SCRAPER_METADATA: Record<string, ScraperCardMetadata> = {
  'google-reviews': GOOGLE_REVIEWS_METADATA,
  'google_reviews': GOOGLE_REVIEWS_METADATA,
  'yelp-reviews': {
    name: 'Yelp Reviews',
    description: 'Extract reviews from Yelp business pages. Perfect for restaurants, services, and local businesses.',
    icon: (
      <svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
        <path d="M12.14 9.27l-1.39 4.28c-.11.34-.08.71.09 1.03.17.32.45.56.79.67l5.38 1.79c.34.11.71.08 1.03-.09.32-.17.56-.45.67-.79l1.39-4.28c.11-.34.08-.71-.09-1.03-.17-.32-.45-.56-.79-.67l-5.38-1.79c-.34-.11-.71-.08-1.03.09-.32.17-.56.45-.67.79zM6.5 8.5c-.83 0-1.5.67-1.5 1.5v8c0 .83.67 1.5 1.5 1.5s1.5-.67 1.5-1.5v-8c0-.83-.67-1.5-1.5-1.5z"/>
      </svg>
    ),
    route: '/new/yelp-reviews',
    color: 'from-red-500 to-rose-600',
  },
  'tripadvisor-reviews': {
    name: 'TripAdvisor Reviews',
    description: 'Extract reviews from TripAdvisor. Ideal for hotels, restaurants, and tourist attractions.',
    icon: (
      <svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
        <path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"/>
      </svg>
    ),
    route: '/new/tripadvisor-reviews',
    color: 'from-green-500 to-emerald-600',
  },
};
/** Generic icon shown for scraper types that have no dedicated metadata entry. */
const DEFAULT_SCRAPER_ICON = (
  <svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
    <path d="M19.428 15.428a2 2 0 00-1.022-.547l-2.387-.477a6 6 0 00-3.86.517l-.318.158a6 6 0 01-3.86.517L6.05 15.21a2 2 0 00-1.806.547M8 4h8l-1 1v5.172a2 2 0 00.586 1.414l5 5c1.26 1.26.367 3.414-1.415 3.414H4.828c-1.782 0-2.674-2.154-1.414-3.414l5-5A2 2 0 009 10.172V5L8 4z" />
  </svg>
);

/**
 * Presentation data used when a job type has no entry in the metadata table.
 * The page overrides `name` and `route` per job type and keeps the rest.
 */
const DEFAULT_METADATA = {
  name: 'Unknown Scraper',
  description: 'A scraper for extracting reviews.',
  icon: DEFAULT_SCRAPER_ICON,
  route: '/new',
  color: 'from-gray-500 to-gray-600',
};
/**
 * Groups registry entries by job type and converts each group into a card.
 *
 * - Each entry contributes one version label: `v<version>`, followed by
 *   ` (<variant>)` unless the variant is 'stable'.
 * - A group is available when at least one of its entries is not deprecated
 *   and has a traffic share above 0.
 * - Job types without an entry in SCRAPER_METADATA get the default metadata
 *   with a title-cased name and a `/new/<job_type>` route.
 *
 * A Map is used for grouping because the keys come from the API response.
 *
 * @param entries Registry entries as returned by the scrapers endpoint.
 * @returns One card per distinct job type, in first-seen order.
 */
function buildScraperCards(entries: ScraperInfo[]): ScraperTypeCard[] {
  const groups = new Map<string, { versions: string[]; hasActive: boolean }>();

  for (const entry of entries) {
    let group = groups.get(entry.job_type);
    if (!group) {
      group = { versions: [], hasActive: false };
      groups.set(entry.job_type, group);
    }
    const variantSuffix = entry.variant !== 'stable' ? ` (${entry.variant})` : '';
    group.versions.push(`v${entry.version}${variantSuffix}`);
    if (!entry.deprecated_at && entry.traffic_pct > 0) {
      group.hasActive = true;
    }
  }

  return Array.from(groups, ([job_type, { versions, hasActive }]) => {
    const metadata = SCRAPER_METADATA[job_type] ?? {
      ...DEFAULT_METADATA,
      name: job_type
        .split(/[-_]/)
        .map(word => word.charAt(0).toUpperCase() + word.slice(1))
        .join(' '),
      route: `/new/${job_type}`,
    };
    return {
      job_type,
      ...metadata,
      available: hasActive,
      versions,
    };
  });
}

/**
 * "New Scrape Job" page.
 *
 * Renders the `ScraperTest` component followed by a catalogue of scraper
 * types. The catalogue is loaded once on mount from
 * `${API_BASE}/api/admin/scrapers`; while loading, skeleton cards are shown.
 * If loading fails, a warning banner is displayed and Google Reviews is
 * listed as the only available scraper. Yelp and TripAdvisor are listed as
 * "Coming Soon" unless the loaded catalogue already contains them.
 */
export default function NewScrapePage() {
  const router = useRouter();
  const { addJob } = useJobs();
  const [scrapers, setScrapers] = useState<ScraperTypeCard[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const handleJobsChange = useCallback((jobs: JobStatus[]) => {
    // Add new jobs to context (addJob handles deduplication)
    jobs.forEach(job => addJob(job));
  }, [addJob]);

  // Declared at component scope (not inside fetchScrapers) so the JSX below
  // can reference it. Only the job id is needed to build the analytics route.
  const handleSelectReviews = (_reviews: unknown[], _businessName: string, jobId: string) => {
    // Navigate to analytics page for this job
    router.push(`/analytics/${jobId}`);
  };

  // Uses only module-level values and state setters, so the empty dependency
  // list is accurate and the callback identity is stable.
  const fetchScrapers = useCallback(async () => {
    try {
      const response = await fetch(`${API_BASE}/api/admin/scrapers`);
      if (!response.ok) throw new Error('Failed to fetch scrapers');

      // The body is untyped JSON: verify it is an array before grouping so a
      // malformed response fails with a clear message instead of a TypeError.
      const payload: unknown = await response.json();
      if (!Array.isArray(payload)) throw new Error('Unexpected scrapers response format');

      setScrapers(buildScraperCards(payload as ScraperInfo[]));
    } catch (err) {
      console.error('Failed to fetch scrapers:', err);
      setError('Failed to load available scrapers');
      // Fallback to showing Google Reviews as available
      setScrapers([{
        job_type: 'google-reviews',
        ...SCRAPER_METADATA['google-reviews'],
        available: true,
        versions: ['v1.0.0'],
      }]);
    } finally {
      setLoading(false);
    }
  }, []);

  useEffect(() => {
    // fetchScrapers handles its own errors, so the promise is intentionally not awaited.
    void fetchScrapers();
  }, [fetchScrapers]);

  // Coming soon scrapers (not in registry yet); hidden once the registry reports them.
  const comingSoonScrapers: ScraperTypeCard[] = [
    {
      job_type: 'yelp-reviews',
      ...SCRAPER_METADATA['yelp-reviews'],
      available: false,
      versions: [],
    },
    {
      job_type: 'tripadvisor-reviews',
      ...SCRAPER_METADATA['tripadvisor-reviews'],
      available: false,
      versions: [],
    },
  ].filter(s => !scrapers.some(existing => existing.job_type === s.job_type));

  return (
    <div className="h-full overflow-y-auto p-6">
      <ScraperTest
        onJobsChange={handleJobsChange}
        onSelectReviews={handleSelectReviews}
      />
      <div className="max-w-4xl mx-auto">
        {/* Header */}
        <div className="mb-8">
          <h1 className="text-3xl font-bold text-gray-900 mb-2">New Scrape Job</h1>
          <p className="text-gray-500">Select a scraper type to start extracting reviews</p>
        </div>
        {/* Error State */}
        {error && (
          <div className="mb-6 p-4 bg-yellow-50 border-2 border-yellow-200 rounded-xl">
            <div className="flex items-center gap-2 text-yellow-800">
              <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 20 20">
                <path fillRule="evenodd" d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z" clipRule="evenodd" />
              </svg>
              <span className="font-medium">{error}</span>
            </div>
          </div>
        )}
        {/* Loading State */}
        {loading ? (
          <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
            {[1, 2].map(i => (
              <div key={i} className="p-6 bg-gray-100 rounded-2xl animate-pulse">
                <div className="flex items-start gap-4">
                  <div className="w-14 h-14 bg-gray-200 rounded-xl"></div>
                  <div className="flex-1">
                    <div className="h-6 bg-gray-200 rounded w-32 mb-2"></div>
                    <div className="h-4 bg-gray-200 rounded w-full mb-1"></div>
                    <div className="h-4 bg-gray-200 rounded w-3/4"></div>
                  </div>
                </div>
              </div>
            ))}
          </div>
        ) : (
          <>
            {/* Available Scrapers */}
            <div className="mb-8">
              <h2 className="text-lg font-semibold text-gray-700 mb-4 flex items-center gap-2">
                <span className="w-2 h-2 bg-green-500 rounded-full"></span>
                Available Scrapers
              </h2>
              <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
                {scrapers.filter(s => s.available).map(scraper => (
                  <Link
                    key={scraper.job_type}
                    href={scraper.route}
                    className="group p-6 bg-white border-2 border-gray-200 rounded-2xl shadow-sm hover:border-blue-500 hover:shadow-lg transition-all"
                  >
                    <div className="flex items-start gap-4">
                      <div className={`w-14 h-14 bg-gradient-to-br ${scraper.color} rounded-xl flex items-center justify-center text-white shadow-lg group-hover:scale-110 transition-transform`}>
                        {scraper.icon}
                      </div>
                      <div className="flex-1">
                        <div className="flex items-center gap-2 mb-1">
                          <h3 className="text-xl font-bold text-gray-900 group-hover:text-blue-600 transition-colors">
                            {scraper.name}
                          </h3>
                          <span className="px-2 py-0.5 bg-green-100 text-green-700 text-xs font-semibold rounded-full">
                            Active
                          </span>
                        </div>
                        <p className="text-gray-500 text-sm mb-3">{scraper.description}</p>
                        <div className="flex items-center gap-2">
                          {scraper.versions.slice(0, 2).map(v => (
                            <span key={v} className="px-2 py-0.5 bg-gray-100 text-gray-600 text-xs rounded">
                              {v}
                            </span>
                          ))}
                          {scraper.versions.length > 2 && (
                            <span className="text-xs text-gray-400">+{scraper.versions.length - 2} more</span>
                          )}
                        </div>
                      </div>
                      <svg className="w-6 h-6 text-gray-300 group-hover:text-blue-500 group-hover:translate-x-1 transition-all" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
                      </svg>
                    </div>
                  </Link>
                ))}
              </div>
            </div>
            {/* Coming Soon Scrapers */}
            {comingSoonScrapers.length > 0 && (
              <div>
                <h2 className="text-lg font-semibold text-gray-400 mb-4 flex items-center gap-2">
                  <span className="w-2 h-2 bg-gray-300 rounded-full"></span>
                  Coming Soon
                </h2>
                <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
                  {comingSoonScrapers.map(scraper => (
                    <div
                      key={scraper.job_type}
                      className="p-6 bg-gray-50 border-2 border-dashed border-gray-200 rounded-2xl opacity-60"
                    >
                      <div className="flex items-start gap-4">
                        <div className={`w-14 h-14 bg-gradient-to-br ${scraper.color} rounded-xl flex items-center justify-center text-white shadow-lg opacity-50`}>
                          {scraper.icon}
                        </div>
                        <div className="flex-1">
                          <div className="flex items-center gap-2 mb-1">
                            <h3 className="text-xl font-bold text-gray-500">
                              {scraper.name}
                            </h3>
                            <span className="px-2 py-0.5 bg-gray-200 text-gray-500 text-xs font-semibold rounded-full">
                              Coming Soon
                            </span>
                          </div>
                          <p className="text-gray-400 text-sm">{scraper.description}</p>
                        </div>
                      </div>
                    </div>
                  ))}
                </div>
              </div>
            )}
          </>
        )}
        {/* Help Section */}
        <div className="mt-12 p-6 bg-blue-50 border-2 border-blue-100 rounded-2xl">
          <div className="flex items-start gap-4">
            <div className="w-10 h-10 bg-blue-500 rounded-lg flex items-center justify-center flex-shrink-0">
              <svg className="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
              </svg>
            </div>
            <div>
              <h3 className="font-bold text-blue-900 mb-1">Need a different scraper?</h3>
              <p className="text-blue-700 text-sm">
                We&apos;re constantly adding new scrapers. If you need reviews from a platform not listed here,{' '}
                <a href="mailto:support@reviewiq.com" className="underline hover:no-underline">let us know</a>.
              </p>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}