diff --git a/.artifacts/LLM-Classification-Contract-v1.md b/.artifacts/LLM-Classification-Contract-v1.md
index 239de46..9f686fb 100644
--- a/.artifacts/LLM-Classification-Contract-v1.md
+++ b/.artifacts/LLM-Classification-Contract-v1.md
@@ -243,8 +243,8 @@ Return valid JSON matching the schema exactly. No markdown, no explanations.
},
"usn": {
"type": "string",
- "pattern": "^URT:S:[OPJEAVR][1-4]\\.[0-9]{2}",
- "description": "URT String Notation for audit"
+ "pattern": "^URT:S:[OPJEAVR][1-4]\\.[0-9]{2}(\\+[OPJEAVR][1-4]\\.[0-9]{2}){0,2}:[+\\-0±][123]:[1-3][1-3]T[CRHF]\\.E[SIC]\\.[NBWS]$",
+ "description": "URT String Notation for audit (Standard profile)"
}
}
}
diff --git a/.artifacts/ReviewIQ-Architecture-v3.2.md b/.artifacts/ReviewIQ-Architecture-v3.2.md
index c470b9c..1df2b0d 100644
--- a/.artifacts/ReviewIQ-Architecture-v3.2.md
+++ b/.artifacts/ReviewIQ-Architecture-v3.2.md
@@ -140,7 +140,7 @@ CREATE TYPE urt_actionability AS ENUM ('A1', 'A2', 'A3');
CREATE TYPE urt_temporal AS ENUM ('TC', 'TR', 'TH', 'TF');
CREATE TYPE urt_evidence AS ENUM ('ES', 'EI', 'EC');
CREATE TYPE urt_comparative AS ENUM ('CR-N', 'CR-B', 'CR-W', 'CR-S');
-CREATE TYPE urt_profile AS ENUM ('standard', 'full');
+CREATE TYPE urt_profile AS ENUM ('lite', 'core', 'standard', 'full');
CREATE TYPE urt_confidence AS ENUM ('high', 'medium', 'low');
CREATE TYPE urt_relation AS ENUM ('cause_of', 'effect_of', 'contrast', 'resolution');
CREATE TYPE urt_entity_type AS ENUM ('location', 'staff', 'product', 'process', 'time', 'other');
@@ -411,15 +411,20 @@ ALTER TABLE review_spans
ADD CONSTRAINT chk_no_self_relation
CHECK (related_span_id IS NULL OR related_span_id != span_id);
--- USN format validation based on profile
--- Standard: V[+-0±]:I[123]:CODE (e.g., "V-:I2:J1.01")
--- Full: V[+-0±]:I[123]:CODE:S[123]:A[123]:T[CRHF]:E[SIC] (e.g., "V-:I3:J1.01:S2:A2:TC:ES")
+-- USN format validation based on profile (URT v5.1 canonical format)
+-- Lite: URT:L:{domain}:{valence}{intensity}
+-- Core: URT:C:{category}:{valence}{intensity}
+-- Standard: URT:S:{subcode}[+{sec}[+{sec}]]:{valence}{intensity}:{S}{A}{T}.{E}.{CR}
+-- Full: URT:F:{subcode}[+{sec}[+{sec}]]:{valence}{intensity}:{S}{A}{T}.{E}.{CR}[:{causal}[,{causal}...]]
+-- Examples: URT:L:O:+2 | URT:C:J1:-3 | URT:S:J1.03:-2:22TC.ES.N | URT:F:J1.01:-3:23TR.ES.S:CD.O,MG.O
ALTER TABLE review_spans
ADD CONSTRAINT chk_usn_format
CHECK (
usn IS NULL OR
- (profile = 'standard' AND usn ~ '^V[+\-0±]:I[123]:[OPJEAVR][1-4]\.[0-9]{2}$') OR
- (profile = 'full' AND usn ~ '^V[+\-0±]:I[123]:[OPJEAVR][1-4]\.[0-9]{2}:S[123]:A[123]:T[CRHF]:E[SIC]$')
+ (profile = 'lite' AND usn ~ '^URT:L:[OPJEAVR]:[+\-0±][123]$') OR -- single domain letter, then valence symbol + intensity digit
+ (profile = 'core' AND usn ~ '^URT:C:[OPJEAVR][1-4]:[+\-0±][123]$') OR -- domain letter + category digit 1-4, then valence + intensity
+ (profile = 'standard' AND usn ~ '^URT:S:[OPJEAVR][1-4]\.[0-9]{2}(\+[OPJEAVR][1-4]\.[0-9]{2}){0,2}:[+\-0±][123]:[1-3][1-3]T[CRHF]\.E[SIC]\.[NBWS]$') OR -- primary subcode plus 0-2 '+'-joined secondary subcodes, then the dimension suffix
+ (profile = 'full' AND usn ~ '^URT:F:[OPJEAVR][1-4]\.[0-9]{2}(\+[OPJEAVR][1-4]\.[0-9]{2}){0,2}:[+\-0±][123]:[1-3][1-3]T[CRHF]\.E[SIC]\.[NBWS](:(CD|MG|SY)\.[STEOFRPCSHX](,(CD|MG|SY)\.[STEOFRPCSHX])*)?$') -- same as standard plus an optional comma-separated causal list; NOTE(review): 'S' is listed twice in [STEOFRPCSHX] (no effect on matching) - confirm no other letter was intended
);
-- Foreign keys for review_spans
diff --git a/.artifacts/ReviewIQ-v32-Decisions.md b/.artifacts/ReviewIQ-v32-Decisions.md
index 28e0a38..b3ce44e 100644
--- a/.artifacts/ReviewIQ-v32-Decisions.md
+++ b/.artifacts/ReviewIQ-v32-Decisions.md
@@ -76,15 +76,15 @@ Based on: v3.1.2 (commit f998277)
- `urt_actionability` — A1, A2, A3
**Context & Evidence:**
-- `urt_temporal` — T1, T2, T3
-- `urt_evidence` — E1, E2, E3
-- `urt_comparative` — CR1, CR2, CR3
+- `urt_temporal` — TC (current), TR (recent), TH (historical), TF (future)
+- `urt_evidence` — ES (stated), EI (inferred), EC (contextual)
+- `urt_comparative` — CR-N (none), CR-B (better), CR-W (worse), CR-S (same)
**Classification:**
-- `urt_profile` — factual, emotional, comparative, etc.
+- `urt_profile` — lite, core, standard, full
- `urt_confidence` — low, medium, high
-- `urt_relation` — elaborates, contrasts, causes, etc.
-- `urt_entity_type` — person, product, location, etc.
+- `urt_relation` — cause_of, effect_of, contrast, resolution
+- `urt_entity_type` — location, staff, product, process, time, other
---
diff --git a/.artifacts/URT-v5.1-Reference.md b/.artifacts/URT-v5.1-Reference.md
index 77138aa..1696279 100644
--- a/.artifacts/URT-v5.1-Reference.md
+++ b/.artifacts/URT-v5.1-Reference.md
@@ -6,7 +6,7 @@ The Universal Review Taxonomy (URT) is a classification system for customer feed
### Key Characteristics
-- **Three Profiles**: Core, Standard, Full (increasing detail)
+- **Four Profiles**: Lite, Core, Standard, Full (increasing detail)
- **Seven Domains**: Covering all aspects of customer experience
- **Tier-3 Canonical Codes**: Format `X#.##` (e.g., J1.02, P2.15)
- **Dimensional Annotation**: Valence, intensity, specificity, and more
@@ -129,8 +129,10 @@ USN is a compact string encoding for URT annotations.
### Grammar
```
-Standard: URT:S:{codes}:{V}{I}:{S}{A}{T}.{E}.{CR}
-Full: URT:F:{codes}:{V}{I}:{S}{A}{T}.{E}.{CR}:{causal}
+Lite: URT:L:{domain}:{V}{I}
+Core: URT:C:{category}:{V}{I}
+Standard: URT:S:{subcode}[+{sec}]:{V}{I}:{S}{A}{T}.{E}.{CR}
+Full: URT:F:{subcode}[+{sec}]:{V}{I}:{S}{A}{T}.{E}.{CR}[:{causal}]
```
### Encoding Rules
@@ -138,6 +140,8 @@ Full: URT:F:{codes}:{V}{I}:{S}{A}{T}.{E}.{CR}:{causal}
**Valence**:
- `+` for V+
- `-` for V-
+- `0` for V0
+- `±` for V±
**Intensity**:
- `1` for I1
diff --git a/web/app/new/google-reviews/page.tsx b/web/app/new/google-reviews/page.tsx
new file mode 100644
index 0000000..273b714
--- /dev/null
+++ b/web/app/new/google-reviews/page.tsx
@@ -0,0 +1,60 @@
+'use client';
+
+import { useRouter } from 'next/navigation';
+import { useCallback } from 'react';
+import ScraperTest from '@/components/ScraperTest';
+import { useJobs } from '@/contexts/JobsContext';
+import { JobStatus } from '@/components/ScraperTest';
+import Link from 'next/link';
+
+export default function GoogleReviewsScraperPage() { // Client page component for the /new/google-reviews route
+ const router = useRouter();
+ const { addJob } = useJobs();
+
+ const handleJobsChange = useCallback((jobs: JobStatus[]) => { // memoized; identity changes only when addJob changes
+ // Add new jobs to context (addJob handles deduplication)
+ jobs.forEach(job => addJob(job));
+ }, [addJob]);
+
+ const handleSelectReviews = (reviews: unknown[], businessName: string, jobId: string) => { // reviews and businessName are accepted but not used; only jobId is read
+ // Navigate to analytics page for this job
+ router.push(`/analytics/${jobId}`);
+ };
+
+ return (
+
+ {/* Breadcrumb */}
+
+
+
+
+ {/* Header */}
+
+
+
+
+
Google Reviews Scraper
+
Extract reviews from any Google Maps business listing
+
+
+
+
+
+
+ );
+}
diff --git a/web/app/new/page.tsx b/web/app/new/page.tsx
index 63f648d..62e0123 100644
--- a/web/app/new/page.tsx
+++ b/web/app/new/page.tsx
@@ -1,31 +1,312 @@
'use client';
-import { useRouter } from 'next/navigation';
-import { useCallback } from 'react';
-import ScraperTest from '@/components/ScraperTest';
-import { useJobs } from '@/contexts/JobsContext';
-import { JobStatus } from '@/components/ScraperTest';
+import { useState, useEffect, useCallback } from 'react';
+import Link from 'next/link';
+
+const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+
+interface ScraperInfo { // One element of the array parsed from GET /api/admin/scrapers
+ job_type: string; // key used to group entries into one card per scraper type
+ version: string; // rendered in the version label as "v{version}"
+ variant: string; // appended to the label in parentheses unless it is 'stable'
+ traffic_pct: number; // must be > 0 for the entry to count as active
+ deprecated_at: string | null; // must be falsy (e.g. null) for the entry to count as active
+}
+
+interface ScraperTypeCard { // View model for one scraper card on the "New Scrape Job" page
+ job_type: string; // registry key; also used to de-duplicate the "Coming Soon" placeholders
+ name: string;
+ description: string;
+ icon: React.ReactNode;
+ route: string; // page path for this scraper, e.g. '/new/google-reviews'
+ color: string; // presumably Tailwind gradient classes (e.g. 'from-blue-500 to-indigo-600') - confirm against the card markup
+ available: boolean; // true -> listed under "Available Scrapers"
+ versions: string[]; // display labels such as "v1.0.0" or "v1.0.0 (variant)"
+}
+
+// Define scraper type metadata (icons, descriptions, routes)
+const SCRAPER_METADATA: Record> = { // keyed by job_type; NOTE(review): the type arguments of Record look truncated here - verify against the real file
+ 'google-reviews': {
+ name: 'Google Reviews',
+ description: 'Extract reviews from Google Maps business listings. Supports any business with a Google Maps presence.',
+ icon: (
+
+ ),
+ route: '/new/google-reviews',
+ color: 'from-blue-500 to-indigo-600',
+ },
+ 'google_reviews': { // underscore spelling of the same scraper; duplicates the 'google-reviews' entry field for field
+ name: 'Google Reviews',
+ description: 'Extract reviews from Google Maps business listings. Supports any business with a Google Maps presence.',
+ icon: (
+
+ ),
+ route: '/new/google-reviews',
+ color: 'from-blue-500 to-indigo-600',
+ },
+ 'yelp-reviews': {
+ name: 'Yelp Reviews',
+ description: 'Extract reviews from Yelp business pages. Perfect for restaurants, services, and local businesses.',
+ icon: (
+
+ ),
+ route: '/new/yelp-reviews',
+ color: 'from-red-500 to-rose-600',
+ },
+ 'tripadvisor-reviews': {
+ name: 'TripAdvisor Reviews',
+ description: 'Extract reviews from TripAdvisor. Ideal for hotels, restaurants, and tourist attractions.',
+ icon: (
+
+ ),
+ route: '/new/tripadvisor-reviews',
+ color: 'from-green-500 to-emerald-600',
+ },
+};
+
+// Fallback for unknown scraper types
+const DEFAULT_METADATA = { // base card fields for a job_type with no SCRAPER_METADATA entry; name and route are overridden per job_type at the use site
+ name: 'Unknown Scraper',
+ description: 'A scraper for extracting reviews.',
+ icon: (
+
+ ),
+ route: '/new',
+ color: 'from-gray-500 to-gray-600',
+};
export default function NewScrapePage() {
- const router = useRouter();
- const { addJob } = useJobs();
+ const [scrapers, setScrapers] = useState([]);
+ const [loading, setLoading] = useState(true);
+ const [error, setError] = useState(null);
- const handleJobsChange = useCallback((jobs: JobStatus[]) => {
- // Add new jobs to context (addJob handles deduplication)
- jobs.forEach(job => addJob(job));
- }, [addJob]);
+ const fetchScrapers = useCallback(async () => { // loads the scraper registry and builds one card per job_type; empty deps keep the callback identity stable
+ try {
+ const response = await fetch(`${API_BASE}/api/admin/scrapers`);
+ if (!response.ok) throw new Error('Failed to fetch scrapers'); // non-2xx responses are routed to the catch fallback below
- const handleSelectReviews = (reviews: unknown[], businessName: string, jobId: string) => {
- // Navigate to analytics page for this job
- router.push(`/analytics/${jobId}`);
- };
+ const data: ScraperInfo[] = await response.json(); // shape is asserted by the annotation, not validated at runtime
+
+ // Group by job_type and collect versions
+ const scrapersByType = data.reduce((acc, scraper) => {
+ const key = scraper.job_type;
+ if (!acc[key]) {
+ acc[key] = {
+ job_type: key,
+ versions: [],
+ hasActive: false,
+ };
+ }
+ acc[key].versions.push(`v${scraper.version}${scraper.variant !== 'stable' ? ` (${scraper.variant})` : ''}`); // label is "v{version}", plus " ({variant})" when the variant is not 'stable'
+ if (!scraper.deprecated_at && scraper.traffic_pct > 0) { // one non-deprecated entry with traffic marks the whole type active
+ acc[key].hasActive = true;
+ }
+ return acc;
+ }, {} as Record);
+
+ // Transform to ScraperTypeCard array
+ const cards: ScraperTypeCard[] = Object.values(scrapersByType).map(({ job_type, versions, hasActive }) => {
+ const metadata = SCRAPER_METADATA[job_type] || { // unknown job_type: default fields with a title-cased name and a route derived from job_type
+ ...DEFAULT_METADATA,
+ name: job_type.split(/[-_]/).map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' '),
+ route: `/new/${job_type}`,
+ };
+
+ return {
+ job_type,
+ ...metadata,
+ available: hasActive,
+ versions,
+ };
+ });
+
+ setScrapers(cards);
+ } catch (err) {
+ console.error('Failed to fetch scrapers:', err);
+ setError('Failed to load available scrapers'); // the error message is set in addition to the fallback card below
+ // Fallback to showing Google Reviews as available
+ setScrapers([{
+ job_type: 'google-reviews',
+ ...SCRAPER_METADATA['google-reviews'],
+ available: true,
+ versions: ['v1.0.0'],
+ }]);
+ } finally {
+ setLoading(false); // runs on both the success and the failure path
+ }
+ }, []);
+
+ useEffect(() => {
+ fetchScrapers();
+ }, [fetchScrapers]);
+
+ // Coming soon scrapers (not in registry yet)
+ const comingSoonScrapers: ScraperTypeCard[] = [ // rebuilt on every render from the static placeholders below
+ {
+ job_type: 'yelp-reviews',
+ ...SCRAPER_METADATA['yelp-reviews'],
+ available: false,
+ versions: [],
+ },
+ {
+ job_type: 'tripadvisor-reviews',
+ ...SCRAPER_METADATA['tripadvisor-reviews'],
+ available: false,
+ versions: [],
+ },
+ ].filter(s => !scrapers.some(existing => existing.job_type === s.job_type)); // drops a placeholder when the registry returned the same job_type, active or not; NOTE(review): a registry type with no active version then shows in neither list - confirm intended
return (
-
+
+ {/* Header */}
+
+
New Scrape Job
+
Select a scraper type to start extracting reviews
+
+
+ {/* Error State */}
+ {error && (
+
+ )}
+
+ {/* Loading State */}
+ {loading ? (
+
+ {[1, 2].map(i => (
+
+ ))}
+
+ ) : (
+ <>
+ {/* Available Scrapers */}
+
+
+
+ Available Scrapers
+
+
+ {scrapers.filter(s => s.available).map(scraper => (
+
+
+
+ {scraper.icon}
+
+
+
+
+ {scraper.name}
+
+
+ Active
+
+
+
{scraper.description}
+
+ {scraper.versions.slice(0, 2).map(v => (
+
+ {v}
+
+ ))}
+ {scraper.versions.length > 2 && (
+ +{scraper.versions.length - 2} more
+ )}
+
+
+
+
+
+ ))}
+
+
+
+ {/* Coming Soon Scrapers */}
+ {comingSoonScrapers.length > 0 && (
+
+
+
+ Coming Soon
+
+
+ {comingSoonScrapers.map(scraper => (
+
+
+
+ {scraper.icon}
+
+
+
+
+ {scraper.name}
+
+
+ Coming Soon
+
+
+
{scraper.description}
+
+
+
+ ))}
+
+
+ )}
+ >
+ )}
+
+ {/* Help Section */}
+
+
+
+
+
Need a different scraper?
+
+ We're constantly adding new scrapers. If you need reviews from a platform not listed here,{' '}
+ let us know.
+
+
+
+
+
);
}
diff --git a/web/components/ScraperTest.tsx b/web/components/ScraperTest.tsx
index dc92dce..e422063 100644
--- a/web/components/ScraperTest.tsx
+++ b/web/components/ScraperTest.tsx
@@ -1,17 +1,8 @@
'use client';
-import { useState, useEffect, useRef, useCallback } from 'react';
+import { useState, useEffect, useRef } from 'react';
import ReviewAnalytics from './ReviewAnalytics';
-const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
-
-interface ScraperType {
- job_type: string;
- version: string;
- variant: string;
- label: string;
-}
-
interface Review {
author: string;
rating: number;
@@ -69,10 +60,6 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
const [businessImage, setBusinessImage] = useState(null);
const [businessCategory, setBusinessCategory] = useState(null);
- // Scraper type selection
- const [availableScrapers, setAvailableScrapers] = useState([]);
- const [selectedScraper, setSelectedScraper] = useState(null);
- const [scrapersLoading, setScrapersLoading] = useState(true);
const [userFingerprint, setUserFingerprint] = useState<{
geolocation?: {lat: number, lng: number},
userAgent?: string,
@@ -132,48 +119,6 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
collectFingerprint();
}, []);
- // Fetch available scraper types on mount
- const fetchScrapers = useCallback(async () => {
- try {
- const response = await fetch(`${API_BASE}/api/admin/scrapers`);
- if (response.ok) {
- const data = await response.json();
- // Transform to ScraperType format and filter to active scrapers
- const scrapers: ScraperType[] = data
- .filter((s: { deprecated_at: string | null; traffic_pct: number }) => !s.deprecated_at && s.traffic_pct > 0)
- .map((s: { job_type: string; version: string; variant: string }) => ({
- job_type: s.job_type,
- version: s.version,
- variant: s.variant,
- // Format job_type nicely: google_reviews or google-reviews -> "Google Reviews"
- label: `${s.job_type.split(/[-_]/).map((w: string) => w.charAt(0).toUpperCase() + w.slice(1)).join(' ')} v${s.version}${s.variant !== 'stable' ? ` (${s.variant})` : ''}`,
- }));
- setAvailableScrapers(scrapers);
- // Auto-select first scraper (usually google-reviews stable)
- if (scrapers.length > 0 && !selectedScraper) {
- setSelectedScraper(scrapers[0]);
- }
- }
- } catch (err) {
- console.error('Failed to fetch scrapers:', err);
- // Fallback to default google-reviews
- const defaultScraper: ScraperType = {
- job_type: 'google-reviews',
- version: '1.0.0',
- variant: 'stable',
- label: 'Google Reviews v1.0.0',
- };
- setAvailableScrapers([defaultScraper]);
- setSelectedScraper(defaultScraper);
- } finally {
- setScrapersLoading(false);
- }
- }, [selectedScraper]);
-
- useEffect(() => {
- fetchScrapers();
- }, [fetchScrapers]);
-
const pollingIntervals = useRef