Align artifacts with canonical URT v5.1 specification
Fixes inconsistencies discovered during audit against urt-taxonomy/:
- urt_profile ENUM: Add 'lite' and 'core' profiles (were missing)
- USN format: Use canonical regex from spec (was non-compliant)
- USN valence encoding: Add V0 (0) and V± (±) support
- USN grammar: Add Lite (URT:L:) and Core (URT:C:) formats
- Dimension codes: Fix temporal (TC/TR/TH/TF), evidence (ES/EI/EC), comparative (CR-N/CR-B/CR-W/CR-S) in decisions doc
- LLM contract: Full USN regex validation pattern

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -243,8 +243,8 @@ Return valid JSON matching the schema exactly. No markdown, no explanations.
|
|||||||
},
|
},
|
||||||
"usn": {
|
"usn": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"pattern": "^URT:S:[OPJEAVR][1-4]\\.[0-9]{2}",
|
"pattern": "^URT:S:[OPJEAVR][1-4]\\.[0-9]{2}(\\+[OPJEAVR][1-4]\\.[0-9]{2}){0,2}:[+\\-0±][123]:[1-3][1-3]T[CRHF]\\.E[SIC]\\.[NBWS]$",
|
||||||
"description": "URT String Notation for audit"
|
"description": "URT String Notation for audit (Standard profile)"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -140,7 +140,7 @@ CREATE TYPE urt_actionability AS ENUM ('A1', 'A2', 'A3');
|
|||||||
CREATE TYPE urt_temporal AS ENUM ('TC', 'TR', 'TH', 'TF');
|
CREATE TYPE urt_temporal AS ENUM ('TC', 'TR', 'TH', 'TF');
|
||||||
CREATE TYPE urt_evidence AS ENUM ('ES', 'EI', 'EC');
|
CREATE TYPE urt_evidence AS ENUM ('ES', 'EI', 'EC');
|
||||||
CREATE TYPE urt_comparative AS ENUM ('CR-N', 'CR-B', 'CR-W', 'CR-S');
|
CREATE TYPE urt_comparative AS ENUM ('CR-N', 'CR-B', 'CR-W', 'CR-S');
|
||||||
CREATE TYPE urt_profile AS ENUM ('standard', 'full');
|
CREATE TYPE urt_profile AS ENUM ('lite', 'core', 'standard', 'full');
|
||||||
CREATE TYPE urt_confidence AS ENUM ('high', 'medium', 'low');
|
CREATE TYPE urt_confidence AS ENUM ('high', 'medium', 'low');
|
||||||
CREATE TYPE urt_relation AS ENUM ('cause_of', 'effect_of', 'contrast', 'resolution');
|
CREATE TYPE urt_relation AS ENUM ('cause_of', 'effect_of', 'contrast', 'resolution');
|
||||||
CREATE TYPE urt_entity_type AS ENUM ('location', 'staff', 'product', 'process', 'time', 'other');
|
CREATE TYPE urt_entity_type AS ENUM ('location', 'staff', 'product', 'process', 'time', 'other');
|
||||||
@@ -411,15 +411,20 @@ ALTER TABLE review_spans
|
|||||||
ADD CONSTRAINT chk_no_self_relation
|
ADD CONSTRAINT chk_no_self_relation
|
||||||
CHECK (related_span_id IS NULL OR related_span_id != span_id);
|
CHECK (related_span_id IS NULL OR related_span_id != span_id);
|
||||||
|
|
||||||
-- USN format validation based on profile
|
-- USN format validation based on profile (URT v5.1 canonical format)
|
||||||
-- Standard: V[+-0±]:I[123]:CODE (e.g., "V-:I2:J1.01")
|
-- Lite: URT:L:{domain}:{valence}{intensity}
|
||||||
-- Full: V[+-0±]:I[123]:CODE:S[123]:A[123]:T[CRHF]:E[SIC] (e.g., "V-:I3:J1.01:S2:A2:TC:ES")
|
-- Core: URT:C:{category}:{valence}{intensity}
|
||||||
|
-- Standard: URT:S:{subcode}[+{sec}]:{valence}{intensity}:{S}{A}{T}.{E}.{CR} (the +{sec} part may repeat: up to 2 secondary codes)
|
||||||
|
-- Full: URT:F:{subcode}[+{sec}]:{valence}{intensity}:{S}{A}{T}.{E}.{CR}[:{causal}] (up to 2 secondary codes; {causal} is a comma-separated list)
|
||||||
|
-- Examples: URT:L:O:+2 | URT:C:J1:-3 | URT:S:J1.03:-2:22TC.ES.N | URT:F:J1.01:-3:23TR.ES.S:CD.O,MG.O
|
||||||
ALTER TABLE review_spans
|
ALTER TABLE review_spans
|
||||||
ADD CONSTRAINT chk_usn_format
|
ADD CONSTRAINT chk_usn_format
|
||||||
CHECK (
|
CHECK (
|
||||||
usn IS NULL OR
|
usn IS NULL OR
|
||||||
(profile = 'standard' AND usn ~ '^V[+\-0±]:I[123]:[OPJEAVR][1-4]\.[0-9]{2}$') OR
|
(profile = 'lite' AND usn ~ '^URT:L:[OPJEAVR]:[+\-0±][123]$') OR
|
||||||
(profile = 'full' AND usn ~ '^V[+\-0±]:I[123]:[OPJEAVR][1-4]\.[0-9]{2}:S[123]:A[123]:T[CRHF]:E[SIC]$')
|
(profile = 'core' AND usn ~ '^URT:C:[OPJEAVR][1-4]:[+\-0±][123]$') OR
|
||||||
|
(profile = 'standard' AND usn ~ '^URT:S:[OPJEAVR][1-4]\.[0-9]{2}(\+[OPJEAVR][1-4]\.[0-9]{2}){0,2}:[+\-0±][123]:[1-3][1-3]T[CRHF]\.E[SIC]\.[NBWS]$') OR
|
||||||
|
(profile = 'full' AND usn ~ '^URT:F:[OPJEAVR][1-4]\.[0-9]{2}(\+[OPJEAVR][1-4]\.[0-9]{2}){0,2}:[+\-0±][123]:[1-3][1-3]T[CRHF]\.E[SIC]\.[NBWS](:(CD|MG|SY)\.[STEOFRPCSHX](,(CD|MG|SY)\.[STEOFRPCSHX])*)?$')
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Foreign keys for review_spans
|
-- Foreign keys for review_spans
|
||||||
|
|||||||
@@ -76,15 +76,15 @@ Based on: v3.1.2 (commit f998277)
|
|||||||
- `urt_actionability` — A1, A2, A3
|
- `urt_actionability` — A1, A2, A3
|
||||||
|
|
||||||
**Context & Evidence:**
|
**Context & Evidence:**
|
||||||
- `urt_temporal` — T1, T2, T3
|
- `urt_temporal` — TC (current), TR (recent), TH (historical), TF (future)
|
||||||
- `urt_evidence` — E1, E2, E3
|
- `urt_evidence` — ES (stated), EI (inferred), EC (contextual)
|
||||||
- `urt_comparative` — CR1, CR2, CR3
|
- `urt_comparative` — CR-N (none), CR-B (better), CR-W (worse), CR-S (same)
|
||||||
|
|
||||||
**Classification:**
|
**Classification:**
|
||||||
- `urt_profile` — factual, emotional, comparative, etc.
|
- `urt_profile` — lite, core, standard, full
|
||||||
- `urt_confidence` — low, medium, high
|
- `urt_confidence` — low, medium, high
|
||||||
- `urt_relation` — elaborates, contrasts, causes, etc.
|
- `urt_relation` — cause_of, effect_of, contrast, resolution
|
||||||
- `urt_entity_type` — person, product, location, etc.
|
- `urt_entity_type` — location, staff, product, process, time, other
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ The Universal Review Taxonomy (URT) is a classification system for customer feed
|
|||||||
|
|
||||||
### Key Characteristics
|
### Key Characteristics
|
||||||
|
|
||||||
- **Three Profiles**: Core, Standard, Full (increasing detail)
|
- **Four Profiles**: Lite, Core, Standard, Full (increasing detail)
|
||||||
- **Seven Domains**: Covering all aspects of customer experience
|
- **Seven Domains**: Covering all aspects of customer experience
|
||||||
- **Tier-3 Canonical Codes**: Format `X#.##` (e.g., J1.02, P2.15)
|
- **Tier-3 Canonical Codes**: Format `X#.##` (e.g., J1.02, P2.15)
|
||||||
- **Dimensional Annotation**: Valence, intensity, specificity, and more
|
- **Dimensional Annotation**: Valence, intensity, specificity, and more
|
||||||
@@ -129,8 +129,10 @@ USN is a compact string encoding for URT annotations.
|
|||||||
### Grammar
|
### Grammar
|
||||||
|
|
||||||
```
|
```
|
||||||
Standard: URT:S:{codes}:{V}{I}:{S}{A}{T}.{E}.{CR}
|
Lite: URT:L:{domain}:{V}{I}
|
||||||
Full: URT:F:{codes}:{V}{I}:{S}{A}{T}.{E}.{CR}:{causal}
|
Core: URT:C:{category}:{V}{I}
|
||||||
|
Standard: URT:S:{subcode}[+{sec}]:{V}{I}:{S}{A}{T}.{E}.{CR}
|
||||||
|
Full: URT:F:{subcode}[+{sec}]:{V}{I}:{S}{A}{T}.{E}.{CR}[:{causal}]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Encoding Rules
|
### Encoding Rules
|
||||||
@@ -138,6 +140,8 @@ Full: URT:F:{codes}:{V}{I}:{S}{A}{T}.{E}.{CR}:{causal}
|
|||||||
**Valence**:
|
**Valence**:
|
||||||
- `+` for V+
|
- `+` for V+
|
||||||
- `-` for V-
|
- `-` for V-
|
||||||
|
- `0` for V0
|
||||||
|
- `±` for V±
|
||||||
|
|
||||||
**Intensity**:
|
**Intensity**:
|
||||||
- `1` for I1
|
- `1` for I1
|
||||||
|
|||||||
60
web/app/new/google-reviews/page.tsx
Normal file
60
web/app/new/google-reviews/page.tsx
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
'use client';
|
||||||
|
|
||||||
|
import { useRouter } from 'next/navigation';
|
||||||
|
import { useCallback } from 'react';
|
||||||
|
import ScraperTest from '@/components/ScraperTest';
|
||||||
|
import { useJobs } from '@/contexts/JobsContext';
|
||||||
|
import { JobStatus } from '@/components/ScraperTest';
|
||||||
|
import Link from 'next/link';
|
||||||
|
|
||||||
|
export default function GoogleReviewsScraperPage() {
|
||||||
|
const router = useRouter();
|
||||||
|
const { addJob } = useJobs();
|
||||||
|
|
||||||
|
const handleJobsChange = useCallback((jobs: JobStatus[]) => {
|
||||||
|
// Add new jobs to context (addJob handles deduplication)
|
||||||
|
jobs.forEach(job => addJob(job));
|
||||||
|
}, [addJob]);
|
||||||
|
|
||||||
|
const handleSelectReviews = (reviews: unknown[], businessName: string, jobId: string) => {
|
||||||
|
// Navigate to analytics page for this job
|
||||||
|
router.push(`/analytics/${jobId}`);
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="h-full overflow-y-auto p-6">
|
||||||
|
{/* Breadcrumb */}
|
||||||
|
<div className="max-w-4xl mx-auto mb-6">
|
||||||
|
<nav className="flex items-center gap-2 text-sm text-gray-500">
|
||||||
|
<Link href="/new" className="hover:text-blue-600 transition-colors">
|
||||||
|
New Scrape
|
||||||
|
</Link>
|
||||||
|
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||||
|
</svg>
|
||||||
|
<span className="text-gray-900 font-medium">Google Reviews</span>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Header */}
|
||||||
|
<div className="max-w-4xl mx-auto mb-6">
|
||||||
|
<div className="flex items-center gap-4">
|
||||||
|
<div className="w-14 h-14 bg-gradient-to-br from-blue-500 to-indigo-600 rounded-xl flex items-center justify-center shadow-lg">
|
||||||
|
<svg className="w-7 h-7 text-white" viewBox="0 0 24 24" fill="currentColor">
|
||||||
|
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h1 className="text-2xl font-bold text-gray-900">Google Reviews Scraper</h1>
|
||||||
|
<p className="text-gray-500">Extract reviews from any Google Maps business listing</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ScraperTest
|
||||||
|
onJobsChange={handleJobsChange}
|
||||||
|
onSelectReviews={handleSelectReviews}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -1,31 +1,312 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { useRouter } from 'next/navigation';
|
import { useState, useEffect, useCallback } from 'react';
|
||||||
import { useCallback } from 'react';
|
import Link from 'next/link';
|
||||||
import ScraperTest from '@/components/ScraperTest';
|
|
||||||
import { useJobs } from '@/contexts/JobsContext';
|
const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
|
||||||
import { JobStatus } from '@/components/ScraperTest';
|
|
||||||
|
interface ScraperInfo {
|
||||||
|
job_type: string;
|
||||||
|
version: string;
|
||||||
|
variant: string;
|
||||||
|
traffic_pct: number;
|
||||||
|
deprecated_at: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ScraperTypeCard {
|
||||||
|
job_type: string;
|
||||||
|
name: string;
|
||||||
|
description: string;
|
||||||
|
icon: React.ReactNode;
|
||||||
|
route: string;
|
||||||
|
color: string;
|
||||||
|
available: boolean;
|
||||||
|
versions: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define scraper type metadata (icons, descriptions, routes)
|
||||||
|
const SCRAPER_METADATA: Record<string, Omit<ScraperTypeCard, 'job_type' | 'available' | 'versions'>> = {
|
||||||
|
'google-reviews': {
|
||||||
|
name: 'Google Reviews',
|
||||||
|
description: 'Extract reviews from Google Maps business listings. Supports any business with a Google Maps presence.',
|
||||||
|
icon: (
|
||||||
|
<svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
|
||||||
|
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/>
|
||||||
|
</svg>
|
||||||
|
),
|
||||||
|
route: '/new/google-reviews',
|
||||||
|
color: 'from-blue-500 to-indigo-600',
|
||||||
|
},
|
||||||
|
'google_reviews': {
|
||||||
|
name: 'Google Reviews',
|
||||||
|
description: 'Extract reviews from Google Maps business listings. Supports any business with a Google Maps presence.',
|
||||||
|
icon: (
|
||||||
|
<svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
|
||||||
|
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/>
|
||||||
|
</svg>
|
||||||
|
),
|
||||||
|
route: '/new/google-reviews',
|
||||||
|
color: 'from-blue-500 to-indigo-600',
|
||||||
|
},
|
||||||
|
'yelp-reviews': {
|
||||||
|
name: 'Yelp Reviews',
|
||||||
|
description: 'Extract reviews from Yelp business pages. Perfect for restaurants, services, and local businesses.',
|
||||||
|
icon: (
|
||||||
|
<svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
|
||||||
|
<path d="M12.14 9.27l-1.39 4.28c-.11.34-.08.71.09 1.03.17.32.45.56.79.67l5.38 1.79c.34.11.71.08 1.03-.09.32-.17.56-.45.67-.79l1.39-4.28c.11-.34.08-.71-.09-1.03-.17-.32-.45-.56-.79-.67l-5.38-1.79c-.34-.11-.71-.08-1.03.09-.32.17-.56.45-.67.79zM6.5 8.5c-.83 0-1.5.67-1.5 1.5v8c0 .83.67 1.5 1.5 1.5s1.5-.67 1.5-1.5v-8c0-.83-.67-1.5-1.5-1.5z"/>
|
||||||
|
</svg>
|
||||||
|
),
|
||||||
|
route: '/new/yelp-reviews',
|
||||||
|
color: 'from-red-500 to-rose-600',
|
||||||
|
},
|
||||||
|
'tripadvisor-reviews': {
|
||||||
|
name: 'TripAdvisor Reviews',
|
||||||
|
description: 'Extract reviews from TripAdvisor. Ideal for hotels, restaurants, and tourist attractions.',
|
||||||
|
icon: (
|
||||||
|
<svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
|
||||||
|
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"/>
|
||||||
|
</svg>
|
||||||
|
),
|
||||||
|
route: '/new/tripadvisor-reviews',
|
||||||
|
color: 'from-green-500 to-emerald-600',
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fallback for unknown scraper types
|
||||||
|
const DEFAULT_METADATA = {
|
||||||
|
name: 'Unknown Scraper',
|
||||||
|
description: 'A scraper for extracting reviews.',
|
||||||
|
icon: (
|
||||||
|
<svg className="w-8 h-8" viewBox="0 0 24 24" fill="currentColor">
|
||||||
|
<path d="M19.428 15.428a2 2 0 00-1.022-.547l-2.387-.477a6 6 0 00-3.86.517l-.318.158a6 6 0 01-3.86.517L6.05 15.21a2 2 0 00-1.806.547M8 4h8l-1 1v5.172a2 2 0 00.586 1.414l5 5c1.26 1.26.367 3.414-1.415 3.414H4.828c-1.782 0-2.674-2.154-1.414-3.414l5-5A2 2 0 009 10.172V5L8 4z" />
|
||||||
|
</svg>
|
||||||
|
),
|
||||||
|
route: '/new',
|
||||||
|
color: 'from-gray-500 to-gray-600',
|
||||||
|
};
|
||||||
|
|
||||||
export default function NewScrapePage() {
|
export default function NewScrapePage() {
|
||||||
const router = useRouter();
|
const [scrapers, setScrapers] = useState<ScraperTypeCard[]>([]);
|
||||||
const { addJob } = useJobs();
|
const [loading, setLoading] = useState(true);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
const handleJobsChange = useCallback((jobs: JobStatus[]) => {
|
const fetchScrapers = useCallback(async () => {
|
||||||
// Add new jobs to context (addJob handles deduplication)
|
try {
|
||||||
jobs.forEach(job => addJob(job));
|
const response = await fetch(`${API_BASE}/api/admin/scrapers`);
|
||||||
}, [addJob]);
|
if (!response.ok) throw new Error('Failed to fetch scrapers');
|
||||||
|
|
||||||
const handleSelectReviews = (reviews: unknown[], businessName: string, jobId: string) => {
|
const data: ScraperInfo[] = await response.json();
|
||||||
// Navigate to analytics page for this job
|
|
||||||
router.push(`/analytics/${jobId}`);
|
// Group by job_type and collect versions
|
||||||
};
|
const scrapersByType = data.reduce((acc, scraper) => {
|
||||||
|
const key = scraper.job_type;
|
||||||
|
if (!acc[key]) {
|
||||||
|
acc[key] = {
|
||||||
|
job_type: key,
|
||||||
|
versions: [],
|
||||||
|
hasActive: false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
acc[key].versions.push(`v${scraper.version}${scraper.variant !== 'stable' ? ` (${scraper.variant})` : ''}`);
|
||||||
|
if (!scraper.deprecated_at && scraper.traffic_pct > 0) {
|
||||||
|
acc[key].hasActive = true;
|
||||||
|
}
|
||||||
|
return acc;
|
||||||
|
}, {} as Record<string, { job_type: string; versions: string[]; hasActive: boolean }>);
|
||||||
|
|
||||||
|
// Transform to ScraperTypeCard array
|
||||||
|
const cards: ScraperTypeCard[] = Object.values(scrapersByType).map(({ job_type, versions, hasActive }) => {
|
||||||
|
const metadata = SCRAPER_METADATA[job_type] || {
|
||||||
|
...DEFAULT_METADATA,
|
||||||
|
name: job_type.split(/[-_]/).map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' '),
|
||||||
|
route: `/new/${job_type}`,
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
job_type,
|
||||||
|
...metadata,
|
||||||
|
available: hasActive,
|
||||||
|
versions,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
setScrapers(cards);
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Failed to fetch scrapers:', err);
|
||||||
|
setError('Failed to load available scrapers');
|
||||||
|
// Fallback to showing Google Reviews as available
|
||||||
|
setScrapers([{
|
||||||
|
job_type: 'google-reviews',
|
||||||
|
...SCRAPER_METADATA['google-reviews'],
|
||||||
|
available: true,
|
||||||
|
versions: ['v1.0.0'],
|
||||||
|
}]);
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchScrapers();
|
||||||
|
}, [fetchScrapers]);
|
||||||
|
|
||||||
|
// Coming soon scrapers (not in registry yet)
|
||||||
|
const comingSoonScrapers: ScraperTypeCard[] = [
|
||||||
|
{
|
||||||
|
job_type: 'yelp-reviews',
|
||||||
|
...SCRAPER_METADATA['yelp-reviews'],
|
||||||
|
available: false,
|
||||||
|
versions: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
job_type: 'tripadvisor-reviews',
|
||||||
|
...SCRAPER_METADATA['tripadvisor-reviews'],
|
||||||
|
available: false,
|
||||||
|
versions: [],
|
||||||
|
},
|
||||||
|
].filter(s => !scrapers.some(existing => existing.job_type === s.job_type));
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="h-full overflow-y-auto p-6">
|
<div className="h-full overflow-y-auto p-6">
|
||||||
<ScraperTest
|
<div className="max-w-4xl mx-auto">
|
||||||
onJobsChange={handleJobsChange}
|
{/* Header */}
|
||||||
onSelectReviews={handleSelectReviews}
|
<div className="mb-8">
|
||||||
/>
|
<h1 className="text-3xl font-bold text-gray-900 mb-2">New Scrape Job</h1>
|
||||||
|
<p className="text-gray-500">Select a scraper type to start extracting reviews</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Error State */}
|
||||||
|
{error && (
|
||||||
|
<div className="mb-6 p-4 bg-yellow-50 border-2 border-yellow-200 rounded-xl">
|
||||||
|
<div className="flex items-center gap-2 text-yellow-800">
|
||||||
|
<svg className="w-5 h-5" fill="currentColor" viewBox="0 0 20 20">
|
||||||
|
<path fillRule="evenodd" d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z" clipRule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
<span className="font-medium">{error}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Loading State */}
|
||||||
|
{loading ? (
|
||||||
|
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||||
|
{[1, 2].map(i => (
|
||||||
|
<div key={i} className="p-6 bg-gray-100 rounded-2xl animate-pulse">
|
||||||
|
<div className="flex items-start gap-4">
|
||||||
|
<div className="w-14 h-14 bg-gray-200 rounded-xl"></div>
|
||||||
|
<div className="flex-1">
|
||||||
|
<div className="h-6 bg-gray-200 rounded w-32 mb-2"></div>
|
||||||
|
<div className="h-4 bg-gray-200 rounded w-full mb-1"></div>
|
||||||
|
<div className="h-4 bg-gray-200 rounded w-3/4"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
{/* Available Scrapers */}
|
||||||
|
<div className="mb-8">
|
||||||
|
<h2 className="text-lg font-semibold text-gray-700 mb-4 flex items-center gap-2">
|
||||||
|
<span className="w-2 h-2 bg-green-500 rounded-full"></span>
|
||||||
|
Available Scrapers
|
||||||
|
</h2>
|
||||||
|
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||||
|
{scrapers.filter(s => s.available).map(scraper => (
|
||||||
|
<Link
|
||||||
|
key={scraper.job_type}
|
||||||
|
href={scraper.route}
|
||||||
|
className="group p-6 bg-white border-2 border-gray-200 rounded-2xl shadow-sm hover:border-blue-500 hover:shadow-lg transition-all"
|
||||||
|
>
|
||||||
|
<div className="flex items-start gap-4">
|
||||||
|
<div className={`w-14 h-14 bg-gradient-to-br ${scraper.color} rounded-xl flex items-center justify-center text-white shadow-lg group-hover:scale-110 transition-transform`}>
|
||||||
|
{scraper.icon}
|
||||||
|
</div>
|
||||||
|
<div className="flex-1">
|
||||||
|
<div className="flex items-center gap-2 mb-1">
|
||||||
|
<h3 className="text-xl font-bold text-gray-900 group-hover:text-blue-600 transition-colors">
|
||||||
|
{scraper.name}
|
||||||
|
</h3>
|
||||||
|
<span className="px-2 py-0.5 bg-green-100 text-green-700 text-xs font-semibold rounded-full">
|
||||||
|
Active
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<p className="text-gray-500 text-sm mb-3">{scraper.description}</p>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
{scraper.versions.slice(0, 2).map(v => (
|
||||||
|
<span key={v} className="px-2 py-0.5 bg-gray-100 text-gray-600 text-xs rounded">
|
||||||
|
{v}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
{scraper.versions.length > 2 && (
|
||||||
|
<span className="text-xs text-gray-400">+{scraper.versions.length - 2} more</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<svg className="w-6 h-6 text-gray-300 group-hover:text-blue-500 group-hover:translate-x-1 transition-all" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
</Link>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Coming Soon Scrapers */}
|
||||||
|
{comingSoonScrapers.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<h2 className="text-lg font-semibold text-gray-400 mb-4 flex items-center gap-2">
|
||||||
|
<span className="w-2 h-2 bg-gray-300 rounded-full"></span>
|
||||||
|
Coming Soon
|
||||||
|
</h2>
|
||||||
|
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||||
|
{comingSoonScrapers.map(scraper => (
|
||||||
|
<div
|
||||||
|
key={scraper.job_type}
|
||||||
|
className="p-6 bg-gray-50 border-2 border-dashed border-gray-200 rounded-2xl opacity-60"
|
||||||
|
>
|
||||||
|
<div className="flex items-start gap-4">
|
||||||
|
<div className={`w-14 h-14 bg-gradient-to-br ${scraper.color} rounded-xl flex items-center justify-center text-white shadow-lg opacity-50`}>
|
||||||
|
{scraper.icon}
|
||||||
|
</div>
|
||||||
|
<div className="flex-1">
|
||||||
|
<div className="flex items-center gap-2 mb-1">
|
||||||
|
<h3 className="text-xl font-bold text-gray-500">
|
||||||
|
{scraper.name}
|
||||||
|
</h3>
|
||||||
|
<span className="px-2 py-0.5 bg-gray-200 text-gray-500 text-xs font-semibold rounded-full">
|
||||||
|
Coming Soon
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<p className="text-gray-400 text-sm">{scraper.description}</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Help Section */}
|
||||||
|
<div className="mt-12 p-6 bg-blue-50 border-2 border-blue-100 rounded-2xl">
|
||||||
|
<div className="flex items-start gap-4">
|
||||||
|
<div className="w-10 h-10 bg-blue-500 rounded-lg flex items-center justify-center flex-shrink-0">
|
||||||
|
<svg className="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h3 className="font-bold text-blue-900 mb-1">Need a different scraper?</h3>
|
||||||
|
<p className="text-blue-700 text-sm">
|
||||||
|
We're constantly adding new scrapers. If you need reviews from a platform not listed here,{' '}
|
||||||
|
<a href="mailto:support@reviewiq.com" className="underline hover:no-underline">let us know</a>.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,17 +1,8 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { useState, useEffect, useRef, useCallback } from 'react';
|
import { useState, useEffect, useRef } from 'react';
|
||||||
import ReviewAnalytics from './ReviewAnalytics';
|
import ReviewAnalytics from './ReviewAnalytics';
|
||||||
|
|
||||||
const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
|
|
||||||
|
|
||||||
interface ScraperType {
|
|
||||||
job_type: string;
|
|
||||||
version: string;
|
|
||||||
variant: string;
|
|
||||||
label: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Review {
|
interface Review {
|
||||||
author: string;
|
author: string;
|
||||||
rating: number;
|
rating: number;
|
||||||
@@ -69,10 +60,6 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
|
|||||||
const [businessImage, setBusinessImage] = useState<string | null>(null);
|
const [businessImage, setBusinessImage] = useState<string | null>(null);
|
||||||
const [businessCategory, setBusinessCategory] = useState<string | null>(null);
|
const [businessCategory, setBusinessCategory] = useState<string | null>(null);
|
||||||
|
|
||||||
// Scraper type selection
|
|
||||||
const [availableScrapers, setAvailableScrapers] = useState<ScraperType[]>([]);
|
|
||||||
const [selectedScraper, setSelectedScraper] = useState<ScraperType | null>(null);
|
|
||||||
const [scrapersLoading, setScrapersLoading] = useState(true);
|
|
||||||
const [userFingerprint, setUserFingerprint] = useState<{
|
const [userFingerprint, setUserFingerprint] = useState<{
|
||||||
geolocation?: {lat: number, lng: number},
|
geolocation?: {lat: number, lng: number},
|
||||||
userAgent?: string,
|
userAgent?: string,
|
||||||
@@ -132,48 +119,6 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
|
|||||||
collectFingerprint();
|
collectFingerprint();
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// Fetch available scraper types on mount
|
|
||||||
const fetchScrapers = useCallback(async () => {
|
|
||||||
try {
|
|
||||||
const response = await fetch(`${API_BASE}/api/admin/scrapers`);
|
|
||||||
if (response.ok) {
|
|
||||||
const data = await response.json();
|
|
||||||
// Transform to ScraperType format and filter to active scrapers
|
|
||||||
const scrapers: ScraperType[] = data
|
|
||||||
.filter((s: { deprecated_at: string | null; traffic_pct: number }) => !s.deprecated_at && s.traffic_pct > 0)
|
|
||||||
.map((s: { job_type: string; version: string; variant: string }) => ({
|
|
||||||
job_type: s.job_type,
|
|
||||||
version: s.version,
|
|
||||||
variant: s.variant,
|
|
||||||
// Format job_type nicely: google_reviews or google-reviews -> "Google Reviews"
|
|
||||||
label: `${s.job_type.split(/[-_]/).map((w: string) => w.charAt(0).toUpperCase() + w.slice(1)).join(' ')} v${s.version}${s.variant !== 'stable' ? ` (${s.variant})` : ''}`,
|
|
||||||
}));
|
|
||||||
setAvailableScrapers(scrapers);
|
|
||||||
// Auto-select first scraper (usually google-reviews stable)
|
|
||||||
if (scrapers.length > 0 && !selectedScraper) {
|
|
||||||
setSelectedScraper(scrapers[0]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.error('Failed to fetch scrapers:', err);
|
|
||||||
// Fallback to default google-reviews
|
|
||||||
const defaultScraper: ScraperType = {
|
|
||||||
job_type: 'google-reviews',
|
|
||||||
version: '1.0.0',
|
|
||||||
variant: 'stable',
|
|
||||||
label: 'Google Reviews v1.0.0',
|
|
||||||
};
|
|
||||||
setAvailableScrapers([defaultScraper]);
|
|
||||||
setSelectedScraper(defaultScraper);
|
|
||||||
} finally {
|
|
||||||
setScrapersLoading(false);
|
|
||||||
}
|
|
||||||
}, [selectedScraper]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
fetchScrapers();
|
|
||||||
}, [fetchScrapers]);
|
|
||||||
|
|
||||||
const pollingIntervals = useRef<Map<string, NodeJS.Timeout>>(new Map());
|
const pollingIntervals = useRef<Map<string, NodeJS.Timeout>>(new Map());
|
||||||
const abortControllerRef = useRef<AbortController | null>(null);
|
const abortControllerRef = useRef<AbortController | null>(null);
|
||||||
|
|
||||||
@@ -379,11 +324,6 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
|
|||||||
const url = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(searchedQuery)}&hl=en`;
|
const url = `https://www.google.com/maps/search/?api=1&query=${encodeURIComponent(searchedQuery)}&hl=en`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Use selected scraper or default to google-reviews
|
|
||||||
const jobType = selectedScraper?.job_type || 'google-reviews';
|
|
||||||
const scraperVersion = selectedScraper?.version;
|
|
||||||
const scraperVariant = selectedScraper?.variant;
|
|
||||||
|
|
||||||
const response = await fetch('/api/scrape', {
|
const response = await fetch('/api/scrape', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
@@ -395,10 +335,8 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
|
|||||||
total_reviews_snapshot: availableReviewCount,
|
total_reviews_snapshot: availableReviewCount,
|
||||||
geolocation: userFingerprint.geolocation,
|
geolocation: userFingerprint.geolocation,
|
||||||
browser_fingerprint: userFingerprint, // Pass full fingerprint
|
browser_fingerprint: userFingerprint, // Pass full fingerprint
|
||||||
// Include scraper selection
|
// Google Reviews scraper (this component is specific to Google Reviews)
|
||||||
job_type: jobType,
|
job_type: 'google-reviews',
|
||||||
scraper_version: scraperVersion,
|
|
||||||
scraper_variant: scraperVariant,
|
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -502,68 +440,6 @@ export default function ScraperTest({ onJobsChange, onSelectReviews }: ScraperTe
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="w-full max-w-4xl mx-auto">
|
<div className="w-full max-w-4xl mx-auto">
|
||||||
{/* Scraper Type Selection */}
|
|
||||||
<div className="mb-4 p-4 bg-white border-2 border-gray-200 rounded-xl shadow-sm">
|
|
||||||
<div className="flex items-center justify-between">
|
|
||||||
<div className="flex items-center gap-3">
|
|
||||||
<div className="w-10 h-10 bg-gradient-to-br from-blue-500 to-indigo-600 rounded-lg flex items-center justify-center">
|
|
||||||
<svg className="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
|
||||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19.428 15.428a2 2 0 00-1.022-.547l-2.387-.477a6 6 0 00-3.86.517l-.318.158a6 6 0 01-3.86.517L6.05 15.21a2 2 0 00-1.806.547M8 4h8l-1 1v5.172a2 2 0 00.586 1.414l5 5c1.26 1.26.367 3.414-1.415 3.414H4.828c-1.782 0-2.674-2.154-1.414-3.414l5-5A2 2 0 009 10.172V5L8 4z" />
|
|
||||||
</svg>
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<label className="block text-sm font-bold text-gray-700">Scraper Type</label>
|
|
||||||
<p className="text-xs text-gray-500">Select the type of data to scrape</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{scrapersLoading ? (
|
|
||||||
<div className="flex items-center gap-2 text-gray-400">
|
|
||||||
<div className="w-4 h-4 border-2 border-gray-300 border-t-blue-500 rounded-full animate-spin"></div>
|
|
||||||
<span className="text-sm">Loading...</span>
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<select
|
|
||||||
value={selectedScraper ? `${selectedScraper.job_type}:${selectedScraper.version}:${selectedScraper.variant}` : ''}
|
|
||||||
onChange={(e) => {
|
|
||||||
const [jobType, version, variant] = e.target.value.split(':');
|
|
||||||
const scraper = availableScrapers.find(
|
|
||||||
s => s.job_type === jobType && s.version === version && s.variant === variant
|
|
||||||
);
|
|
||||||
if (scraper) setSelectedScraper(scraper);
|
|
||||||
}}
|
|
||||||
className="px-4 py-2 bg-gray-50 border-2 border-gray-300 rounded-lg text-sm font-medium text-gray-700 hover:border-blue-500 focus:border-blue-500 focus:ring-2 focus:ring-blue-100 outline-none transition-all cursor-pointer min-w-[200px]"
|
|
||||||
>
|
|
||||||
{availableScrapers.map((scraper) => (
|
|
||||||
<option
|
|
||||||
key={`${scraper.job_type}:${scraper.version}:${scraper.variant}`}
|
|
||||||
value={`${scraper.job_type}:${scraper.version}:${scraper.variant}`}
|
|
||||||
>
|
|
||||||
{scraper.label}
|
|
||||||
</option>
|
|
||||||
))}
|
|
||||||
</select>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Show selected scraper info */}
|
|
||||||
{selectedScraper && (
|
|
||||||
<div className="mt-3 pt-3 border-t border-gray-100 flex items-center gap-2 text-xs text-gray-500">
|
|
||||||
<span className="px-2 py-0.5 bg-blue-100 text-blue-700 rounded font-medium">
|
|
||||||
{selectedScraper.job_type.split(/[-_]/).map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ')}
|
|
||||||
</span>
|
|
||||||
<span className="px-2 py-0.5 bg-gray-100 text-gray-600 rounded">
|
|
||||||
v{selectedScraper.version}
|
|
||||||
</span>
|
|
||||||
{selectedScraper.variant !== 'stable' && (
|
|
||||||
<span className="px-2 py-0.5 bg-yellow-100 text-yellow-700 rounded font-medium">
|
|
||||||
{selectedScraper.variant}
|
|
||||||
</span>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Test URL Quick Select */}
|
{/* Test URL Quick Select */}
|
||||||
<div className="mb-4 p-3 bg-gray-50 border-2 border-gray-200 rounded-xl">
|
<div className="mb-4 p-3 bg-gray-50 border-2 border-gray-200 rounded-xl">
|
||||||
<div className="flex items-center gap-2 mb-2">
|
<div className="flex items-center gap-2 mb-2">
|
||||||
|
|||||||
Reference in New Issue
Block a user